132 files changed, 29854 insertions, 785 deletions
diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst index ef540865ad22..685964798479 100644 --- a/Documentation/process/changes.rst +++ b/Documentation/process/changes.rst @@ -31,7 +31,7 @@ you probably needn't concern yourself with pcmciautils. ====================== =============== ======================================== GNU C 5.1 gcc --version Clang/LLVM (optional) 11.0.0 clang --version -Rust (optional) 1.62.0 rustc --version +Rust (optional) 1.66.0 rustc --version bindgen (optional) 0.56.0 bindgen --version GNU make 3.82 make --version bash 4.2 bash --version diff --git a/Documentation/rust/arch-support.rst b/Documentation/rust/arch-support.rst index 6982b63775da..3776059a385a 100644 --- a/Documentation/rust/arch-support.rst +++ b/Documentation/rust/arch-support.rst @@ -15,5 +15,6 @@ support corresponds to ``S`` values in the ``MAINTAINERS`` file. ============ ================ ============================================== Architecture Level of support Constraints ============ ================ ============================================== +``arm64`` Maintained None. ``x86`` Maintained ``x86_64`` only. ============ ================ ============================================== diff --git a/MAINTAINERS b/MAINTAINERS index f65def435395..f6d9a4c7d22a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -18246,6 +18246,7 @@ L: rust-for-linux@vger.kernel.org S: Supported W: https://github.com/Rust-for-Linux/linux B: https://github.com/Rust-for-Linux/linux/issues +C: zulip://rust-for-linux.zulipchat.com T: git https://github.com/Rust-for-Linux/linux.git rust-next F: Documentation/rust/ F: rust/ @@ -1602,7 +1602,7 @@ endif # CONFIG_MODULES CLEAN_FILES += include/ksym vmlinux.symvers modules-only.symvers \ modules.builtin modules.builtin.modinfo modules.nsdeps \ compile_commands.json .thinlto-cache rust/test rust/doc \ - .vmlinux.objs .vmlinux.export.c + rust-project.json .vmlinux.objs .vmlinux.export.c # Directories & files removed with 'make mrproper' MRPROPER_FILES += include/config include/generated \ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c5ccca26a408..b0a60403cda2 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -207,6 +207,7 @@ config ARM64 select HAVE_FUNCTION_ARG_ACCESS_API select MMU_GATHER_RCU_TABLE_FREE select HAVE_RSEQ + select HAVE_RUST select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS select HAVE_KPROBES diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index d62bd221828f..33ae20fc3f56 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -41,6 +41,8 @@ KBUILD_CFLAGS += -mgeneral-regs-only \ KBUILD_CFLAGS += $(call cc-disable-warning, psabi) KBUILD_AFLAGS += $(compat_vdso) +KBUILD_RUSTFLAGS += -C target-feature="-neon,-fp-armv8" + KBUILD_CFLAGS += $(call cc-option,-mabi=lp64) KBUILD_AFLAGS += $(call cc-option,-mabi=lp64) @@ -85,8 +87,10 @@ PACRET-$(CONFIG_UNWIND_PATCH_PAC_INTO_SCS) := pac-ret ifeq ($(CONFIG_ARM64_BTI_KERNEL),y) branch-prot-flags-$(CONFIG_CC_HAS_BRANCH_PROT_PAC_RET_BTI) := -mbranch-protection=$(PACRET-y)+bti +KBUILD_RUSTFLAGS += -Z branch-protection=bti,pac-ret,leaf else branch-prot-flags-$(CONFIG_CC_HAS_BRANCH_PROT_PAC_RET) := -mbranch-protection=$(PACRET-y) +KBUILD_RUSTFLAGS += -Z branch-protection=pac-ret,leaf endif # -march=armv8.3-a enables the non-nops instructions for PAC, to avoid the # compiler to generate them and consequently to break the single image contract diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index 264f7f3b395b..fa539047e09e 100644 --- 
a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -355,6 +355,8 @@ source "drivers/gpu/drm/tidss/Kconfig" source "drivers/gpu/drm/xlnx/Kconfig" +source "drivers/gpu/drm/asahi/Kconfig" + source "drivers/gpu/drm/gud/Kconfig" source "drivers/gpu/drm/solomon/Kconfig" diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 981696e99943..38c45ba7ac88 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -198,3 +198,4 @@ obj-y += gud/ obj-$(CONFIG_DRM_HYPERV) += hyperv/ obj-y += solomon/ obj-$(CONFIG_DRM_SPRD) += sprd/ +obj-$(CONFIG_DRM_ASAHI) += asahi/ diff --git a/drivers/gpu/drm/asahi/Kconfig b/drivers/gpu/drm/asahi/Kconfig new file mode 100644 index 000000000000..633ccd0d35ac --- /dev/null +++ b/drivers/gpu/drm/asahi/Kconfig @@ -0,0 +1,39 @@ +# SPDX-License-Identifier: GPL-2.0 + +config RUST_DRM_SCHED + bool + select DRM_SCHED + +config RUST_DRM_GEM_SHMEM_HELPER + bool + select DRM_GEM_SHMEM_HELPER + +config RUST_APPLE_RTKIT + bool + select APPLE_RTKIT + +config DRM_ASAHI + tristate "Asahi (DRM support for Apple AGX GPUs)" + depends on RUST + depends on DRM + depends on (ARM64 && ARCH_APPLE) || (COMPILE_TEST && !GENERIC_ATOMIC64) + depends on MMU + select RUST_DRM_SCHED + select IOMMU_SUPPORT + select IOMMU_IO_PGTABLE_LPAE + select RUST_DRM_GEM_SHMEM_HELPER + select RUST_APPLE_RTKIT + help + DRM driver for Apple AGX GPUs (G13x, found in the M1 SoC family) + +config DRM_ASAHI_DEBUG_ALLOCATOR + bool "Use debug allocator" + depends on DRM_ASAHI + help + Use an alternate, simpler allocator which significantly reduces + performance, but can help find firmware- or GPU-side memory safety + issues. However, it can also trigger firmware bugs more easily, + so expect GPU crashes. + + Say N unless you are debugging firmware structures or porting to a + new firmware version. diff --git a/drivers/gpu/drm/asahi/Makefile b/drivers/gpu/drm/asahi/Makefile new file mode 100644 index 000000000000..e67248667987 --- /dev/null +++ b/drivers/gpu/drm/asahi/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_DRM_ASAHI) += asahi.o diff --git a/drivers/gpu/drm/asahi/alloc.rs b/drivers/gpu/drm/asahi/alloc.rs new file mode 100644 index 000000000000..d918b19e9721 --- /dev/null +++ b/drivers/gpu/drm/asahi/alloc.rs @@ -0,0 +1,1046 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT + +//! GPU kernel object allocator. +//! +//! This kernel driver needs to manage a large number of GPU objects, in both firmware/kernel +//! address space and user address space. This module implements a simple grow-only heap allocator +//! based on the DRM MM range allocator, and a debug allocator that allocates each object as a +//! separate GEM object. +//! +//! Allocations may optionally have debugging enabled, which adds preambles that store metadata +//! about the allocation. This is useful for live debugging using the hypervisor or postmortem +//! debugging with a GPU memory snapshot, since it makes it easier to identify use-after-free and +//! caching issues. 
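The debug preamble and guard scheme described in the alloc.rs module comment above can be illustrated outside the kernel. The following is a minimal, standalone userspace sketch of the same idea — a header tagged live/dead in front of the payload, poison fill on free, and trailing guard bytes checked on drop. The type and function names (`DebugHeader`, `TaggedAlloc`, `payload`) are invented for this sketch; only the magic values (STATE_LIVE/STATE_DEAD/0x93) are taken from the patch itself, and the driver's real implementation (`AllocDebugData`, `GenericAlloc`) follows below in alloc.rs.

// Sketch only: userspace stand-in for the driver's debug-allocation tagging.
use std::alloc::{alloc, dealloc, Layout};

const STATE_LIVE: u32 = 0x4556494c; // "LIVE" in little-endian ASCII
const STATE_DEAD: u32 = 0x44414544; // "DEAD"
const GUARD: u8 = 0x93;             // marker written after the payload
const GUARD_LEN: usize = 32;

#[repr(C)]
struct DebugHeader {
    state: u32,
    size: u64,
}

/// A payload preceded by a `DebugHeader` and followed by guard bytes.
struct TaggedAlloc {
    base: *mut u8,
    layout: Layout,
    size: usize,
}

impl TaggedAlloc {
    fn new(size: usize) -> Self {
        let hdr = std::mem::size_of::<DebugHeader>();
        let layout = Layout::from_size_align(hdr + size + GUARD_LEN, 8).unwrap();
        unsafe {
            let base = alloc(layout);
            assert!(!base.is_null(), "allocation failed");
            // Preamble: a later memory snapshot can tell live and freed objects apart.
            (base as *mut DebugHeader).write(DebugHeader {
                state: STATE_LIVE,
                size: size as u64,
            });
            // Guard bytes: an overwritten guard reveals out-of-bounds writes on drop.
            std::ptr::write_bytes(base.add(hdr + size), GUARD, GUARD_LEN);
            TaggedAlloc { base, layout, size }
        }
    }

    fn payload(&mut self) -> &mut [u8] {
        let hdr = std::mem::size_of::<DebugHeader>();
        unsafe { std::slice::from_raw_parts_mut(self.base.add(hdr), self.size) }
    }
}

impl Drop for TaggedAlloc {
    fn drop(&mut self) {
        let hdr = std::mem::size_of::<DebugHeader>();
        unsafe {
            let guard = std::slice::from_raw_parts(self.base.add(hdr + self.size), GUARD_LEN);
            if guard.iter().any(|&b| b != GUARD) {
                eprintln!("corruption detected past {}-byte object", self.size);
            }
            // Mark the preamble dead and poison the payload so use-after-free stands out.
            (self.base as *mut DebugHeader).write(DebugHeader {
                state: STATE_DEAD,
                size: self.size as u64,
            });
            std::ptr::write_bytes(self.base.add(hdr), 0xde, self.size);
            dealloc(self.base, self.layout);
        }
    }
}

fn main() {
    let mut a = TaggedAlloc::new(64);
    a.payload().fill(0xaa);
} // dropping `a` checks the guard and flips the header to STATE_DEAD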
+ +use kernel::{c_str, drm::mm, error::Result, prelude::*, str::CString, sync::LockClassKey}; + +use crate::debug::*; +use crate::driver::AsahiDevice; +use crate::fw::types::Zeroed; +use crate::mmu; +use crate::object::{GpuArray, GpuObject, GpuOnlyArray, GpuStruct, GpuWeakPointer}; + +use core::cmp::Ordering; +use core::fmt; +use core::fmt::{Debug, Formatter}; +use core::marker::PhantomData; +use core::mem; +use core::mem::MaybeUninit; +use core::ptr::NonNull; + +const DEBUG_CLASS: DebugFlags = DebugFlags::Alloc; + +#[cfg(not(CONFIG_DRM_ASAHI_DEBUG_ALLOCATOR))] +/// The driver-global allocator type +pub(crate) type DefaultAllocator = HeapAllocator; + +#[cfg(not(CONFIG_DRM_ASAHI_DEBUG_ALLOCATOR))] +/// The driver-global allocation type +pub(crate) type DefaultAllocation = HeapAllocation; + +#[cfg(CONFIG_DRM_ASAHI_DEBUG_ALLOCATOR)] +/// The driver-global allocator type +pub(crate) type DefaultAllocator = SimpleAllocator; + +#[cfg(CONFIG_DRM_ASAHI_DEBUG_ALLOCATOR)] +/// The driver-global allocation type +pub(crate) type DefaultAllocation = SimpleAllocation; + +/// Represents a raw allocation (without any type information). +pub(crate) trait RawAllocation { + /// Returns the CPU-side pointer (if CPU mapping is enabled) as a byte non-null pointer. + fn ptr(&self) -> Option<NonNull<u8>>; + /// Returns the GPU VA pointer as a u64. + fn gpu_ptr(&self) -> u64; + /// Returns the size of the allocation in bytes. + fn size(&self) -> usize; + /// Returns the AsahiDevice that owns this allocation. + fn device(&self) -> &AsahiDevice; +} + +/// Represents a typed allocation. +pub(crate) trait Allocation<T>: Debug { + /// Returns the typed CPU-side pointer (if CPU mapping is enabled). + fn ptr(&self) -> Option<NonNull<T>>; + /// Returns the GPU VA pointer as a u64. + fn gpu_ptr(&self) -> u64; + /// Returns the size of the allocation in bytes. + fn size(&self) -> usize; + /// Returns the AsahiDevice that owns this allocation. + fn device(&self) -> &AsahiDevice; +} + +/// A generic typed allocation wrapping a RawAllocation. +/// +/// This is currently the only Allocation implementation, since it is shared by all allocators. +pub(crate) struct GenericAlloc<T, U: RawAllocation> { + alloc: U, + alloc_size: usize, + debug_offset: usize, + padding: usize, + _p: PhantomData<T>, +} + +impl<T, U: RawAllocation> Allocation<T> for GenericAlloc<T, U> { + fn ptr(&self) -> Option<NonNull<T>> { + self.alloc + .ptr() + .map(|p| unsafe { NonNull::new_unchecked(p.as_ptr().add(self.debug_offset) as *mut T) }) + } + fn gpu_ptr(&self) -> u64 { + self.alloc.gpu_ptr() + self.debug_offset as u64 + } + fn size(&self) -> usize { + self.alloc_size + } + fn device(&self) -> &AsahiDevice { + self.alloc.device() + } +} + +impl<T, U: RawAllocation> Debug for GenericAlloc<T, U> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + f.debug_struct(core::any::type_name::<GenericAlloc<T, U>>()) + .field("ptr", &format_args!("{:?}", self.ptr())) + .field("gpu_ptr", &format_args!("{:#X?}", self.gpu_ptr())) + .field("size", &format_args!("{:#X?}", self.size())) + .finish() + } +} + +/// Debugging data associated with an allocation, when debugging is enabled. +#[repr(C)] +struct AllocDebugData { + state: u32, + _pad: u32, + size: u64, + base_gpuva: u64, + obj_gpuva: u64, + name: [u8; 0x20], +} + +/// Magic flag indicating a live allocation. +const STATE_LIVE: u32 = 0x4556494c; +/// Magic flag indicating a freed allocation. 
+const STATE_DEAD: u32 = 0x44414544; + +/// Marker byte to identify when firmware/GPU write beyond the end of an allocation. +const GUARD_MARKER: u8 = 0x93; + +impl<T, U: RawAllocation> Drop for GenericAlloc<T, U> { + fn drop(&mut self) { + let debug_len = mem::size_of::<AllocDebugData>(); + if self.debug_offset >= debug_len { + if let Some(p) = self.alloc.ptr() { + unsafe { + let p = p.as_ptr().add(self.debug_offset - debug_len); + (p as *mut u32).write(STATE_DEAD); + } + } + } + if debug_enabled(DebugFlags::FillAllocations) { + if let Some(p) = self.ptr() { + unsafe { (p.as_ptr() as *mut u8).write_bytes(0xde, self.size()) }; + } + } + if self.padding != 0 { + if let Some(p) = self.ptr() { + let guard = unsafe { + core::slice::from_raw_parts( + (p.as_ptr() as *mut u8 as *const u8).add(self.size()), + self.padding, + ) + }; + if let Some(first_err) = guard.iter().position(|&r| r != GUARD_MARKER) { + let last_err = guard + .iter() + .rev() + .position(|&r| r != GUARD_MARKER) + .unwrap_or(0); + dev_warn!( + self.device(), + "Allocator: Corruption after object of type {} at {:#x}:{:#x} + {:#x}..={:#x}\n", + core::any::type_name::<T>(), + self.gpu_ptr(), + self.size(), + first_err, + self.padding - last_err - 1 + ); + } + } + } + } +} + +static_assert!(mem::size_of::<AllocDebugData>() == 0x40); + +/// A trait representing an allocator. +pub(crate) trait Allocator { + /// The raw allocation type used by this allocator. + type Raw: RawAllocation; + // TODO: Needs associated_type_defaults + // type Allocation<T> = GenericAlloc<T, Self::Raw>; + + /// Returns the `AsahiDevice` associated with this allocator. + fn device(&self) -> &AsahiDevice; + /// Returns whether CPU-side mapping is enabled. + fn cpu_maps(&self) -> bool; + /// Returns the minimum alignment for allocations. + fn min_align(&self) -> usize; + /// Allocate an object of the given size in bytes with the given alignment. + fn alloc(&mut self, size: usize, align: usize) -> Result<Self::Raw>; + + /// Returns a tuple of (count, size) of how much garbage (freed but not yet reusable objects) + /// exists in this allocator. Optional. + fn garbage(&self) -> (usize, usize) { + (0, 0) + } + /// Collect garbage for this allocator, up to the given object count. Optional. + fn collect_garbage(&mut self, _count: usize) {} + + /// Allocate a new GpuStruct object. See [`GpuObject::new`]. + #[inline(never)] + fn new_object<T: GpuStruct>( + &mut self, + inner: T, + callback: impl for<'a> FnOnce(&'a T) -> T::Raw<'a>, + ) -> Result<GpuObject<T, GenericAlloc<T, Self::Raw>>> { + GpuObject::<T, GenericAlloc<T, Self::Raw>>::new(self.alloc_object()?, inner, callback) + } + + /// Allocate a new GpuStruct object. See [`GpuObject::new_boxed`]. + #[inline(never)] + fn new_boxed<T: GpuStruct>( + &mut self, + inner: Box<T>, + callback: impl for<'a> FnOnce( + &'a T, + &'a mut MaybeUninit<T::Raw<'a>>, + ) -> Result<&'a mut T::Raw<'a>>, + ) -> Result<GpuObject<T, GenericAlloc<T, Self::Raw>>> { + GpuObject::<T, GenericAlloc<T, Self::Raw>>::new_boxed(self.alloc_object()?, inner, callback) + } + + /// Allocate a new GpuStruct object. See [`GpuObject::new_inplace`]. + #[inline(never)] + fn new_inplace<T: GpuStruct>( + &mut self, + inner: T, + callback: impl for<'a> FnOnce( + &'a T, + &'a mut MaybeUninit<T::Raw<'a>>, + ) -> Result<&'a mut T::Raw<'a>>, + ) -> Result<GpuObject<T, GenericAlloc<T, Self::Raw>>> { + GpuObject::<T, GenericAlloc<T, Self::Raw>>::new_inplace( + self.alloc_object()?, + inner, + callback, + ) + } + + /// Allocate a new GpuStruct object. 
See [`GpuObject::new_default`]. + #[inline(never)] + fn new_default<T: GpuStruct + Default>( + &mut self, + ) -> Result<GpuObject<T, GenericAlloc<T, Self::Raw>>> + where + for<'a> <T as GpuStruct>::Raw<'a>: Default + Zeroed, + { + GpuObject::<T, GenericAlloc<T, Self::Raw>>::new_default(self.alloc_object()?) + } + + /// Allocate a new GpuStruct object. See [`GpuObject::new_prealloc`]. + #[inline(never)] + fn new_prealloc<T: GpuStruct>( + &mut self, + inner_cb: impl FnOnce(GpuWeakPointer<T>) -> Result<Box<T>>, + raw_cb: impl for<'a> FnOnce( + &'a T, + &'a mut MaybeUninit<T::Raw<'a>>, + ) -> Result<&'a mut T::Raw<'a>>, + ) -> Result<GpuObject<T, GenericAlloc<T, Self::Raw>>> { + GpuObject::<T, GenericAlloc<T, Self::Raw>>::new_prealloc( + self.alloc_object()?, + inner_cb, + raw_cb, + ) + } + + /// Allocate a generic buffer of the given size and alignment, applying the debug features if + /// enabled to tag it and detect overflows. + fn alloc_generic<T>( + &mut self, + size: usize, + align: usize, + ) -> Result<GenericAlloc<T, Self::Raw>> { + let padding = if debug_enabled(DebugFlags::DetectOverflows) { + size + } else { + 0 + }; + + let ret: GenericAlloc<T, Self::Raw> = + if self.cpu_maps() && debug_enabled(debug::DebugFlags::DebugAllocations) { + let debug_align = self.min_align().max(align); + let debug_len = mem::size_of::<AllocDebugData>(); + let debug_offset = (debug_len * 2 + debug_align - 1) & !(debug_align - 1); + + let alloc = self.alloc(size + debug_offset + padding, align)?; + + let mut debug = AllocDebugData { + state: STATE_LIVE, + _pad: 0, + size: size as u64, + base_gpuva: alloc.gpu_ptr(), + obj_gpuva: alloc.gpu_ptr() + debug_offset as u64, + name: [0; 0x20], + }; + + let name = core::any::type_name::<T>().as_bytes(); + let len = name.len().min(debug.name.len() - 1); + debug.name[..len].copy_from_slice(&name[..len]); + + if let Some(p) = alloc.ptr() { + unsafe { + let p = p.as_ptr(); + p.write_bytes(0x42, debug_offset - 2 * debug_len); + let cur = p.add(debug_offset - debug_len) as *mut AllocDebugData; + let prev = p.add(debug_offset - 2 * debug_len) as *mut AllocDebugData; + prev.copy_from(cur, 1); + cur.copy_from(&debug, 1); + }; + } + + GenericAlloc { + alloc, + alloc_size: size, + debug_offset, + padding, + _p: PhantomData, + } + } else { + GenericAlloc { + alloc: self.alloc(size + padding, align)?, + alloc_size: size, + debug_offset: 0, + padding, + _p: PhantomData, + } + }; + + if debug_enabled(DebugFlags::FillAllocations) { + if let Some(p) = ret.ptr() { + unsafe { (p.as_ptr() as *mut u8).write_bytes(0xaa, ret.size()) }; + } + } + + if padding != 0 { + if let Some(p) = ret.ptr() { + unsafe { + (p.as_ptr() as *mut u8) + .add(ret.size()) + .write_bytes(GUARD_MARKER, padding); + } + } + } + + Ok(ret) + } + + /// Allocate an object of a given type, without actually initializing the allocation. + /// + /// This is useful to directly call [`GpuObject::new_*`], without borrowing a reference to the + /// allocator for the entire duration (e.g. if further allocations need to happen inside the + /// callbacks). + fn alloc_object<T: GpuStruct>(&mut self) -> Result<GenericAlloc<T, Self::Raw>> { + let size = mem::size_of::<T::Raw<'static>>(); + let align = mem::align_of::<T::Raw<'static>>(); + + self.alloc_generic(size, align) + } + + /// Allocate an empty `GpuArray` of a given type and length. 
+ fn array_empty<T: Sized + Default>( + &mut self, + count: usize, + ) -> Result<GpuArray<T, GenericAlloc<T, Self::Raw>>> { + let size = mem::size_of::<T>() * count; + let align = mem::align_of::<T>(); + + let alloc = self.alloc_generic(size, align)?; + GpuArray::<T, GenericAlloc<T, Self::Raw>>::empty(alloc, count) + } + + /// Allocate an empty `GpuOnlyArray` of a given type and length. + fn array_gpuonly<T: Sized + Default>( + &mut self, + count: usize, + ) -> Result<GpuOnlyArray<T, GenericAlloc<T, Self::Raw>>> { + let size = mem::size_of::<T>() * count; + let align = mem::align_of::<T>(); + + let alloc = self.alloc_generic(size, align)?; + GpuOnlyArray::<T, GenericAlloc<T, Self::Raw>>::new(alloc, count) + } +} + +/// A simple allocation backed by a separate GEM object. +/// +/// # Invariants +/// `ptr` is either None or a valid, non-null pointer to the CPU view of the object. +/// `gpu_ptr` is the GPU-side VA of the object. +pub(crate) struct SimpleAllocation { + dev: AsahiDevice, + ptr: Option<NonNull<u8>>, + gpu_ptr: u64, + size: usize, + vm: mmu::Vm, + obj: crate::gem::ObjectRef, +} + +/// SAFETY: `SimpleAllocation` just points to raw memory and should be safe to send across threads. +unsafe impl Send for SimpleAllocation {} +unsafe impl Sync for SimpleAllocation {} + +impl Drop for SimpleAllocation { + fn drop(&mut self) { + mod_dev_dbg!( + self.device(), + "SimpleAllocator: drop object @ {:#x}\n", + self.gpu_ptr() + ); + if debug_enabled(DebugFlags::FillAllocations) { + if let Ok(vmap) = self.obj.vmap() { + vmap.as_mut_slice().fill(0x42); + } + } + self.obj.drop_vm_mappings(self.vm.id()); + } +} + +impl RawAllocation for SimpleAllocation { + fn ptr(&self) -> Option<NonNull<u8>> { + self.ptr + } + fn gpu_ptr(&self) -> u64 { + self.gpu_ptr + } + fn size(&self) -> usize { + self.size + } + + fn device(&self) -> &AsahiDevice { + &self.dev + } +} + +/// A simple allocator that allocates each object as its own GEM object, aligned to the end of a +/// page. +/// +/// This is very slow, but it has the advantage that over-reads by the firmware or GPU will fault on +/// the guard page after the allocation, which can be useful to validate that the firmware's or +/// GPU's idea of object size what we expect. +pub(crate) struct SimpleAllocator { + dev: AsahiDevice, + start: u64, + end: u64, + prot: u32, + vm: mmu::Vm, + min_align: usize, + cpu_maps: bool, +} + +impl SimpleAllocator { + /// Create a new `SimpleAllocator` for a given address range and `Vm`. 
+ #[allow(dead_code)] + #[allow(clippy::too_many_arguments)] + pub(crate) fn new( + dev: &AsahiDevice, + vm: &mmu::Vm, + start: u64, + end: u64, + min_align: usize, + prot: u32, + _block_size: usize, + mut cpu_maps: bool, + _name: fmt::Arguments<'_>, + _keep_garbage: bool, + ) -> Result<SimpleAllocator> { + if debug_enabled(DebugFlags::ForceCPUMaps) { + cpu_maps = true; + } + Ok(SimpleAllocator { + dev: dev.clone(), + vm: vm.clone(), + start, + end, + prot, + min_align, + cpu_maps, + }) + } +} + +impl Allocator for SimpleAllocator { + type Raw = SimpleAllocation; + + fn device(&self) -> &AsahiDevice { + &self.dev + } + + fn cpu_maps(&self) -> bool { + self.cpu_maps + } + + fn min_align(&self) -> usize { + self.min_align + } + + #[inline(never)] + fn alloc(&mut self, size: usize, align: usize) -> Result<SimpleAllocation> { + let size_aligned = (size + mmu::UAT_PGSZ - 1) & !mmu::UAT_PGMSK; + let align = self.min_align.max(align); + let offset = (size_aligned - size) & !(align - 1); + + mod_dev_dbg!( + &self.dev, + "SimpleAllocator::new: size={:#x} size_al={:#x} al={:#x} off={:#x}\n", + size, + size_aligned, + align, + offset + ); + + let mut obj = crate::gem::new_kernel_object(&self.dev, size_aligned)?; + let p = obj.vmap()?.as_mut_ptr() as *mut u8; + if debug_enabled(DebugFlags::FillAllocations) { + obj.vmap()?.as_mut_slice().fill(0xde); + } + let iova = obj.map_into_range( + &self.vm, + self.start, + self.end, + self.min_align.max(mmu::UAT_PGSZ) as u64, + self.prot, + true, + )?; + + let ptr = unsafe { p.add(offset) } as *mut u8; + let gpu_ptr = (iova + offset) as u64; + + mod_dev_dbg!( + &self.dev, + "SimpleAllocator::new -> {:#?} / {:#?} | {:#x} / {:#x}\n", + p, + ptr, + iova, + gpu_ptr + ); + + Ok(SimpleAllocation { + dev: self.dev.clone(), + ptr: NonNull::new(ptr), + gpu_ptr, + size, + vm: self.vm.clone(), + obj, + }) + } +} + +/// Inner data for an allocation from the heap allocator. +/// +/// This is wrapped in an `mm::Node`. +pub(crate) struct HeapAllocationInner { + dev: AsahiDevice, + ptr: Option<NonNull<u8>>, + real_size: usize, +} + +/// SAFETY: `SimpleAllocation` just points to raw memory and should be safe to send across threads. +unsafe impl Send for HeapAllocationInner {} +unsafe impl Sync for HeapAllocationInner {} + +/// Outer view of a heap allocation. +/// +/// This uses an Option<> so we can move the internal `Node` into the garbage pool when it gets +/// dropped. +/// +/// # Invariants +/// The `Option` must always be `Some(...)` while this object is alive. 
+pub(crate) struct HeapAllocation(Option<mm::Node<HeapAllocatorInner, HeapAllocationInner>>); + +impl Drop for HeapAllocation { + fn drop(&mut self) { + let node = self.0.take().unwrap(); + let size = node.size(); + let alloc = node.alloc_ref(); + + alloc.with(|a| { + if let Some(garbage) = a.garbage.as_mut() { + if garbage.try_push(node).is_err() { + dev_err!( + &a.dev, + "HeapAllocation[{}]::drop: Failed to keep garbage\n", + &*a.name, + ); + } + a.total_garbage += size as usize; + None + } else { + // We need to ensure node survives this scope, since dropping it + // will try to take the mm lock and deadlock us + Some(node) + } + }); + } +} + +impl mm::AllocInner<HeapAllocationInner> for HeapAllocatorInner { + fn drop_object( + &mut self, + start: u64, + _size: u64, + _color: usize, + obj: &mut HeapAllocationInner, + ) { + /* real_size == 0 means it's a guard node */ + if obj.real_size > 0 { + mod_dev_dbg!( + obj.dev, + "HeapAllocator[{}]: drop object @ {:#x} ({} bytes)\n", + &*self.name, + start, + obj.real_size, + ); + self.allocated -= obj.real_size; + } + } +} + +impl RawAllocation for HeapAllocation { + // SAFETY: This function must always return a valid pointer. + // Since the HeapAllocation contains a reference to the + // backing_objects array that contains the object backing this pointer, + // and objects are only ever added to it, this pointer is guaranteed to + // remain valid for the lifetime of the HeapAllocation. + fn ptr(&self) -> Option<NonNull<u8>> { + self.0.as_ref().unwrap().ptr + } + // SAFETY: This function must always return a valid GPU pointer. + // See the explanation in ptr(). + fn gpu_ptr(&self) -> u64 { + self.0.as_ref().unwrap().start() + } + fn size(&self) -> usize { + self.0.as_ref().unwrap().size() as usize + } + fn device(&self) -> &AsahiDevice { + &self.0.as_ref().unwrap().dev + } +} + +/// Inner data for a heap allocator which uses the DRM MM range allocator to manage the heap. +/// +/// This is wrapped by an `mm::Allocator`. +struct HeapAllocatorInner { + dev: AsahiDevice, + allocated: usize, + backing_objects: Vec<(crate::gem::ObjectRef, u64)>, + garbage: Option<Vec<mm::Node<HeapAllocatorInner, HeapAllocationInner>>>, + total_garbage: usize, + name: CString, + vm_id: u64, +} + +/// A heap allocator which uses the DRM MM range allocator to manage its objects. +/// +/// The heap is composed of a series of GEM objects. This implementation only ever grows the heap, +/// never shrinks it. +pub(crate) struct HeapAllocator { + dev: AsahiDevice, + start: u64, + end: u64, + top: u64, + prot: u32, + vm: mmu::Vm, + min_align: usize, + block_size: usize, + cpu_maps: bool, + guard_nodes: Vec<mm::Node<HeapAllocatorInner, HeapAllocationInner>>, + mm: mm::Allocator<HeapAllocatorInner, HeapAllocationInner>, + name: CString, +} + +static LOCK_KEY: LockClassKey = LockClassKey::new(); + +impl HeapAllocator { + /// Create a new HeapAllocator for a given `Vm` and address range. 
+ #[allow(dead_code)] + #[allow(clippy::too_many_arguments)] + pub(crate) fn new( + dev: &AsahiDevice, + vm: &mmu::Vm, + start: u64, + end: u64, + min_align: usize, + prot: u32, + block_size: usize, + mut cpu_maps: bool, + name: fmt::Arguments<'_>, + keep_garbage: bool, + ) -> Result<HeapAllocator> { + if !min_align.is_power_of_two() { + return Err(EINVAL); + } + if debug_enabled(DebugFlags::ForceCPUMaps) { + cpu_maps = true; + } + + let name = CString::try_from_fmt(name)?; + + let inner = HeapAllocatorInner { + dev: dev.clone(), + allocated: 0, + backing_objects: Vec::new(), + // TODO: This clearly needs a try_clone() or similar + name: CString::try_from_fmt(fmt!("{}", &*name))?, + vm_id: vm.id(), + garbage: if keep_garbage { Some(Vec::new()) } else { None }, + total_garbage: 0, + }; + + let mm = mm::Allocator::new( + start, + end - start + 1, + inner, + c_str!("HeapAllocator"), + &LOCK_KEY, + )?; + + Ok(HeapAllocator { + dev: dev.clone(), + vm: vm.clone(), + start, + end, + top: start, + prot, + min_align, + block_size: block_size.max(min_align), + cpu_maps, + guard_nodes: Vec::new(), + mm, + name, + }) + } + + /// Add a new backing block of the given size to this heap. + /// + /// If CPU mapping is enabled, this also adds a guard node to the range allocator to ensure that + /// objects cannot straddle backing block boundaries, since we cannot easily create a contiguous + /// CPU VA mapping for them. This can create some fragmentation. If CPU mapping is disabled, we + /// skip the guard blocks, since the GPU view of the heap is always contiguous. + fn add_block(&mut self, size: usize) -> Result { + let size_aligned = (size + mmu::UAT_PGSZ - 1) & !mmu::UAT_PGMSK; + + mod_dev_dbg!( + &self.dev, + "HeapAllocator[{}]::add_block: size={:#x} size_al={:#x}\n", + &*self.name, + size, + size_aligned, + ); + + if self.top.saturating_add(size_aligned as u64) >= self.end { + dev_err!( + &self.dev, + "HeapAllocator[{}]::add_block: Exhausted VA space\n", + &*self.name, + ); + } + + let mut obj = crate::gem::new_kernel_object(&self.dev, size_aligned)?; + if self.cpu_maps && debug_enabled(DebugFlags::FillAllocations) { + obj.vmap()?.as_mut_slice().fill(0xde); + } + + let gpu_ptr = self.top; + if let Err(e) = obj.map_at(&self.vm, gpu_ptr, self.prot, self.cpu_maps) { + dev_err!( + &self.dev, + "HeapAllocator[{}]::add_block: Failed to map at {:#x} ({:?})\n", + &*self.name, + gpu_ptr, + e + ); + return Err(e); + } + + self.mm + .with_inner(|inner| inner.backing_objects.try_reserve(1))?; + + let mut new_top = self.top + size_aligned as u64; + if self.cpu_maps { + let guard = self.min_align.max(mmu::UAT_PGSZ); + mod_dev_dbg!( + &self.dev, + "HeapAllocator[{}]::add_block: Adding guard node {:#x}:{:#x}\n", + &*self.name, + new_top, + guard + ); + + let inner = HeapAllocationInner { + dev: self.dev.clone(), + ptr: None, + real_size: 0, + }; + + let node = match self.mm.reserve_node(inner, new_top, guard as u64, 0) { + Ok(a) => a, + Err(a) => { + dev_err!( + &self.dev, + "HeapAllocator[{}]::add_block: Failed to reserve guard node {:#x}:{:#x}: {:?}\n", + &*self.name, + guard, + new_top, + a + ); + return Err(EIO); + } + }; + + self.guard_nodes.try_push(node)?; + + new_top += guard as u64; + } + mod_dev_dbg!( + &self.dev, + "HeapAllocator[{}]::add_block: top={:#x}\n", + &*self.name, + new_top + ); + + self.mm + .with_inner(|inner| inner.backing_objects.try_push((obj, gpu_ptr)))?; + + self.top = new_top; + + cls_dev_dbg!( + MemStats, + &self.dev, + "{} Heap: grow to {} bytes\n", + &*self.name, + self.top - 
self.start + ); + + Ok(()) + } + + /// Find the backing object index that backs a given GPU address. + fn find_obj(&mut self, addr: u64) -> Result<usize> { + self.mm.with_inner(|inner| { + inner + .backing_objects + .binary_search_by(|obj| { + let start = obj.1; + let end = obj.1 + obj.0.size() as u64; + if start > addr { + Ordering::Greater + } else if end <= addr { + Ordering::Less + } else { + Ordering::Equal + } + }) + .or(Err(ENOENT)) + }) + } +} + +impl Allocator for HeapAllocator { + type Raw = HeapAllocation; + + fn device(&self) -> &AsahiDevice { + &self.dev + } + + fn cpu_maps(&self) -> bool { + self.cpu_maps + } + + fn min_align(&self) -> usize { + self.min_align + } + + fn alloc(&mut self, size: usize, align: usize) -> Result<HeapAllocation> { + if align != 0 && !align.is_power_of_two() { + return Err(EINVAL); + } + let align = self.min_align.max(align); + let size_aligned = (size + align - 1) & !(align - 1); + + mod_dev_dbg!( + &self.dev, + "HeapAllocator[{}]::new: size={:#x} size_al={:#x}\n", + &*self.name, + size, + size_aligned, + ); + + let inner = HeapAllocationInner { + dev: self.dev.clone(), + ptr: None, + real_size: size, + }; + + let mut node = match self.mm.insert_node_generic( + inner, + size_aligned as u64, + align as u64, + 0, + mm::InsertMode::Best, + ) { + Ok(a) => a, + Err(a) => { + dev_err!( + &self.dev, + "HeapAllocator[{}]::new: Failed to insert node of size {:#x} / align {:#x}: {:?}\n", + &*self.name, size_aligned, align, a + ); + return Err(a); + } + }; + + self.mm.with_inner(|inner| inner.allocated += size); + + let mut new_object = false; + let start = node.start(); + let end = start + node.size(); + if end > self.top { + if start > self.top { + dev_warn!( + self.dev, + "HeapAllocator[{}]::alloc: top={:#x}, start={:#x}\n", + &*self.name, + self.top, + start + ); + } + let block_size = self.block_size.max((end - self.top) as usize); + self.add_block(block_size)?; + new_object = true; + } + assert!(end <= self.top); + + if self.cpu_maps { + mod_dev_dbg!( + self.dev, + "HeapAllocator[{}]::alloc: mapping to CPU\n", + &*self.name + ); + + let idx = if new_object { + None + } else { + Some(match self.find_obj(start) { + Ok(a) => a, + Err(_) => { + dev_warn!( + self.dev, + "HeapAllocator[{}]::alloc: Failed to find object at {:#x}\n", + &*self.name, + start + ); + return Err(EIO); + } + }) + }; + let (obj_start, obj_size, p) = self.mm.with_inner(|inner| -> Result<_> { + let idx = idx.unwrap_or(inner.backing_objects.len() - 1); + let obj = &mut inner.backing_objects[idx]; + let p = obj.0.vmap()?.as_mut_ptr() as *mut u8; + Ok((obj.1, obj.0.size(), p)) + })?; + assert!(obj_start <= start); + assert!(obj_start + obj_size as u64 >= end); + node.as_mut().inner_mut().ptr = + NonNull::new(unsafe { p.add((start - obj_start) as usize) }); + mod_dev_dbg!( + self.dev, + "HeapAllocator[{}]::alloc: CPU pointer = {:?}\n", + &*self.name, + node.ptr + ); + } + + mod_dev_dbg!( + self.dev, + "HeapAllocator[{}]::alloc: Allocated {:#x} bytes @ {:#x}\n", + &*self.name, + end - start, + start + ); + + Ok(HeapAllocation(Some(node))) + } + + fn garbage(&self) -> (usize, usize) { + self.mm.with_inner(|inner| { + if let Some(g) = inner.garbage.as_ref() { + (g.len(), inner.total_garbage) + } else { + (0, 0) + } + }) + } + + fn collect_garbage(&mut self, count: usize) { + // Take the garbage out of the inner block, so we can safely drop it without deadlocking + let mut garbage = Vec::new(); + + if garbage.try_reserve(count).is_err() { + dev_crit!( + self.dev, + 
"HeapAllocator[{}]:collect_garbage: failed to reserve space\n", + &*self.name, + ); + return; + } + + self.mm.with_inner(|inner| { + if let Some(g) = inner.garbage.as_mut() { + for node in g.drain(0..count) { + inner.total_garbage -= node.size() as usize; + garbage + .try_push(node) + .expect("try_push() failed after reserve()"); + } + } + }); + } +} + +impl Drop for HeapAllocatorInner { + fn drop(&mut self) { + mod_dev_dbg!( + self.dev, + "HeapAllocator[{}]: dropping allocator\n", + &*self.name + ); + if self.allocated > 0 { + // This should never happen + dev_crit!( + self.dev, + "HeapAllocator[{}]: dropping with {} bytes allocated\n", + &*self.name, + self.allocated + ); + } else { + for mut obj in self.backing_objects.drain(..) { + obj.0.drop_vm_mappings(self.vm_id); + } + } + } +} diff --git a/drivers/gpu/drm/asahi/asahi.rs b/drivers/gpu/drm/asahi/asahi.rs new file mode 100644 index 000000000000..e511d83f4cd1 --- /dev/null +++ b/drivers/gpu/drm/asahi/asahi.rs @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +#![recursion_limit = "1024"] + +//! Driver for the Apple AGX GPUs found in Apple Silicon SoCs. + +mod alloc; +mod buffer; +mod channel; +mod debug; +mod driver; +mod event; +mod file; +mod float; +mod fw; +mod gem; +mod gpu; +mod hw; +mod initdata; +mod mem; +mod microseq; +mod mmu; +mod object; +mod place; +mod queue; +mod regs; +mod slotalloc; +mod util; +mod workqueue; + +use kernel::module_platform_driver; + +module_platform_driver! { + type: driver::AsahiDriver, + name: "asahi", + license: "Dual MIT/GPL", + params: { + debug_flags: u64 { + default: 0, + permissions: 0o644, + description: "Debug flags", + }, + fault_control: u32 { + default: 0, + permissions: 0, + description: "Fault control (0x0: hard faults, 0xb: macOS default)", + }, + initial_tvb_size: usize { + default: 0x8, + permissions: 0o644, + description: "Initial TVB size in blocks", + }, + }, +} diff --git a/drivers/gpu/drm/asahi/buffer.rs b/drivers/gpu/drm/asahi/buffer.rs new file mode 100644 index 000000000000..767ea161176f --- /dev/null +++ b/drivers/gpu/drm/asahi/buffer.rs @@ -0,0 +1,694 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT + +//! Tiled Vertex Buffer management +//! +//! This module manages the Tiled Vertex Buffer, also known as the Parameter Buffer (in imgtec +//! parlance) or the tiler heap (on other architectures). This buffer holds transformed primitive +//! data between the vertex/tiling stage and the fragment stage. +//! +//! On AGX, the buffer is a heap of 128K blocks split into 32K pages (which must be aligned to a +//! multiple of 32K in VA space). The buffer can be shared between multiple render jobs, and each +//! will allocate pages from it during vertex processing and return them during fragment processing. +//! +//! If the buffer runs out of free pages, the vertex pass stops and a partial fragment pass occurs, +//! spilling the intermediate render target state to RAM (a partial render). This is all managed +//! transparently by the firmware. Since partial renders are less efficient, the kernel must grow +//! the heap in response to feedback from the firmware to avoid partial renders in the future. +//! Currently, we only ever grow the heap, and never shrink it. +//! +//! AGX also supports memoryless render targets, which can be used for intermediate results within +//! a render pass. To support partial renders, it seems the GPU/firmware has the ability to borrow +//! pages from the TVB buffer as a temporary render target buffer. Since this happens during a +//! 
partial render itself, if the buffer runs out of space, it requires synchronous growth in +//! response to a firmware interrupt. This is not currently supported, but may be in the future, +//! though it is unclear whether it is worth the effort. +//! +//! This module is also in charge of managing the temporary objects associated with a single render +//! pass, which includes the top-level tile array, the tail pointer cache, preemption buffers, and +//! other miscellaneous structures collectively managed as a "scene". +//! +//! To avoid runaway memory usage, there is a maximum size for buffers (at that point it's unlikely +//! that partial renders will incur much overhead over the buffer data access itself). This is +//! different depending on whether memoryless render targets are in use, and is currently hardcoded. +//! to the most common value used by macOS. + +use crate::debug::*; +use crate::fw::buffer; +use crate::fw::types::*; +use crate::util::*; +use crate::{alloc, fw, gpu, mmu, slotalloc}; +use crate::{box_in_place, place}; +use core::sync::atomic::Ordering; +use kernel::prelude::*; +use kernel::sync::{smutex::Mutex, Arc}; + +const DEBUG_CLASS: DebugFlags = DebugFlags::Buffer; + +/// There are 127 GPU/firmware-side buffer manager slots (yes, 127, not 128). +const NUM_BUFFERS: u32 = 127; + +/// Page size bits for buffer pages (32K). VAs must be aligned to this size. +pub(crate) const PAGE_SHIFT: usize = 15; +/// Page size for buffer pages. +pub(crate) const PAGE_SIZE: usize = 1 << PAGE_SHIFT; +/// Number of pages in a buffer block, which should be contiguous in VA space. +pub(crate) const PAGES_PER_BLOCK: usize = 4; +/// Size of a buffer block. +pub(crate) const BLOCK_SIZE: usize = PAGE_SIZE * PAGES_PER_BLOCK; + +/// Metadata about the tiling configuration for a scene. This is computed in the `render` module. +/// based on dimensions, tile size, and other info. +pub(crate) struct TileInfo { + /// Tile count in the X dimension. Tiles are always 32x32. + pub(crate) tiles_x: u32, + /// Tile count in the Y dimension. Tiles are always 32x32. + pub(crate) tiles_y: u32, + /// Total tile count. + pub(crate) tiles: u32, + /// Micro-tile width (16 or 32). + pub(crate) utile_width: u32, + /// Micro-tile height (16 or 32). + pub(crate) utile_height: u32, + // Macro-tiles in the X dimension. Always 4. + //pub(crate) mtiles_x: u32, + // Macro-tiles in the Y dimension. Always 4. + //pub(crate) mtiles_y: u32, + /// Tiles per macro-tile in the X dimension. + pub(crate) tiles_per_mtile_x: u32, + /// Tiles per macro-tile in the Y dimension. + pub(crate) tiles_per_mtile_y: u32, + // Total tiles per macro-tile. + //pub(crate) tiles_per_mtile: u32, + /// Micro-tiles per macro-tile in the X dimension. + pub(crate) utiles_per_mtile_x: u32, + /// Micro-tiles per macro-tile in the Y dimension. + pub(crate) utiles_per_mtile_y: u32, + // Total micro-tiles per macro-tile. + //pub(crate) utiles_per_mtile: u32, + /// Size of the top-level tilemap, in bytes (for all layers, one cluster). + pub(crate) tilemap_size: usize, + /// Size of the Tail Pointer Cache, in bytes (for all layers * clusters). + pub(crate) tpc_size: usize, + /// Number of blocks in the clustering meta buffer (for clustering). + pub(crate) meta1_blocks: u32, + /// Minimum number of TVB blocks for this render. + pub(crate) min_tvb_blocks: usize, + /// XXX: Allocation factor for cluster tilemaps and meta4. Always 2? + pub(crate) cluster_factor: usize, + /// Tiling parameter structure passed to firmware. 
+ pub(crate) params: fw::vertex::raw::TilingParameters, +} + +/// A single scene, representing a render pass and its required buffers. +#[versions(AGX)] +#[derive(Debug)] +pub(crate) struct Scene { + object: GpuObject<buffer::Scene::ver>, + slot: u32, + rebind: bool, + preempt2_off: usize, + preempt3_off: usize, + // Note: these are dead code only on some version variants. + // It's easier to do this than to propagate the version conditionals everywhere. + #[allow(dead_code)] + meta2_off: usize, + #[allow(dead_code)] + meta3_off: usize, + #[allow(dead_code)] + meta4_off: usize, +} + +#[versions(AGX)] +impl Scene::ver { + /// Returns true if the buffer was bound to a fresh manager slot, and therefore needs an init + /// command before a render. + pub(crate) fn rebind(&self) -> bool { + self.rebind + } + + /// Returns the buffer manager slot this scene's buffer was bound to. + pub(crate) fn slot(&self) -> u32 { + self.slot + } + + /// Returns the GPU pointer to the [`buffer::Scene::ver`]. + pub(crate) fn gpu_pointer(&self) -> GpuPointer<'_, buffer::Scene::ver> { + self.object.gpu_pointer() + } + + /// Returns the GPU weak pointer to the [`buffer::Scene::ver`]. + pub(crate) fn weak_pointer(&self) -> GpuWeakPointer<buffer::Scene::ver> { + self.object.weak_pointer() + } + + /// Returns the GPU weak pointer to the kernel-side temp buffer. + /// (purpose unknown...) + pub(crate) fn kernel_buffer_pointer(&self) -> GpuWeakPointer<[u8]> { + self.object.buffer.inner.lock().kernel_buffer.weak_pointer() + } + + /// Returns the GPU pointer to the `buffer::Info::ver` object associated with this Scene. + pub(crate) fn buffer_pointer(&self) -> GpuPointer<'_, buffer::Info::ver> { + // We can't return the strong pointer directly since its lifetime crosses a lock, but we know + // its lifetime will be valid as long as &self since we hold a reference to the buffer, + // so just construct the strong pointer with the right lifetime here. + unsafe { self.weak_buffer_pointer().upgrade() } + } + + /// Returns the GPU weak pointer to the `buffer::Info::ver` object associated with this Scene. + pub(crate) fn weak_buffer_pointer(&self) -> GpuWeakPointer<buffer::Info::ver> { + self.object.buffer.inner.lock().info.weak_pointer() + } + + /// Returns the GPU pointer to the TVB heap metadata buffer. + pub(crate) fn tvb_heapmeta_pointer(&self) -> GpuPointer<'_, &'_ [u8]> { + self.object.tvb_heapmeta.gpu_pointer() + } + + /// Returns the GPU pointer to the top-level TVB tilemap buffer. + pub(crate) fn tvb_tilemap_pointer(&self) -> GpuPointer<'_, &'_ [u8]> { + self.object.tvb_tilemap.gpu_pointer() + } + + /// Returns the GPU pointer to the Tail Pointer Cache buffer. + pub(crate) fn tpc_pointer(&self) -> GpuPointer<'_, &'_ [u8]> { + self.object.tpc.gpu_pointer() + } + + /// Returns the GPU pointer to the first preemption scratch buffer. + pub(crate) fn preempt_buf_1_pointer(&self) -> GpuPointer<'_, &'_ [u8]> { + self.object.preempt_buf.gpu_pointer() + } + + /// Returns the GPU pointer to the second preemption scratch buffer. + pub(crate) fn preempt_buf_2_pointer(&self) -> GpuPointer<'_, &'_ [u8]> { + self.object + .preempt_buf + .gpu_offset_pointer(self.preempt2_off) + } + + /// Returns the GPU pointer to the third preemption scratch buffer. + pub(crate) fn preempt_buf_3_pointer(&self) -> GpuPointer<'_, &'_ [u8]> { + self.object + .preempt_buf + .gpu_offset_pointer(self.preempt3_off) + } + + /// Returns the GPU pointer to the per-cluster tilemap buffer, if clustering is enabled. 
+ #[allow(dead_code)] + pub(crate) fn cluster_tilemaps_pointer(&self) -> Option<GpuPointer<'_, &'_ [u8]>> { + self.object + .clustering + .as_ref() + .map(|c| c.tilemaps.gpu_pointer()) + } + + /// Returns the GPU pointer to the clustering metadata 1 buffer, if clustering is enabled. + #[allow(dead_code)] + pub(crate) fn meta_1_pointer(&self) -> Option<GpuPointer<'_, &'_ [u8]>> { + self.object + .clustering + .as_ref() + .map(|c| c.meta.gpu_pointer()) + } + + /// Returns the GPU pointer to the clustering metadata 2 buffer, if clustering is enabled. + #[allow(dead_code)] + pub(crate) fn meta_2_pointer(&self) -> Option<GpuPointer<'_, &'_ [u8]>> { + self.object + .clustering + .as_ref() + .map(|c| c.meta.gpu_offset_pointer(self.meta2_off)) + } + + /// Returns the GPU pointer to the clustering metadata 3 buffer, if clustering is enabled. + #[allow(dead_code)] + pub(crate) fn meta_3_pointer(&self) -> Option<GpuPointer<'_, &'_ [u8]>> { + self.object + .clustering + .as_ref() + .map(|c| c.meta.gpu_offset_pointer(self.meta3_off)) + } + + /// Returns the GPU pointer to the clustering metadata 4 buffer, if clustering is enabled. + #[allow(dead_code)] + pub(crate) fn meta_4_pointer(&self) -> Option<GpuPointer<'_, &'_ [u8]>> { + self.object + .clustering + .as_ref() + .map(|c| c.meta.gpu_offset_pointer(self.meta4_off)) + } + + /// Returns the GPU pointer to an unknown buffer with incrementing numbers. + pub(crate) fn seq_buf_pointer(&self) -> GpuPointer<'_, &'_ [u64]> { + self.object.seq_buf.gpu_pointer() + } + + /// Returns the number of TVB bytes used for this scene. + pub(crate) fn used_bytes(&self) -> usize { + self.object + .with(|raw, _inner| raw.total_page_count.load(Ordering::Relaxed) as usize * PAGE_SIZE) + } + + /// Returns whether the TVB overflowed while rendering this scene. + pub(crate) fn overflowed(&self) -> bool { + self.object.with(|raw, _inner| { + raw.total_page_count.load(Ordering::Relaxed) + > raw.pass_page_count.load(Ordering::Relaxed) + }) + } +} + +#[versions(AGX)] +impl Drop for Scene::ver { + fn drop(&mut self) { + let mut inner = self.object.buffer.inner.lock(); + assert_ne!(inner.active_scenes, 0); + inner.active_scenes -= 1; + + if inner.active_scenes == 0 { + mod_pr_debug!( + "Buffer: no scenes left, dropping slot {}", + inner.active_slot.take().unwrap().slot() + ); + inner.active_slot = None; + } + } +} + +/// Inner data for a single TVB buffer object. +#[versions(AGX)] +struct BufferInner { + info: GpuObject<buffer::Info::ver>, + ualloc: Arc<Mutex<alloc::DefaultAllocator>>, + ualloc_priv: Arc<Mutex<alloc::DefaultAllocator>>, + blocks: Vec<GpuOnlyArray<u8>>, + max_blocks: usize, + max_blocks_nomemless: usize, + mgr: BufferManager, + active_scenes: usize, + active_slot: Option<slotalloc::Guard<()>>, + last_token: Option<slotalloc::SlotToken>, + tpc: Option<Arc<GpuArray<u8>>>, + kernel_buffer: GpuArray<u8>, + stats: GpuObject<buffer::Stats>, + preempt1_size: usize, + preempt2_size: usize, + preempt3_size: usize, + num_clusters: usize, +} + +/// Locked and reference counted TVB buffer. +#[versions(AGX)] +pub(crate) struct Buffer { + inner: Arc<Mutex<BufferInner::ver>>, +} + +#[versions(AGX)] +impl Buffer::ver { + /// Create a new Buffer for a given VM, given the per-VM allocators. + pub(crate) fn new( + gpu: &dyn gpu::GpuManager, + alloc: &mut gpu::KernelAllocators, + ualloc: Arc<Mutex<alloc::DefaultAllocator>>, + ualloc_priv: Arc<Mutex<alloc::DefaultAllocator>>, + mgr: &BufferManager, + ) -> Result<Buffer::ver> { + // These are the typical max numbers on macOS. 
+ // 8GB machines have this halved. + let max_size: usize = 862_322_688; // bytes + let max_size_nomemless = max_size / 3; + + let max_blocks = max_size / BLOCK_SIZE; + let max_blocks_nomemless = max_size_nomemless / BLOCK_SIZE; + let max_pages = max_blocks * PAGES_PER_BLOCK; + let max_pages_nomemless = max_blocks_nomemless * PAGES_PER_BLOCK; + + let num_clusters = gpu.get_dyncfg().id.num_clusters as usize; + let num_clusters_adj = if num_clusters > 1 { + num_clusters + 1 + } else { + 1 + }; + + let preempt1_size = num_clusters_adj * gpu.get_cfg().preempt1_size; + let preempt2_size = num_clusters_adj * gpu.get_cfg().preempt2_size; + let preempt3_size = num_clusters_adj * gpu.get_cfg().preempt3_size; + + let inner = box_in_place!(buffer::Info::ver { + block_ctl: alloc.shared.new_default::<buffer::BlockControl>()?, + counter: alloc.shared.new_default::<buffer::Counter>()?, + page_list: ualloc_priv.lock().array_empty(max_pages)?, + block_list: ualloc_priv.lock().array_empty(max_blocks * 2)?, + })?; + + let info = alloc.private.new_boxed(inner, |inner, ptr| { + Ok(place!( + ptr, + buffer::raw::Info::ver { + gpu_counter: 0x0, + unk_4: 0, + last_id: 0x0, + cur_id: -1, + unk_10: 0x0, + gpu_counter2: 0x0, + unk_18: 0x0, + #[ver(V < V13_0B4)] + unk_1c: 0x0, + page_list: inner.page_list.gpu_pointer(), + page_list_size: (4 * max_pages).try_into()?, + page_count: AtomicU32::new(0), + max_blocks: max_blocks.try_into()?, + block_count: AtomicU32::new(0), + unk_38: 0x0, + block_list: inner.block_list.gpu_pointer(), + block_ctl: inner.block_ctl.gpu_pointer(), + last_page: AtomicU32::new(0), + gpu_page_ptr1: 0x0, + gpu_page_ptr2: 0x0, + unk_58: 0x0, + block_size: BLOCK_SIZE as u32, + unk_60: U64(0x0), + counter: inner.counter.gpu_pointer(), + unk_70: 0x0, + unk_74: 0x0, + unk_78: 0x0, + unk_7c: 0x0, + unk_80: 0x1, + max_pages: max_pages.try_into()?, + max_pages_nomemless: max_pages_nomemless.try_into()?, + unk_8c: 0x0, + unk_90: Default::default(), + } + )) + })?; + + // Technically similar to Scene below, let's play it safe. + let kernel_buffer = alloc.shared.array_empty(0x40)?; + let stats = alloc + .shared + .new_object(Default::default(), |_inner| buffer::raw::Stats { + reset: AtomicU32::from(1), + ..Default::default() + })?; + + Ok(Buffer::ver { + inner: Arc::try_new(Mutex::new(BufferInner::ver { + info, + ualloc, + ualloc_priv, + blocks: Vec::new(), + max_blocks, + max_blocks_nomemless, + mgr: mgr.clone(), + active_scenes: 0, + active_slot: None, + last_token: None, + tpc: None, + kernel_buffer, + stats, + preempt1_size, + preempt2_size, + preempt3_size, + num_clusters, + }))?, + }) + } + + /// Returns the total block count allocated to this Buffer. + pub(crate) fn block_count(&self) -> u32 { + self.inner.lock().blocks.len() as u32 + } + + /// Returns the total size in bytes allocated to this Buffer. + pub(crate) fn size(&self) -> usize { + self.block_count() as usize * BLOCK_SIZE + } + + /// Automatically grow the Buffer based on feedback from the statistics. 
+ pub(crate) fn auto_grow(&self) -> Result<bool> { + let inner = self.inner.lock(); + + let used_pages = inner.stats.with(|raw, _inner| { + let used = raw.max_pages.load(Ordering::Relaxed); + raw.reset.store(1, Ordering::Release); + used as usize + }); + + let need_blocks = div_ceil(used_pages * 2, PAGES_PER_BLOCK).min(inner.max_blocks_nomemless); + let want_blocks = div_ceil(used_pages * 3, PAGES_PER_BLOCK).min(inner.max_blocks_nomemless); + + let cur_count = inner.blocks.len(); + + if need_blocks <= cur_count { + Ok(false) + } else { + // Grow to 3x requested size (same logic as macOS) + core::mem::drop(inner); + self.ensure_blocks(want_blocks)?; + Ok(true) + } + } + + /// Ensure that the buffer has at least a certain minimum size in blocks. + pub(crate) fn ensure_blocks(&self, min_blocks: usize) -> Result<bool> { + let mut inner = self.inner.lock(); + + let cur_count = inner.blocks.len(); + if cur_count >= min_blocks { + return Ok(false); + } + if min_blocks > inner.max_blocks { + return Err(ENOMEM); + } + + let add_blocks = min_blocks - cur_count; + let new_count = min_blocks; + + let mut new_blocks: Vec<GpuOnlyArray<u8>> = Vec::new(); + + // Allocate the new blocks first, so if it fails they will be dropped + let mut ualloc = inner.ualloc.lock(); + for _i in 0..add_blocks { + new_blocks.try_push(ualloc.array_gpuonly(BLOCK_SIZE)?)?; + } + core::mem::drop(ualloc); + + // Then actually commit them + inner.blocks.try_reserve(add_blocks)?; + + for (i, block) in new_blocks.into_iter().enumerate() { + let page_num = (block.gpu_va().get() >> PAGE_SHIFT) as u32; + + inner + .blocks + .try_push(block) + .expect("try_push() failed after try_reserve()"); + inner.info.block_list[2 * (cur_count + i)] = page_num; + for j in 0..PAGES_PER_BLOCK { + inner.info.page_list[(cur_count + i) * PAGES_PER_BLOCK + j] = page_num + j as u32; + } + } + + inner.info.block_ctl.with(|raw, _inner| { + raw.total.store(new_count as u32, Ordering::SeqCst); + raw.wptr.store(new_count as u32, Ordering::SeqCst); + }); + + let page_count = (new_count * PAGES_PER_BLOCK) as u32; + inner.info.with(|raw, _inner| { + raw.page_count.store(page_count, Ordering::Relaxed); + raw.block_count.store(new_count as u32, Ordering::Relaxed); + raw.last_page.store(page_count - 1, Ordering::Relaxed); + }); + + Ok(true) + } + + /// Create a new [`Scene::ver`] (render pass) using this buffer. + pub(crate) fn new_scene( + &self, + alloc: &mut gpu::KernelAllocators, + tile_info: &TileInfo, + ) -> Result<Scene::ver> { + let mut inner = self.inner.lock(); + + let tilemap_size = tile_info.tilemap_size; + let tpc_size = tile_info.tpc_size; + + // TODO: what is this exactly? + mod_pr_debug!("Buffer: Allocating TVB buffers\n"); + + // This seems to be a list, with 4x2 bytes of headers and 8 bytes per entry. + // On single-cluster devices, the used length always seems to be 1. + // On M1 Ultra, it can grow and usually doesn't exceed 8 * cluster_factor + // entries. macOS allocates a whole 64K * 0x80 for this, so let's go with + // that to be safe... 
+ let user_buffer = inner.ualloc.lock().array_empty(if inner.num_clusters > 1 { + 0x10080 + } else { + 0x80 + })?; + + let tvb_heapmeta = inner.ualloc.lock().array_empty(0x200)?; + let tvb_tilemap = inner.ualloc.lock().array_empty(tilemap_size)?; + + mod_pr_debug!("Buffer: Allocating misc buffers\n"); + let preempt_buf = inner + .ualloc + .lock() + .array_empty(inner.preempt1_size + inner.preempt2_size + inner.preempt3_size)?; + + let mut seq_buf = inner.ualloc.lock().array_empty(0x800)?; + for i in 1..0x400 { + seq_buf[i] = (i + 1) as u64; + } + + let tpc = match inner.tpc.as_ref() { + Some(buf) if buf.len() >= tpc_size => buf.clone(), + _ => { + // MacOS allocates this as shared GPU+FW, but + // priv seems to work and might be faster? + // Needs to be FW-writable anyway, so ualloc + // won't work. + let buf = Arc::try_new( + inner + .ualloc_priv + .lock() + .array_empty((tpc_size + mmu::UAT_PGMSK) & !mmu::UAT_PGMSK)?, + )?; + inner.tpc = Some(buf.clone()); + buf + } + }; + + // Maybe: (4x4 macro tiles + 1 global page)*n, 32bit each (17*4*n) + let meta1_size = align(tile_info.meta1_blocks as usize * 0x44, 0x80); + // check + let meta2_size = align(0x190 * inner.num_clusters, 0x80); + let meta3_size = align(0x280 * inner.num_clusters, 0x80); + // Like user_buffer for single-cluster modes, 0x30 per cluster * the cluster + // factor. + let meta4_size = align(0x30 * inner.num_clusters * tile_info.cluster_factor, 0x80); + let meta_size = meta1_size + meta2_size + meta3_size + meta4_size; + + let clustering = if inner.num_clusters > 1 { + mod_pr_debug!("Buffer: Allocating clustering buffers\n"); + let tilemaps = inner + .ualloc + .lock() + .array_empty(inner.num_clusters * tilemap_size * tile_info.cluster_factor)?; + let meta = inner.ualloc.lock().array_empty(meta_size)?; + Some(buffer::ClusterBuffers { tilemaps, meta }) + } else { + None + }; + + let scene_inner = box_in_place!(buffer::Scene::ver { + user_buffer: user_buffer, + buffer: self.clone(), + tvb_heapmeta: tvb_heapmeta, + tvb_tilemap: tvb_tilemap, + tpc: tpc, + clustering: clustering, + preempt_buf: preempt_buf, + seq_buf: seq_buf, + })?; + + // Could be made strong, but we wind up with a deadlock if we try to grab the + // pointer through the inner.buffer path inside the closure. + let stats_pointer = inner.stats.weak_pointer(); + + // macOS allocates this as private. However, the firmware does not + // DC CIVAC this before reading it (like it does most other things), + // which causes odd cache incoherency bugs when combined with + // speculation on the firmware side (maybe). This doesn't happen + // on macOS because these structs are a circular pool that is mapped + // already initialized. Just mark this shared for now. 
+ let scene = alloc.shared.new_boxed(scene_inner, |inner, ptr| { + Ok(place!( + ptr, + buffer::raw::Scene { + pass_page_count: AtomicU32::new(0), + unk_4: 0, + unk_8: U64(0), + unk_10: U64(0), + user_buffer: inner.user_buffer.gpu_pointer(), + unk_20: 0, + stats: stats_pointer, + total_page_count: AtomicU32::new(0), + unk_30: U64(0), + unk_38: U64(0), + } + )) + })?; + + let mut rebind = false; + + if inner.active_slot.is_none() { + assert_eq!(inner.active_scenes, 0); + + let slot = inner.mgr.0.get(inner.last_token)?; + rebind = slot.changed(); + + mod_pr_debug!("Buffer: assigning slot {} (rebind={})", slot.slot(), rebind); + + inner.last_token = Some(slot.token()); + inner.active_slot = Some(slot); + } + + inner.active_scenes += 1; + + Ok(Scene::ver { + object: scene, + slot: inner.active_slot.as_ref().unwrap().slot(), + rebind, + preempt2_off: inner.preempt1_size, + preempt3_off: inner.preempt1_size + inner.preempt2_size, + meta2_off: meta1_size, + meta3_off: meta1_size + meta2_size, + meta4_off: meta1_size + meta2_size + meta3_size, + }) + } + + /// Increment the buffer manager usage count. Should we done once we know the Scene is ready + /// to be committed and used in commands submitted to the GPU. + pub(crate) fn increment(&self) { + let inner = self.inner.lock(); + inner.info.counter.with(|raw, _inner| { + // We could use fetch_add, but the non-LSE atomic + // sequence Rust produces confuses the hypervisor. + // We have inner locked anyway, so this is not racy. + let v = raw.count.load(Ordering::Relaxed); + raw.count.store(v + 1, Ordering::Relaxed); + }); + } +} + +#[versions(AGX)] +impl Clone for Buffer::ver { + fn clone(&self) -> Self { + Buffer::ver { + inner: self.inner.clone(), + } + } +} + +/// The GPU-global buffer manager, used to allocate and release buffer slots from the pool. +pub(crate) struct BufferManager(slotalloc::SlotAllocator<()>); + +impl BufferManager { + pub(crate) fn new() -> Result<BufferManager> { + Ok(BufferManager(slotalloc::SlotAllocator::new( + NUM_BUFFERS, + (), + |_inner, _slot| (), + )?)) + } +} + +impl Clone for BufferManager { + fn clone(&self) -> Self { + BufferManager(self.0.clone()) + } +} diff --git a/drivers/gpu/drm/asahi/channel.rs b/drivers/gpu/drm/asahi/channel.rs new file mode 100644 index 000000000000..0b3c3b65c279 --- /dev/null +++ b/drivers/gpu/drm/asahi/channel.rs @@ -0,0 +1,542 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT + +//! GPU ring buffer channels +//! +//! The GPU firmware use a set of ring buffer channels to receive commands from the driver and send +//! it notifications and status messages. +//! +//! These ring buffers mostly follow uniform conventions, so they share the same base +//! implementation. + +use crate::debug::*; +use crate::driver::AsahiDevice; +use crate::fw::channels::*; +use crate::fw::initdata::{raw, ChannelRing}; +use crate::fw::types::*; +use crate::{event, gpu, mem}; +use core::time::Duration; +use kernel::{c_str, delay::coarse_sleep, prelude::*, sync::Arc, time}; + +pub(crate) use crate::fw::channels::PipeType; + +/// A receive (FW->driver) channel. 
+pub(crate) struct RxChannel<T: RxChannelState, U: Copy + Default> +where + for<'a> <T as GpuStruct>::Raw<'a>: Debug + Default + Zeroed, +{ + ring: ChannelRing<T, U>, + // FIXME: needs feature(generic_const_exprs) + //rptr: [u32; T::SUB_CHANNELS], + rptr: [u32; 6], + count: u32, +} + +impl<T: RxChannelState, U: Copy + Default> RxChannel<T, U> +where + for<'a> <T as GpuStruct>::Raw<'a>: Debug + Default + Zeroed, +{ + /// Allocates a new receive channel with a given message count. + pub(crate) fn new(alloc: &mut gpu::KernelAllocators, count: usize) -> Result<RxChannel<T, U>> { + Ok(RxChannel { + ring: ChannelRing { + state: alloc.shared.new_default()?, + ring: alloc.shared.array_empty(T::SUB_CHANNELS * count)?, + }, + rptr: Default::default(), + count: count as u32, + }) + } + + /// Receives a message on the specified sub-channel index, optionally leaving in the ring + /// buffer. + /// + /// Returns None if the channel is empty. + fn get_or_peek(&mut self, index: usize, peek: bool) -> Option<U> { + self.ring.state.with(|raw, _inner| { + let wptr = T::wptr(raw, index); + let rptr = &mut self.rptr[index]; + if wptr == *rptr { + None + } else { + let off = self.count as usize * index; + let msg = self.ring.ring[off + *rptr as usize]; + if !peek { + *rptr = (*rptr + 1) % self.count; + T::set_rptr(raw, index, *rptr); + } + Some(msg) + } + }) + } + + /// Receives a message on the specified sub-channel index, and dequeues it from the ring buffer. + /// + /// Returns None if the channel is empty. + pub(crate) fn get(&mut self, index: usize) -> Option<U> { + self.get_or_peek(index, false) + } + + /// Peeks a message on the specified sub-channel index, leaving it in the ring buffer. + /// + /// Returns None if the channel is empty. + pub(crate) fn peek(&mut self, index: usize) -> Option<U> { + self.get_or_peek(index, true) + } +} + +/// A transmit (driver->FW) channel. +pub(crate) struct TxChannel<T: TxChannelState, U: Copy + Default> +where + for<'a> <T as GpuStruct>::Raw<'a>: Debug + Default + Zeroed, +{ + ring: ChannelRing<T, U>, + wptr: u32, + count: u32, +} + +impl<T: TxChannelState, U: Copy + Default> TxChannel<T, U> +where + for<'a> <T as GpuStruct>::Raw<'a>: Debug + Default + Zeroed, +{ + /// Allocates a new cached transmit channel with a given message count. + pub(crate) fn new(alloc: &mut gpu::KernelAllocators, count: usize) -> Result<TxChannel<T, U>> { + Ok(TxChannel { + ring: ChannelRing { + state: alloc.shared.new_default()?, + ring: alloc.private.array_empty(count)?, + }, + wptr: 0, + count: count as u32, + }) + } + + /// Allocates a new uncached transmit channel with a given message count. + pub(crate) fn new_uncached( + alloc: &mut gpu::KernelAllocators, + count: usize, + ) -> Result<TxChannel<T, U>> { + Ok(TxChannel { + ring: ChannelRing { + state: alloc.shared.new_default()?, + ring: alloc.shared.array_empty(count)?, + }, + wptr: 0, + count: count as u32, + }) + } + + /// Send a message to the ring, returning a cookie with the ring buffer position. + /// + /// This will poll/block if the ring is full, which we don't really expect to happen. + pub(crate) fn put(&mut self, msg: &U) -> u32 { + self.ring.state.with(|raw, _inner| { + let next_wptr = (self.wptr + 1) % self.count; + let mut rptr = T::rptr(raw); + if next_wptr == rptr { + pr_err!( + "TX ring buffer is full! Waiting... ({}, {})\n", + next_wptr, + rptr + ); + // TODO: block properly on incoming messages? 
+ while next_wptr == rptr { + coarse_sleep(Duration::from_millis(8)); + rptr = T::rptr(raw); + } + } + self.ring.ring[self.wptr as usize] = *msg; + mem::sync(); + T::set_wptr(raw, next_wptr); + self.wptr = next_wptr; + }); + self.wptr + } + + /// Wait for a previously submitted message to be popped off of the ring by the GPU firmware. + /// + /// This busy-loops, and is intended to be used for rare cases when we need to block for + /// completion of a cache management or invalidation operation synchronously (which + /// the firmware normally completes fast enough not to be worth sleeping for). + /// If the poll takes longer than 10ms, this switches to sleeping between polls. + pub(crate) fn wait_for(&mut self, wptr: u32, timeout_ms: u64) -> Result { + const MAX_FAST_POLL: u64 = 10; + let start = time::ktime_get(); + let timeout_fast = start + Duration::from_millis(timeout_ms.min(MAX_FAST_POLL)); + let timeout_slow = start + Duration::from_millis(timeout_ms); + self.ring.state.with(|raw, _inner| { + while time::ktime_get() < timeout_fast { + if T::rptr(raw) == wptr { + return Ok(()); + } + mem::sync(); + } + while time::ktime_get() < timeout_slow { + if T::rptr(raw) == wptr { + return Ok(()); + } + coarse_sleep(Duration::from_millis(5)); + mem::sync(); + } + Err(ETIMEDOUT) + }) + } +} + +/// Device Control channel for global device management commands. +#[versions(AGX)] +pub(crate) struct DeviceControlChannel { + dev: AsahiDevice, + ch: TxChannel<ChannelState, DeviceControlMsg::ver>, +} + +#[versions(AGX)] +impl DeviceControlChannel::ver { + const COMMAND_TIMEOUT_MS: u64 = 1000; + + /// Allocate a new Device Control channel. + pub(crate) fn new( + dev: &AsahiDevice, + alloc: &mut gpu::KernelAllocators, + ) -> Result<DeviceControlChannel::ver> { + Ok(DeviceControlChannel::ver { + dev: dev.clone(), + ch: TxChannel::<ChannelState, DeviceControlMsg::ver>::new(alloc, 0x100)?, + }) + } + + /// Returns the raw `ChannelRing` structure to pass to firmware. + pub(crate) fn to_raw(&self) -> raw::ChannelRing<ChannelState, DeviceControlMsg::ver> { + self.ch.ring.to_raw() + } + + /// Submits a Device Control command. + pub(crate) fn send(&mut self, msg: &DeviceControlMsg::ver) -> u32 { + cls_dev_dbg!(DeviceControlCh, self.dev, "DeviceControl: {:?}\n", msg); + self.ch.put(msg) + } + + /// Waits for a previously submitted Device Control command to complete. + pub(crate) fn wait_for(&mut self, wptr: u32) -> Result { + self.ch.wait_for(wptr, Self::COMMAND_TIMEOUT_MS) + } +} + +/// Pipe channel to submit WorkQueue execution requests. +#[versions(AGX)] +pub(crate) struct PipeChannel { + dev: AsahiDevice, + ch: TxChannel<ChannelState, PipeMsg::ver>, +} + +#[versions(AGX)] +impl PipeChannel::ver { + /// Allocate a new Pipe submission channel. + pub(crate) fn new( + dev: &AsahiDevice, + alloc: &mut gpu::KernelAllocators, + ) -> Result<PipeChannel::ver> { + Ok(PipeChannel::ver { + dev: dev.clone(), + ch: TxChannel::<ChannelState, PipeMsg::ver>::new(alloc, 0x100)?, + }) + } + + /// Returns the raw `ChannelRing` structure to pass to firmware. + pub(crate) fn to_raw(&self) -> raw::ChannelRing<ChannelState, PipeMsg::ver> { + self.ch.ring.to_raw() + } + + /// Submits a Pipe kick command to the firmware. + pub(crate) fn send(&mut self, msg: &PipeMsg::ver) { + cls_dev_dbg!(PipeCh, self.dev, "Pipe: {:?}\n", msg); + self.ch.put(msg); + } +} + +/// Firmware Control channel, used for secure cache flush requests. 
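[Editor's note: the fast-then-slow wait used by `wait_for` above (busy-poll for the first ~10 ms, then sleep between polls until the overall timeout) is a general pattern; here is a minimal host-side sketch of it using std timing, with the completion check abstracted as a closure. Names and constants are assumptions for illustration, not the kernel implementation.]

    use std::time::{Duration, Instant};

    // Generic sketch of the two-phase polling pattern described above.
    // `done` stands in for "the firmware's read pointer reached our wptr".
    fn wait_two_phase(mut done: impl FnMut() -> bool, timeout: Duration) -> Result<(), ()> {
        const FAST_PHASE: Duration = Duration::from_millis(10);
        let start = Instant::now();
        let fast_deadline = start + FAST_PHASE.min(timeout);
        let slow_deadline = start + timeout;

        // Phase 1: spin, assuming the firmware usually finishes faster than
        // a sleep/wake round trip would take.
        while Instant::now() < fast_deadline {
            if done() {
                return Ok(());
            }
            std::hint::spin_loop();
        }
        // Phase 2: poll with coarse sleeps until the overall deadline.
        while Instant::now() < slow_deadline {
            if done() {
                return Ok(());
            }
            std::thread::sleep(Duration::from_millis(5));
        }
        Err(())
    }

    fn main() {
        let start = Instant::now();
        // Completion arrives after ~20 ms, so the slow phase is exercised.
        let ok = wait_two_phase(|| start.elapsed() > Duration::from_millis(20),
                                Duration::from_millis(100));
        assert!(ok.is_ok());
    }

[The `FwCtlChannel` declared next applies this wait on its uncached command ring.]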
+pub(crate) struct FwCtlChannel { + dev: AsahiDevice, + ch: TxChannel<FwCtlChannelState, FwCtlMsg>, +} + +impl FwCtlChannel { + const COMMAND_TIMEOUT_MS: u64 = 1000; + + /// Allocate a new Firmware Control channel. + pub(crate) fn new( + dev: &AsahiDevice, + alloc: &mut gpu::KernelAllocators, + ) -> Result<FwCtlChannel> { + Ok(FwCtlChannel { + dev: dev.clone(), + ch: TxChannel::<FwCtlChannelState, FwCtlMsg>::new_uncached(alloc, 0x100)?, + }) + } + + /// Returns the raw `ChannelRing` structure to pass to firmware. + pub(crate) fn to_raw(&self) -> raw::ChannelRing<FwCtlChannelState, FwCtlMsg> { + self.ch.ring.to_raw() + } + + /// Submits a Firmware Control command to the firmware. + pub(crate) fn send(&mut self, msg: &FwCtlMsg) -> u32 { + cls_dev_dbg!(FwCtlCh, self.dev, "FwCtl: {:?}\n", msg); + self.ch.put(msg) + } + + /// Waits for a previously submitted Firmware Control command to complete. + pub(crate) fn wait_for(&mut self, wptr: u32) -> Result { + self.ch.wait_for(wptr, Self::COMMAND_TIMEOUT_MS) + } +} + +/// Event channel, used to notify the driver of command completions, GPU faults and errors, and +/// other events. +pub(crate) struct EventChannel { + dev: AsahiDevice, + ch: RxChannel<ChannelState, RawEventMsg>, + mgr: Arc<event::EventManager>, + gpu: Option<Arc<dyn gpu::GpuManager>>, +} + +impl EventChannel { + /// Allocate a new Event channel. + pub(crate) fn new( + dev: &AsahiDevice, + alloc: &mut gpu::KernelAllocators, + mgr: Arc<event::EventManager>, + ) -> Result<EventChannel> { + Ok(EventChannel { + dev: dev.clone(), + ch: RxChannel::<ChannelState, RawEventMsg>::new(alloc, 0x100)?, + mgr, + gpu: None, + }) + } + + /// Registers the managing `Gpu` instance that will handle events on this channel. + pub(crate) fn set_manager(&mut self, gpu: Arc<dyn gpu::GpuManager>) { + self.gpu = Some(gpu); + } + + /// Returns the raw `ChannelRing` structure to pass to firmware. + pub(crate) fn to_raw(&self) -> raw::ChannelRing<ChannelState, RawEventMsg> { + self.ch.ring.to_raw() + } + + /// Polls for new Event messages on this ring. + pub(crate) fn poll(&mut self) { + while let Some(msg) = self.ch.get(0) { + let tag = unsafe { msg.raw.0 }; + match tag { + 0..=EVENT_MAX => { + let msg = unsafe { msg.msg }; + + cls_dev_dbg!(EventCh, self.dev, "Event: {:?}\n", msg); + match msg { + EventMsg::Fault => match self.gpu.as_ref() { + Some(gpu) => gpu.handle_fault(), + None => { + dev_crit!(self.dev, "EventChannel: No GPU manager available!\n") + } + }, + EventMsg::Timeout { + counter, + event_slot, + .. + } => match self.gpu.as_ref() { + Some(gpu) => gpu.handle_timeout(counter, event_slot), + None => { + dev_crit!(self.dev, "EventChannel: No GPU manager available!\n") + } + }, + EventMsg::Flag { firing, .. } => { + for (i, flags) in firing.iter().enumerate() { + for j in 0..32 { + if flags & (1u32 << j) != 0 { + self.mgr.signal((i * 32 + j) as u32); + } + } + } + } + msg => { + dev_crit!(self.dev, "Unknown event message: {:?}\n", msg); + } + } + } + _ => { + dev_warn!(self.dev, "Unknown event message: {:?}\n", unsafe { + msg.raw + }); + } + } + } + } +} + +/// Firmware Log channel. This one is pretty special, since it has 6 sub-channels (for different log +/// levels), and it also uses a side buffer to actually hold the log messages, only passing around +/// pointers in the main buffer. 
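[Editor's note: the indirection described above means a ring message carries only an index; the log text itself lives in a flat side buffer laid out as one BUF_SIZE-sized region per log level. A small sketch of that lookup, with constants assumed to mirror the implementation that follows (SUB_CHANNELS = 6, BUF_SIZE = 0x100):]

    // Sketch of the FwLog payload lookup: the payload buffer holds
    // SUB_CHANNELS consecutive regions of BUF_SIZE entries, one per level.
    const SUB_CHANNELS: usize = 6;
    const BUF_SIZE: usize = 0x100;

    fn payload_index(level: usize, msg_index: usize) -> Option<usize> {
        if level >= SUB_CHANNELS || msg_index >= BUF_SIZE {
            return None; // Out-of-range indices are rejected, as in poll().
        }
        Some(BUF_SIZE * level + msg_index)
    }

    fn main() {
        // Level 3 (warnings), message slot 5 -> entry 0x305 in the flat buffer.
        assert_eq!(payload_index(3, 5), Some(0x305));
        assert_eq!(payload_index(0, 0x100), None);
    }

[The `FwLogChannel` type that follows implements exactly this: it peeks the ring message, validates the index, reads the payload from the side buffer, and only then consumes the ring entry.]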
+pub(crate) struct FwLogChannel { + dev: AsahiDevice, + ch: RxChannel<FwLogChannelState, RawFwLogMsg>, + payload_buf: GpuArray<RawFwLogPayloadMsg>, +} + +impl FwLogChannel { + const RING_SIZE: usize = 0x100; + const BUF_SIZE: usize = 0x100; + + /// Allocate a new Firmware Log channel. + pub(crate) fn new( + dev: &AsahiDevice, + alloc: &mut gpu::KernelAllocators, + ) -> Result<FwLogChannel> { + Ok(FwLogChannel { + dev: dev.clone(), + ch: RxChannel::<FwLogChannelState, RawFwLogMsg>::new(alloc, Self::RING_SIZE)?, + payload_buf: alloc + .shared + .array_empty(Self::BUF_SIZE * FwLogChannelState::SUB_CHANNELS)?, + }) + } + + /// Returns the raw `ChannelRing` structure to pass to firmware. + pub(crate) fn to_raw(&self) -> raw::ChannelRing<FwLogChannelState, RawFwLogMsg> { + self.ch.ring.to_raw() + } + + /// Returns the GPU pointers to the firmware log payload buffer. + pub(crate) fn get_buf(&self) -> GpuWeakPointer<[RawFwLogPayloadMsg]> { + self.payload_buf.weak_pointer() + } + + /// Polls for new log messages on all sub-rings. + pub(crate) fn poll(&mut self) { + for i in 0..=FwLogChannelState::SUB_CHANNELS - 1 { + while let Some(msg) = self.ch.peek(i) { + cls_dev_dbg!(FwLogCh, self.dev, "FwLog{}: {:?}\n", i, msg); + if msg.msg_type != 2 { + dev_warn!(self.dev, "Unknown FWLog{} message: {:?}\n", i, msg); + self.ch.get(i); + continue; + } + if msg.msg_index.0 as usize >= Self::BUF_SIZE { + dev_warn!( + self.dev, + "FWLog{} message index out of bounds: {:?}\n", + i, + msg + ); + self.ch.get(i); + continue; + } + let index = Self::BUF_SIZE * i + msg.msg_index.0 as usize; + let payload = &self.payload_buf.as_slice()[index]; + if payload.msg_type != 3 { + dev_warn!(self.dev, "Unknown FWLog{} payload: {:?}\n", i, payload); + self.ch.get(i); + continue; + } + let msg = if let Some(end) = payload.msg.iter().position(|&r| r == 0) { + CStr::from_bytes_with_nul(&(*payload.msg)[..end + 1]) + .unwrap_or(c_str!("cstr_err")) + } else { + dev_warn!( + self.dev, + "FWLog{} payload not NUL-terminated: {:?}\n", + i, + payload + ); + self.ch.get(i); + continue; + }; + match i { + 0 => dev_dbg!(self.dev, "FWLog: {}\n", msg), + 1 => dev_info!(self.dev, "FWLog: {}\n", msg), + 2 => dev_notice!(self.dev, "FWLog: {}\n", msg), + 3 => dev_warn!(self.dev, "FWLog: {}\n", msg), + 4 => dev_err!(self.dev, "FWLog: {}\n", msg), + 5 => dev_crit!(self.dev, "FWLog: {}\n", msg), + _ => (), + }; + self.ch.get(i); + } + } + } +} + +pub(crate) struct KTraceChannel { + dev: AsahiDevice, + ch: RxChannel<ChannelState, RawKTraceMsg>, +} + +/// KTrace channel, used to receive detailed execution trace markers from the firmware. +/// We currently disable this in initdata, so no messages are expected here at this time. +impl KTraceChannel { + /// Allocate a new KTrace channel. + pub(crate) fn new( + dev: &AsahiDevice, + alloc: &mut gpu::KernelAllocators, + ) -> Result<KTraceChannel> { + Ok(KTraceChannel { + dev: dev.clone(), + ch: RxChannel::<ChannelState, RawKTraceMsg>::new(alloc, 0x200)?, + }) + } + + /// Returns the raw `ChannelRing` structure to pass to firmware. + pub(crate) fn to_raw(&self) -> raw::ChannelRing<ChannelState, RawKTraceMsg> { + self.ch.ring.to_raw() + } + + /// Polls for new KTrace messages on this ring. + pub(crate) fn poll(&mut self) { + while let Some(msg) = self.ch.get(0) { + cls_dev_dbg!(KTraceCh, self.dev, "KTrace: {:?}\n", msg); + } + } +} + +/// Statistics channel, reporting power-related statistics to the driver. +/// Not really implemented other than debug logs yet... 
+#[versions(AGX)] +pub(crate) struct StatsChannel { + dev: AsahiDevice, + ch: RxChannel<ChannelState, RawStatsMsg::ver>, +} + +#[versions(AGX)] +impl StatsChannel::ver { + /// Allocate a new Statistics channel. + pub(crate) fn new( + dev: &AsahiDevice, + alloc: &mut gpu::KernelAllocators, + ) -> Result<StatsChannel::ver> { + Ok(StatsChannel::ver { + dev: dev.clone(), + ch: RxChannel::<ChannelState, RawStatsMsg::ver>::new(alloc, 0x100)?, + }) + } + + /// Returns the raw `ChannelRing` structure to pass to firmware. + pub(crate) fn to_raw(&self) -> raw::ChannelRing<ChannelState, RawStatsMsg::ver> { + self.ch.ring.to_raw() + } + + /// Polls for new statistics messages on this ring. + pub(crate) fn poll(&mut self) { + while let Some(msg) = self.ch.get(0) { + let tag = unsafe { msg.raw.0 }; + match tag { + 0..=STATS_MAX::ver => { + let msg = unsafe { msg.msg }; + cls_dev_dbg!(StatsCh, self.dev, "Stats: {:?}\n", msg); + } + _ => { + pr_warn!("Unknown stats message: {:?}\n", unsafe { msg.raw }); + } + } + } + } +} diff --git a/drivers/gpu/drm/asahi/debug.rs b/drivers/gpu/drm/asahi/debug.rs new file mode 100644 index 000000000000..2f3a70e04cfd --- /dev/null +++ b/drivers/gpu/drm/asahi/debug.rs @@ -0,0 +1,129 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +#![allow(dead_code)] + +//! Debug enable/disable flags and convenience macros + +#[allow(unused_imports)] +pub(crate) use super::{cls_dev_dbg, cls_pr_debug, debug, mod_dev_dbg, mod_pr_debug}; +use core::sync::atomic::{AtomicU64, Ordering}; + +static DEBUG_FLAGS: AtomicU64 = AtomicU64::new(0); + +/// Debug flag bit indices +pub(crate) enum DebugFlags { + // 0-3: Memory-related debug + Mmu = 0, + Alloc = 1, + Gem = 2, + Object = 3, + + // 4-7: Firmware objects and resources + Event = 4, + Buffer = 5, + WorkQueue = 6, + + // 8-13: DRM interface, rendering, compute, GPU globals + Gpu = 8, + File = 9, + Queue = 10, + Render = 11, + Compute = 12, + + // 14-15: Misc stats + MemStats = 14, + TVBStats = 15, + + // 16-22: Channels + FwLogCh = 16, + KTraceCh = 17, + StatsCh = 18, + EventCh = 19, + PipeCh = 20, + DeviceControlCh = 21, + FwCtlCh = 22, + + // 32-35: Allocator debugging + FillAllocations = 32, + DebugAllocations = 33, + DetectOverflows = 34, + ForceCPUMaps = 35, + + // 36-: Behavior flags + ConservativeTlbi = 36, + KeepGpuPowered = 37, + WaitForPowerOff = 38, + NoGpuRecovery = 39, + DisableClustering = 40, + + // 48-: Misc + Debug0 = 48, + Debug1 = 49, + Debug2 = 50, + Debug3 = 51, + Debug4 = 52, + Debug5 = 53, + Debug6 = 54, + Debug7 = 55, +} + +/// Update the cached global debug flags from the module parameter +pub(crate) fn update_debug_flags() { + let flags = { + let lock = crate::THIS_MODULE.kernel_param_lock(); + *crate::debug_flags.read(&lock) + }; + + DEBUG_FLAGS.store(flags, Ordering::Relaxed); +} + +/// Check whether debug is enabled for a given flag +#[inline(always)] +pub(crate) fn debug_enabled(flag: DebugFlags) -> bool { + DEBUG_FLAGS.load(Ordering::Relaxed) & 1 << (flag as usize) != 0 +} + +/// Run some code only if debug is enabled for the calling module +#[macro_export] +macro_rules! debug { + ($($arg:tt)*) => { + if $crate::debug::debug_enabled(DEBUG_CLASS) { + $($arg)* + } + }; +} + +/// pr_info!() if debug is enabled for the calling module +#[macro_export] +macro_rules! mod_pr_debug ( + ($($arg:tt)*) => ( + $crate::debug! { ::kernel::pr_info! ( $($arg)* ); } + ) +); + +/// dev_info!() if debug is enabled for the calling module +#[macro_export] +macro_rules! mod_dev_dbg ( + ($($arg:tt)*) => ( + $crate::debug! 
{ ::kernel::dev_info! ( $($arg)* ); } + ) +); + +/// pr_info!() if debug is enabled for a specific module +#[macro_export] +macro_rules! cls_pr_debug ( + ($cls:ident, $($arg:tt)*) => ( + if $crate::debug::debug_enabled($crate::debug::DebugFlags::$cls) { + ::kernel::pr_info! ( $($arg)* ); + } + ) +); + +/// dev_info!() if debug is enabled for a specific module +#[macro_export] +macro_rules! cls_dev_dbg ( + ($cls:ident, $($arg:tt)*) => ( + if $crate::debug::debug_enabled($crate::debug::DebugFlags::$cls) { + ::kernel::dev_info! ( $($arg)* ); + } + ) +); diff --git a/drivers/gpu/drm/asahi/driver.rs b/drivers/gpu/drm/asahi/driver.rs new file mode 100644 index 000000000000..d49d8b1934a4 --- /dev/null +++ b/drivers/gpu/drm/asahi/driver.rs @@ -0,0 +1,166 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT + +//! Top-level GPU driver implementation. + +use kernel::{ + c_str, device, drm, drm::drv, drm::ioctl, error::Result, of, platform, prelude::*, sync::Arc, +}; + +use crate::{debug, file, gem, gpu, hw, regs}; + +use kernel::device::RawDevice; +use kernel::macros::vtable; + +/// Driver metadata +const INFO: drv::DriverInfo = drv::DriverInfo { + major: 0, + minor: 0, + patchlevel: 0, + name: c_str!("asahi"), + desc: c_str!("Apple AGX Graphics"), + date: c_str!("20220831"), +}; + +/// Device data for the driver registration. +/// +/// Holds a reference to the top-level `GpuManager` object. +pub(crate) struct AsahiData { + pub(crate) dev: device::Device, + pub(crate) gpu: Arc<dyn gpu::GpuManager>, +} + +/// Convenience type alias for the `device::Data` type for this driver. +type DeviceData = device::Data<drv::Registration<AsahiDriver>, regs::Resources, AsahiData>; + +/// Empty struct representing this driver. +pub(crate) struct AsahiDriver; + +/// Convenience type alias for the DRM device type for this driver. +pub(crate) type AsahiDevice = kernel::drm::device::Device<AsahiDriver>; + +/// DRM Driver implementation for `AsahiDriver`. +#[vtable] +impl drv::Driver for AsahiDriver { + /// Our `DeviceData` type, reference-counted + type Data = Arc<DeviceData>; + /// Our `File` type. + type File = file::File; + /// Our `Object` type. + type Object = gem::Object; + + const INFO: drv::DriverInfo = INFO; + const FEATURES: u32 = + drv::FEAT_GEM | drv::FEAT_RENDER | drv::FEAT_SYNCOBJ | drv::FEAT_SYNCOBJ_TIMELINE; + + kernel::declare_drm_ioctls! { + (ASAHI_GET_PARAMS, drm_asahi_get_params, + ioctl::RENDER_ALLOW, file::File::get_params), + (ASAHI_VM_CREATE, drm_asahi_vm_create, + ioctl::AUTH | ioctl::RENDER_ALLOW, file::File::vm_create), + (ASAHI_VM_DESTROY, drm_asahi_vm_destroy, + ioctl::AUTH | ioctl::RENDER_ALLOW, file::File::vm_destroy), + (ASAHI_GEM_CREATE, drm_asahi_gem_create, + ioctl::AUTH | ioctl::RENDER_ALLOW, file::File::gem_create), + (ASAHI_GEM_MMAP_OFFSET, drm_asahi_gem_mmap_offset, + ioctl::AUTH | ioctl::RENDER_ALLOW, file::File::gem_mmap_offset), + (ASAHI_GEM_BIND, drm_asahi_gem_bind, + ioctl::AUTH | ioctl::RENDER_ALLOW, file::File::gem_bind), + (ASAHI_QUEUE_CREATE, drm_asahi_queue_create, + ioctl::AUTH | ioctl::RENDER_ALLOW, file::File::queue_create), + (ASAHI_QUEUE_DESTROY, drm_asahi_queue_destroy, + ioctl::AUTH | ioctl::RENDER_ALLOW, file::File::queue_destroy), + (ASAHI_SUBMIT, drm_asahi_submit, + ioctl::AUTH | ioctl::RENDER_ALLOW, file::File::submit), + } +} + +// OF Device ID table. +kernel::define_of_id_table! 
{ASAHI_ID_TABLE, &'static hw::HwConfig, [ + (of::DeviceId::Compatible(b"apple,agx-t8103"), Some(&hw::t8103::HWCONFIG)), + (of::DeviceId::Compatible(b"apple,agx-t8112"), Some(&hw::t8112::HWCONFIG)), + (of::DeviceId::Compatible(b"apple,agx-t6000"), Some(&hw::t600x::HWCONFIG_T6000)), + (of::DeviceId::Compatible(b"apple,agx-t6001"), Some(&hw::t600x::HWCONFIG_T6001)), + (of::DeviceId::Compatible(b"apple,agx-t6002"), Some(&hw::t600x::HWCONFIG_T6002)), +]} + +/// Platform Driver implementation for `AsahiDriver`. +impl platform::Driver for AsahiDriver { + /// Our `DeviceData` type, reference-counted + type Data = Arc<DeviceData>; + /// Data associated with each hardware ID. + type IdInfo = &'static hw::HwConfig; + + // Assign the above OF ID table to this driver. + kernel::driver_of_id_table!(ASAHI_ID_TABLE); + + /// Device probe function. + fn probe( + pdev: &mut platform::Device, + id_info: Option<&Self::IdInfo>, + ) -> Result<Arc<DeviceData>> { + debug::update_debug_flags(); + + let dev = device::Device::from_dev(pdev); + + dev_info!(dev, "Probing...\n"); + + let cfg = id_info.ok_or(ENODEV)?; + + pdev.set_dma_masks((1 << cfg.uat_oas) - 1)?; + + let res = regs::Resources::new(pdev)?; + + // Initialize misc MMIO + res.init_mmio()?; + + // Start the coprocessor CPU, so UAT can initialize the handoff + res.start_cpu()?; + + let node = dev.of_node().ok_or(EIO)?; + let compat: Vec<u32> = node.get_property(c_str!("apple,firmware-compat"))?; + + let reg = drm::drv::Registration::<AsahiDriver>::new(&dev)?; + let gpu = match (cfg.gpu_gen, compat.as_slice()) { + (hw::GpuGen::G13, &[12, 3, 0]) => { + gpu::GpuManagerG13V12_3::new(reg.device(), &res, cfg)? as Arc<dyn gpu::GpuManager> + } + (hw::GpuGen::G13, &[13, 2, 0]) => { + gpu::GpuManagerG13V13_2::new(reg.device(), &res, cfg)? as Arc<dyn gpu::GpuManager> + } + (hw::GpuGen::G14, &[12, 4, 0]) => { + gpu::GpuManagerG14V12_4::new(reg.device(), &res, cfg)? as Arc<dyn gpu::GpuManager> + } + (hw::GpuGen::G14, &[13, 2, 0]) => { + gpu::GpuManagerG14V13_2::new(reg.device(), &res, cfg)? as Arc<dyn gpu::GpuManager> + } + _ => { + dev_info!( + dev, + "Unsupported GPU/firmware combination ({:?}, {:?})\n", + cfg.gpu_gen, + compat + ); + return Err(ENODEV); + } + }; + + let data = + kernel::new_device_data!(reg, res, AsahiData { dev, gpu }, "Asahi::Registrations")?; + + let data = Arc::<DeviceData>::from(data); + + data.gpu.init()?; + + kernel::drm_device_register!( + data.registrations().ok_or(ENXIO)?.as_pinned_mut(), + data.clone(), + 0 + )?; + + dev_info!(data.dev, "Probed!\n"); + Ok(data) + } +} + +// Export the OF ID table as a module ID table, to make modpost/autoloading work. +kernel::module_of_id_table!(MOD_TABLE, ASAHI_ID_TABLE); diff --git a/drivers/gpu/drm/asahi/event.rs b/drivers/gpu/drm/asahi/event.rs new file mode 100644 index 000000000000..ccf00e4104be --- /dev/null +++ b/drivers/gpu/drm/asahi/event.rs @@ -0,0 +1,229 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT + +//! GPU event manager +//! +//! The GPU firmware manages work completion by using event objects (Apple calls them "stamps"), +//! which are monotonically incrementing counters. There are a fixed number of objects, and +//! they are managed with a `SlotAllocator`. +//! +//! This module manages the set of available events and lets users compute expected values. +//! It also manages signaling owners when the GPU firmware reports that an event fired. 
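[Editor's note: the stamp values described above advance in steps of 0x100 per completed command, and distances between stamps are recovered with a wrapping subtraction followed by an arithmetic shift, so comparisons stay correct across 32-bit wraparound. A minimal standalone sketch of that arithmetic, mirroring the `EventValue` type defined later in this file (the `Stamp32` name is used only for this illustration):]

    // Standalone model of the stamp arithmetic used by EventValue below.
    #[derive(Copy, Clone, PartialEq, Eq, Debug)]
    struct Stamp32(u32);

    impl Stamp32 {
        // Each completed command advances the stamp by 0x100.
        fn next(self) -> Stamp32 {
            Stamp32(self.0.wrapping_add(0x100))
        }
        // Signed distance in "events" between two stamps, robust to wraparound.
        fn delta(self, other: Stamp32) -> i32 {
            (self.0.wrapping_sub(other.0) as i32) >> 8
        }
    }

    fn main() {
        let base = Stamp32(0xffff_ff00);
        let next = base.next(); // wraps around to 0x0000_0000
        assert_eq!(next.0, 0);
        assert_eq!(next.delta(base), 1);  // one event ahead despite the wrap
        assert_eq!(base.delta(next), -1); // and one behind the other way
    }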
+ +use crate::debug::*; +use crate::fw::types::*; +use crate::{gpu, slotalloc, workqueue}; +use core::cmp; +use core::sync::atomic::Ordering; +use kernel::prelude::*; +use kernel::sync::Arc; + +const DEBUG_CLASS: DebugFlags = DebugFlags::Event; + +/// Number of events managed by the firmware. +const NUM_EVENTS: u32 = 128; + +/// Inner data associated with a given event slot. +pub(crate) struct EventInner { + /// CPU pointer to the driver notification event stamp + stamp: *const AtomicU32, + /// GPU pointer to the driver notification event stamp + gpu_stamp: GpuWeakPointer<Stamp>, + /// GPU pointer to the firmware-internal event stamp + gpu_fw_stamp: GpuWeakPointer<FwStamp>, +} + +/// SAFETY: The event slots are safe to send across threads. +unsafe impl Send for EventInner {} + +/// Alias for an event token, which allows requesting the same event. +pub(crate) type Token = slotalloc::SlotToken; +/// Alias for an allocated `Event` that has a slot. +pub(crate) type Event = slotalloc::Guard<EventInner>; + +/// Represents a given stamp value for an event. +#[derive(Eq, PartialEq, Copy, Clone, Debug)] +#[repr(transparent)] +pub(crate) struct EventValue(u32); + +impl EventValue { + /// Returns the `EventValue` that succeeds this one. + pub(crate) fn next(&self) -> EventValue { + EventValue(self.0.wrapping_add(0x100)) + } + + /// Increments this `EventValue` in place. + pub(crate) fn increment(&mut self) { + self.0 = self.0.wrapping_add(0x100); + } + + /* Not used + /// Increments this `EventValue` in place by a certain count. + pub(crate) fn add(&mut self, val: u32) { + self.0 = self + .0 + .wrapping_add(val.checked_mul(0x100).expect("Adding too many events")); + } + */ + + /// Increments this `EventValue` in place by a certain count. + pub(crate) fn sub(&mut self, val: u32) { + self.0 = self + .0 + .wrapping_sub(val.checked_mul(0x100).expect("Subtracting too many events")); + } + + /// Computes the delta between this event and another event. + pub(crate) fn delta(&self, other: &EventValue) -> i32 { + (self.0.wrapping_sub(other.0) as i32) >> 8 + } +} + +impl PartialOrd for EventValue { + fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> { + Some(self.cmp(other)) + } +} + +impl Ord for EventValue { + fn cmp(&self, other: &Self) -> cmp::Ordering { + self.delta(other).cmp(&0) + } +} + +impl EventInner { + /// Returns the GPU pointer to the driver notification stamp + pub(crate) fn stamp_pointer(&self) -> GpuWeakPointer<Stamp> { + self.gpu_stamp + } + + /// Returns the GPU pointer to the firmware internal stamp + pub(crate) fn fw_stamp_pointer(&self) -> GpuWeakPointer<FwStamp> { + self.gpu_fw_stamp + } + + /// Fetches the current event value from shared memory + pub(crate) fn current(&self) -> EventValue { + // SAFETY: The pointer is always valid as constructed in + // EventManager below, and outside users cannot construct + // new EventInners, nor move or copy them, and Guards as + // returned by the SlotAllocator hold a reference to the + // SlotAllocator containing the EventManagerInner, which + // keeps the GpuObject the stamp is contained within alive. + EventValue(unsafe { &*self.stamp }.load(Ordering::Acquire)) + } +} + +impl slotalloc::SlotItem for EventInner { + type Data = EventManagerInner; + + fn release(&mut self, data: &mut Self::Data, slot: u32) { + mod_pr_debug!("EventManager: Released slot {}\n", slot); + data.owners[slot as usize] = None; + } +} + +/// Inner data for the event manager, to be protected by the SlotAllocator lock. 
+pub(crate) struct EventManagerInner { + stamps: GpuArray<Stamp>, + fw_stamps: GpuArray<FwStamp>, + // Note: Use dyn to avoid having to version this entire module. + owners: Vec<Option<Arc<dyn workqueue::WorkQueue + Send + Sync>>>, +} + +/// Top-level EventManager object. +pub(crate) struct EventManager { + alloc: slotalloc::SlotAllocator<EventInner>, +} + +impl EventManager { + /// Create a new EventManager. + #[inline(never)] + pub(crate) fn new(alloc: &mut gpu::KernelAllocators) -> Result<EventManager> { + let mut owners = Vec::new(); + for _i in 0..(NUM_EVENTS as usize) { + owners.try_push(None)?; + } + let inner = EventManagerInner { + stamps: alloc.shared.array_empty(NUM_EVENTS as usize)?, + fw_stamps: alloc.private.array_empty(NUM_EVENTS as usize)?, + owners, + }; + + Ok(EventManager { + alloc: slotalloc::SlotAllocator::new( + NUM_EVENTS, + inner, + |inner: &mut EventManagerInner, slot| EventInner { + stamp: &inner.stamps[slot as usize].0, + gpu_stamp: inner.stamps.weak_item_pointer(slot as usize), + gpu_fw_stamp: inner.fw_stamps.weak_item_pointer(slot as usize), + }, + )?, + }) + } + + /// Gets a free `Event`, optionally trying to reuse the last one allocated by this caller. + pub(crate) fn get( + &self, + token: Option<Token>, + owner: Arc<dyn workqueue::WorkQueue + Send + Sync>, + ) -> Result<Event> { + let ev = self.alloc.get_inner(token, |inner, ev| { + mod_pr_debug!( + "EventManager: Registered owner {:p} on slot {}\n", + &*owner, + ev.slot() + ); + inner.owners[ev.slot() as usize] = Some(owner); + Ok(()) + })?; + Ok(ev) + } + + /// Signals an event by slot, indicating completion (of one or more commands). + pub(crate) fn signal(&self, slot: u32) { + match self + .alloc + .with_inner(|inner| inner.owners[slot as usize].as_ref().cloned()) + { + Some(owner) => { + owner.signal(); + } + None => { + mod_pr_debug!("EventManager: Received event for empty slot {}\n", slot); + } + } + } + + /// Marks the owner of an event as having lost its work due to a GPU error. + pub(crate) fn mark_error(&self, slot: u32, wait_value: u32, error: workqueue::WorkError) { + match self + .alloc + .with_inner(|inner| inner.owners[slot as usize].as_ref().cloned()) + { + Some(owner) => { + owner.mark_error(EventValue(wait_value), error); + } + None => { + pr_err!("Received error for empty slot {}\n", slot); + } + } + } + + /// Fail all commands, used when the GPU crashes. + pub(crate) fn fail_all(&self, error: workqueue::WorkError) { + let mut owners: Vec<Arc<dyn workqueue::WorkQueue + Send + Sync>> = Vec::new(); + + self.alloc.with_inner(|inner| { + for wq in inner.owners.iter().filter_map(|o| o.as_ref()).cloned() { + if owners.try_push(wq).is_err() { + pr_err!("Failed to signal failure to WorkQueue\n"); + } + } + }); + + for wq in owners { + wq.fail_all(error); + } + } +} diff --git a/drivers/gpu/drm/asahi/file.rs b/drivers/gpu/drm/asahi/file.rs new file mode 100644 index 000000000000..5d47feb30134 --- /dev/null +++ b/drivers/gpu/drm/asahi/file.rs @@ -0,0 +1,718 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +#![allow(clippy::unusual_byte_groupings)] + +//! File implementation, which represents a single DRM client. +//! +//! This is in charge of managing the resources associated with one GPU client, including an +//! arbitrary number of submission queues and Vm objects, and reporting hardware/driver +//! information to userspace and accepting submissions. 
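[Editor's note: every object a client creates (a Vm, a queue) is handed back to userspace as a small integer ID and referenced by that ID in later ioctls, following the reserve/store/get/remove pattern used with the XArrays below. A simplified standalone model of that bookkeeping; the `IdTable` type is purely illustrative, and unlike the real allocator (which starts IDs at 1) this one starts at 0.]

    // Minimal model of per-client ID bookkeeping: create returns a small ID,
    // other calls look the object up by ID, destroy removes it.
    struct IdTable<T> {
        slots: Vec<Option<T>>,
    }

    impl<T> IdTable<T> {
        fn new() -> Self {
            IdTable { slots: Vec::new() }
        }
        fn insert(&mut self, val: T) -> u32 {
            // Reuse the first free slot, or grow the table.
            if let Some(i) = self.slots.iter().position(|s| s.is_none()) {
                self.slots[i] = Some(val);
                i as u32
            } else {
                self.slots.push(Some(val));
                (self.slots.len() - 1) as u32
            }
        }
        fn get(&self, id: u32) -> Option<&T> {
            self.slots.get(id as usize)?.as_ref()
        }
        fn remove(&mut self, id: u32) -> Option<T> {
            self.slots.get_mut(id as usize)?.take()
        }
    }

    fn main() {
        let mut vms: IdTable<&str> = IdTable::new();
        let id = vms.insert("vm #0");            // vm_create returns this ID
        assert_eq!(vms.get(id), Some(&"vm #0")); // later ioctls look it up
        assert!(vms.remove(id).is_some());       // vm_destroy
        assert!(vms.get(id).is_none());          // further use fails (ENOENT)
    }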
+ +use crate::debug::*; +use crate::driver::AsahiDevice; +use crate::{alloc, buffer, driver, gem, mmu, queue}; +use core::mem::MaybeUninit; +use kernel::dma_fence::RawDmaFence; +use kernel::drm::gem::BaseObject; +use kernel::io_buffer::{IoBufferReader, IoBufferWriter}; +use kernel::prelude::*; +use kernel::sync::{smutex::Mutex, Arc}; +use kernel::user_ptr::UserSlicePtr; +use kernel::{bindings, dma_fence, drm, xarray}; + +const DEBUG_CLASS: DebugFlags = DebugFlags::File; + +const MAX_SYNCS_PER_SUBMISSION: u32 = 64; +const MAX_COMMANDS_PER_SUBMISSION: u32 = 64; +pub(crate) const MAX_COMMANDS_IN_FLIGHT: u32 = 1024; + +/// A client instance of an `mmu::Vm` address space. +struct Vm { + ualloc: Arc<Mutex<alloc::DefaultAllocator>>, + ualloc_priv: Arc<Mutex<alloc::DefaultAllocator>>, + vm: mmu::Vm, + dummy_obj: gem::ObjectRef, +} + +impl Drop for Vm { + fn drop(&mut self) { + // Mappings create a reference loop, make sure to break it. + self.dummy_obj.drop_vm_mappings(self.vm.id()); + } +} + +/// Sync object from userspace. +pub(crate) struct SyncItem { + pub(crate) syncobj: drm::syncobj::SyncObj, + pub(crate) fence: Option<dma_fence::Fence>, + pub(crate) chain_fence: Option<dma_fence::FenceChain>, + pub(crate) timeline_value: u64, +} + +impl SyncItem { + fn parse_one(file: &DrmFile, data: bindings::drm_asahi_sync, out: bool) -> Result<SyncItem> { + if data.extensions != 0 { + return Err(EINVAL); + } + + match data.sync_type { + bindings::drm_asahi_sync_type_DRM_ASAHI_SYNC_SYNCOBJ => { + if data.timeline_value != 0 { + return Err(EINVAL); + } + let syncobj = drm::syncobj::SyncObj::lookup_handle(file, data.handle)?; + + Ok(SyncItem { + fence: if out { + None + } else { + Some(syncobj.fence_get().ok_or(EINVAL)?) + }, + syncobj, + chain_fence: None, + timeline_value: data.timeline_value, + }) + } + bindings::drm_asahi_sync_type_DRM_ASAHI_SYNC_TIMELINE_SYNCOBJ => { + let syncobj = drm::syncobj::SyncObj::lookup_handle(file, data.handle)?; + let fence = if out { + None + } else { + Some( + syncobj + .fence_get() + .ok_or(EINVAL)? + .chain_find_seqno(data.timeline_value)?, + ) + }; + + Ok(SyncItem { + fence, + syncobj, + chain_fence: if out { + Some(dma_fence::FenceChain::new()?) + } else { + None + }, + timeline_value: data.timeline_value, + }) + } + _ => Err(EINVAL), + } + } + + fn parse_array(file: &DrmFile, ptr: u64, count: u32, out: bool) -> Result<Vec<SyncItem>> { + let mut vec = Vec::try_with_capacity(count as usize)?; + + const STRIDE: usize = core::mem::size_of::<bindings::drm_asahi_sync>(); + let size = STRIDE * count as usize; + + // SAFETY: We only read this once, so there are no TOCTOU issues. + let mut reader = unsafe { UserSlicePtr::new(ptr as usize as *mut _, size).reader() }; + + for _i in 0..count { + let mut sync: MaybeUninit<bindings::drm_asahi_sync> = MaybeUninit::uninit(); + + // SAFETY: The size of `sync` is STRIDE + unsafe { reader.read_raw(sync.as_mut_ptr() as *mut u8, STRIDE)? }; + + // SAFETY: All bit patterns in the struct are valid + let sync = unsafe { sync.assume_init() }; + + vec.try_push(SyncItem::parse_one(file, sync, out)?)?; + } + + Ok(vec) + } +} + +/// State associated with a client. +pub(crate) struct File { + id: u64, + vms: xarray::XArray<Box<Vm>>, + queues: xarray::XArray<Arc<Mutex<Box<dyn queue::Queue>>>>, +} + +/// Convenience type alias for our DRM `File` type. +pub(crate) type DrmFile = drm::file::File<File>; + +/// Start address of the 32-bit USC address space. +const VM_SHADER_START: u64 = 0x11_00000000; +/// End address of the 32-bit USC address space. 
+const VM_SHADER_END: u64 = 0x11_ffffffff; +/// Start address of the general user mapping region. +const VM_USER_START: u64 = 0x20_00000000; +/// End address of the general user mapping region. +const VM_USER_END: u64 = 0x5f_ffffffff; + +/// Start address of the kernel-managed GPU-only mapping region. +const VM_DRV_GPU_START: u64 = 0x60_00000000; +/// End address of the kernel-managed GPU-only mapping region. +const VM_DRV_GPU_END: u64 = 0x60_ffffffff; +/// Start address of the kernel-managed GPU/FW shared mapping region. +const VM_DRV_GPUFW_START: u64 = 0x61_00000000; +/// End address of the kernel-managed GPU/FW shared mapping region. +const VM_DRV_GPUFW_END: u64 = 0x61_ffffffff; +/// Address of a special dummy page? +const VM_UNK_PAGE: u64 = 0x6f_ffff8000; + +impl drm::file::DriverFile for File { + type Driver = driver::AsahiDriver; + + /// Create a new `File` instance for a fresh client. + fn open(device: &AsahiDevice) -> Result<Box<Self>> { + debug::update_debug_flags(); + + let gpu = &device.data().gpu; + let id = gpu.ids().file.next(); + + mod_dev_dbg!(device, "[File {}]: DRM device opened\n", id); + Ok(Box::try_new(Self { + id, + vms: xarray::XArray::new(xarray::flags::ALLOC1)?, + queues: xarray::XArray::new(xarray::flags::ALLOC1)?, + })?) + } +} + +impl File { + /// IOCTL: get_param: Get a driver parameter value. + pub(crate) fn get_params( + device: &AsahiDevice, + data: &mut bindings::drm_asahi_get_params, + file: &DrmFile, + ) -> Result<u32> { + mod_dev_dbg!(device, "[File {}]: IOCTL: get_params\n", file.id); + + let gpu = &device.data().gpu; + + if data.extensions != 0 || data.param_group != 0 || data.pad != 0 { + return Err(EINVAL); + } + + let mut params = bindings::drm_asahi_params_global { + unstable_uabi_version: bindings::DRM_ASAHI_UNSTABLE_UABI_VERSION, + pad0: 0, + + feat_compat: gpu.get_cfg().gpu_feat_compat, + feat_incompat: gpu.get_cfg().gpu_feat_incompat, + + gpu_generation: gpu.get_dyncfg().id.gpu_gen as u32, + gpu_variant: gpu.get_dyncfg().id.gpu_variant as u32, + gpu_revision: gpu.get_dyncfg().id.gpu_rev as u32, + chip_id: gpu.get_cfg().chip_id, + + num_dies: gpu.get_dyncfg().id.max_dies, + num_clusters_total: gpu.get_dyncfg().id.num_clusters, + num_cores_per_cluster: gpu.get_dyncfg().id.num_cores, + num_frags_per_cluster: gpu.get_dyncfg().id.num_frags, + num_gps_per_cluster: gpu.get_dyncfg().id.num_gps, + num_cores_total_active: gpu.get_dyncfg().id.total_active_cores, + core_masks: [0; bindings::DRM_ASAHI_MAX_CLUSTERS as usize], + + vm_page_size: mmu::UAT_PGSZ as u32, + pad1: 0, + vm_user_start: VM_USER_START, + vm_user_end: VM_USER_END, + vm_shader_start: VM_SHADER_START, + vm_shader_end: VM_SHADER_END, + + max_syncs_per_submission: MAX_SYNCS_PER_SUBMISSION, + max_commands_per_submission: MAX_COMMANDS_PER_SUBMISSION, + max_commands_in_flight: MAX_COMMANDS_IN_FLIGHT, + max_attachments: crate::microseq::MAX_ATTACHMENTS as u32, + + timer_frequency_hz: gpu.get_cfg().base_clock_hz, + min_frequency_khz: gpu.get_dyncfg().pwr.min_frequency_khz(), + max_frequency_khz: gpu.get_dyncfg().pwr.max_frequency_khz(), + max_power_mw: gpu.get_dyncfg().pwr.max_power_mw, + + result_render_size: core::mem::size_of::<bindings::drm_asahi_result_render>() as u32, + result_compute_size: core::mem::size_of::<bindings::drm_asahi_result_compute>() as u32, + }; + + for (i, mask) in gpu.get_dyncfg().id.core_masks.iter().enumerate() { + *(params.core_masks.get_mut(i).ok_or(EIO)?) 
= (*mask).try_into()?; + } + + let size = + core::mem::size_of::<bindings::drm_asahi_params_global>().min(data.size.try_into()?); + + // SAFETY: We only write to this userptr once, so there are no TOCTOU issues. + let mut params_writer = + unsafe { UserSlicePtr::new(data.pointer as usize as *mut _, size).writer() }; + + // SAFETY: `size` is at most the sizeof of `params` + unsafe { params_writer.write_raw(¶ms as *const _ as *const u8, size)? }; + + Ok(0) + } + + /// IOCTL: vm_create: Create a new `Vm`. + pub(crate) fn vm_create( + device: &AsahiDevice, + data: &mut bindings::drm_asahi_vm_create, + file: &DrmFile, + ) -> Result<u32> { + if data.extensions != 0 { + return Err(EINVAL); + } + + let gpu = &device.data().gpu; + let file_id = file.id; + let vm = gpu.new_vm(file_id)?; + + let resv = file.vms.reserve()?; + let id: u32 = resv.index().try_into()?; + + mod_dev_dbg!(device, "[File {} VM {}]: VM Create\n", file_id, id); + mod_dev_dbg!( + device, + "[File {} VM {}]: Creating allocators\n", + file_id, + id + ); + let ualloc = Arc::try_new(Mutex::new(alloc::DefaultAllocator::new( + device, + &vm, + VM_DRV_GPU_START, + VM_DRV_GPU_END, + buffer::PAGE_SIZE, + mmu::PROT_GPU_SHARED_RW, + 512 * 1024, + true, + fmt!("File {} VM {} GPU Shared", file_id, id), + false, + )?))?; + let ualloc_priv = Arc::try_new(Mutex::new(alloc::DefaultAllocator::new( + device, + &vm, + VM_DRV_GPUFW_START, + VM_DRV_GPUFW_END, + buffer::PAGE_SIZE, + mmu::PROT_GPU_FW_PRIV_RW, + 64 * 1024, + true, + fmt!("File {} VM {} GPU FW Private", file_id, id), + false, + )?))?; + + mod_dev_dbg!( + device, + "[File {} VM {}]: Creating dummy object\n", + file_id, + id + ); + let mut dummy_obj = gem::new_kernel_object(device, 0x4000)?; + dummy_obj.vmap()?.as_mut_slice().fill(0); + dummy_obj.map_at(&vm, VM_UNK_PAGE, mmu::PROT_GPU_SHARED_RW, true)?; + + mod_dev_dbg!(device, "[File {} VM {}]: VM created\n", file_id, id); + resv.store(Box::try_new(Vm { + ualloc, + ualloc_priv, + vm, + dummy_obj, + })?)?; + + data.vm_id = id; + + Ok(0) + } + + /// IOCTL: vm_destroy: Destroy a `Vm`. + pub(crate) fn vm_destroy( + _device: &AsahiDevice, + data: &mut bindings::drm_asahi_vm_destroy, + file: &DrmFile, + ) -> Result<u32> { + if data.extensions != 0 { + return Err(EINVAL); + } + + if file.vms.remove(data.vm_id as usize).is_none() { + Err(ENOENT) + } else { + Ok(0) + } + } + + /// IOCTL: gem_create: Create a new GEM object. + pub(crate) fn gem_create( + device: &AsahiDevice, + data: &mut bindings::drm_asahi_gem_create, + file: &DrmFile, + ) -> Result<u32> { + mod_dev_dbg!( + device, + "[File {}]: IOCTL: gem_create size={:#x?}\n", + file.id, + data.size + ); + + if data.extensions != 0 + || (data.flags & !(bindings::ASAHI_GEM_WRITEBACK | bindings::ASAHI_GEM_VM_PRIVATE)) != 0 + || (data.flags & bindings::ASAHI_GEM_VM_PRIVATE == 0 && data.vm_id != 0) + { + return Err(EINVAL); + } + + let vm_id = if data.flags & bindings::ASAHI_GEM_VM_PRIVATE != 0 { + Some(file.vms.get(data.vm_id.try_into()?).ok_or(ENOENT)?.vm.id()) + } else { + None + }; + + let bo = gem::new_object(device, data.size.try_into()?, data.flags, vm_id)?; + + let handle = bo.gem.create_handle(file)?; + data.handle = handle; + + mod_dev_dbg!( + device, + "[File {}]: IOCTL: gem_create size={:#x} handle={:#x?}\n", + file.id, + data.size, + data.handle + ); + + Ok(0) + } + + /// IOCTL: gem_mmap_offset: Assign an mmap offset to a GEM object. 
+ pub(crate) fn gem_mmap_offset( + device: &AsahiDevice, + data: &mut bindings::drm_asahi_gem_mmap_offset, + file: &DrmFile, + ) -> Result<u32> { + mod_dev_dbg!( + device, + "[File {}]: IOCTL: gem_mmap_offset handle={:#x?}\n", + file.id, + data.handle + ); + + if data.extensions != 0 || data.flags != 0 { + return Err(EINVAL); + } + + let bo = gem::lookup_handle(file, data.handle)?; + data.offset = bo.gem.create_mmap_offset()?; + Ok(0) + } + + /// IOCTL: gem_bind: Map or unmap a GEM object into a Vm. + pub(crate) fn gem_bind( + device: &AsahiDevice, + data: &mut bindings::drm_asahi_gem_bind, + file: &DrmFile, + ) -> Result<u32> { + mod_dev_dbg!( + device, + "[File {} VM {}]: IOCTL: gem_bind op={:?} handle={:#x?} flags={:#x?} {:#x?}:{:#x?} -> {:#x?}\n", + file.id, + data.op, + data.vm_id, + data.handle, + data.flags, + data.offset, + data.range, + data.addr + ); + + if data.extensions != 0 { + return Err(EINVAL); + } + + match data.op { + bindings::drm_asahi_bind_op_ASAHI_BIND_OP_BIND => Self::do_gem_bind(device, data, file), + bindings::drm_asahi_bind_op_ASAHI_BIND_OP_UNBIND => Err(ENOTSUPP), + bindings::drm_asahi_bind_op_ASAHI_BIND_OP_UNBIND_ALL => { + Self::do_gem_unbind_all(device, data, file) + } + _ => Err(EINVAL), + } + } + + pub(crate) fn do_gem_bind( + _device: &AsahiDevice, + data: &mut bindings::drm_asahi_gem_bind, + file: &DrmFile, + ) -> Result<u32> { + if data.offset != 0 { + return Err(EINVAL); // Not supported yet + } + + if (data.addr | data.range) as usize & mmu::UAT_PGMSK != 0 { + return Err(EINVAL); // Must be page aligned + } + + if (data.flags & !(bindings::ASAHI_BIND_READ | bindings::ASAHI_BIND_WRITE)) != 0 { + return Err(EINVAL); + } + + let mut bo = gem::lookup_handle(file, data.handle)?; + + if data.range != bo.size().try_into()? { + return Err(EINVAL); // Not supported yet + } + + let start = data.addr; + let end = data.addr + data.range - 1; + + if (VM_SHADER_START..=VM_SHADER_END).contains(&start) { + if !(VM_SHADER_START..=VM_SHADER_END).contains(&end) { + return Err(EINVAL); // Invalid map range + } + } else if (VM_USER_START..=VM_USER_END).contains(&start) { + if !(VM_USER_START..=VM_USER_END).contains(&end) { + return Err(EINVAL); // Invalid map range + } + } else { + return Err(EINVAL); // Invalid map range + } + + // Just in case + if end >= VM_DRV_GPU_START { + return Err(EINVAL); + } + + let prot = if data.flags & bindings::ASAHI_BIND_READ != 0 { + if data.flags & bindings::ASAHI_BIND_WRITE != 0 { + mmu::PROT_GPU_SHARED_RW + } else { + mmu::PROT_GPU_SHARED_RO + } + } else if data.flags & bindings::ASAHI_BIND_WRITE != 0 { + mmu::PROT_GPU_SHARED_WO + } else { + return Err(EINVAL); // Must specify one of ASAHI_BIND_{READ,WRITE} + }; + + // Clone it immediately so we aren't holding the XArray lock + let vm = file + .vms + .get(data.vm_id.try_into()?) + .ok_or(ENOENT)? + .vm + .clone(); + + bo.map_at(&vm, start, prot, true)?; + + Ok(0) + } + + pub(crate) fn do_gem_unbind_all( + _device: &AsahiDevice, + data: &mut bindings::drm_asahi_gem_bind, + file: &DrmFile, + ) -> Result<u32> { + if data.flags != 0 || data.offset != 0 || data.range != 0 || data.addr != 0 { + return Err(EINVAL); + } + + let mut bo = gem::lookup_handle(file, data.handle)?; + + if data.vm_id == 0 { + bo.drop_file_mappings(file.id); + } else { + let vm_id = file.vms.get(data.vm_id.try_into()?).ok_or(ENOENT)?.vm.id(); + bo.drop_vm_mappings(vm_id); + } + + Ok(0) + } + + /// IOCTL: queue_create: Create a new command submission queue of a given type. 
+ pub(crate) fn queue_create( + device: &AsahiDevice, + data: &mut bindings::drm_asahi_queue_create, + file: &DrmFile, + ) -> Result<u32> { + let file_id = file.id; + + mod_dev_dbg!( + device, + "[File {} VM {}]: Creating queue caps={:?} prio={:?} flags={:#x?}\n", + file_id, + data.vm_id, + data.queue_caps, + data.priority, + data.flags, + ); + + if data.extensions != 0 + || data.flags != 0 + || data.priority > 3 + || data.queue_caps == 0 + || (data.queue_caps + & !(bindings::drm_asahi_queue_cap_DRM_ASAHI_QUEUE_CAP_RENDER + | bindings::drm_asahi_queue_cap_DRM_ASAHI_QUEUE_CAP_BLIT + | bindings::drm_asahi_queue_cap_DRM_ASAHI_QUEUE_CAP_COMPUTE)) + != 0 + { + return Err(EINVAL); + } + + let resv = file.queues.reserve()?; + let file_vm = file.vms.get(data.vm_id.try_into()?).ok_or(ENOENT)?; + let vm = file_vm.vm.clone(); + let ualloc = file_vm.ualloc.clone(); + let ualloc_priv = file_vm.ualloc_priv.clone(); + // Drop the vms lock eagerly + core::mem::drop(file_vm); + + let queue = + device + .data() + .gpu + .new_queue(vm, ualloc, ualloc_priv, data.priority, data.queue_caps)?; + + data.queue_id = resv.index().try_into()?; + resv.store(Arc::try_new(Mutex::new(queue))?)?; + + Ok(0) + } + + /// IOCTL: queue_destroy: Destroy a command submission queue. + pub(crate) fn queue_destroy( + _device: &AsahiDevice, + data: &mut bindings::drm_asahi_queue_destroy, + file: &DrmFile, + ) -> Result<u32> { + if data.extensions != 0 { + return Err(EINVAL); + } + + if file.queues.remove(data.queue_id as usize).is_none() { + Err(ENOENT) + } else { + Ok(0) + } + } + + /// IOCTL: submit: Submit GPU work to a command submission queue. + pub(crate) fn submit( + device: &AsahiDevice, + data: &mut bindings::drm_asahi_submit, + file: &DrmFile, + ) -> Result<u32> { + if data.extensions != 0 + || data.flags != 0 + || data.in_sync_count > MAX_SYNCS_PER_SUBMISSION + || data.out_sync_count > MAX_SYNCS_PER_SUBMISSION + || data.command_count > MAX_COMMANDS_PER_SUBMISSION + { + return Err(EINVAL); + } + + debug::update_debug_flags(); + + let gpu = &device.data().gpu; + gpu.update_globals(); + + // Upgrade to Arc<T> to drop the XArray lock early + let queue: Arc<Mutex<Box<dyn queue::Queue>>> = file + .queues + .get(data.queue_id.try_into()?) + .ok_or(ENOENT)? + .borrow() + .into(); + + let id = gpu.ids().submission.next(); + mod_dev_dbg!( + device, + "[File {} Queue {}]: IOCTL: submit (submission ID: {})\n", + file.id, + data.queue_id, + id + ); + + mod_dev_dbg!( + device, + "[File {} Queue {}]: IOCTL: submit({}): Parsing in_syncs\n", + file.id, + data.queue_id, + id + ); + let in_syncs = SyncItem::parse_array(file, data.in_syncs, data.in_sync_count, false)?; + mod_dev_dbg!( + device, + "[File {} Queue {}]: IOCTL: submit({}): Parsing out_syncs\n", + file.id, + data.queue_id, + id + ); + let out_syncs = SyncItem::parse_array(file, data.out_syncs, data.out_sync_count, true)?; + + let result_buf = if data.result_handle != 0 { + mod_dev_dbg!( + device, + "[File {} Queue {}]: IOCTL: submit({}): Looking up result_handle {}\n", + file.id, + data.queue_id, + id, + data.result_handle + ); + Some(gem::lookup_handle(file, data.result_handle)?) 
+ } else { + None + }; + + mod_dev_dbg!( + device, + "[File {} Queue {}]: IOCTL: submit({}): Parsing commands\n", + file.id, + data.queue_id, + id + ); + let mut commands = Vec::try_with_capacity(data.command_count as usize)?; + + const STRIDE: usize = core::mem::size_of::<bindings::drm_asahi_command>(); + let size = STRIDE * data.command_count as usize; + + // SAFETY: We only read this once, so there are no TOCTOU issues. + let mut reader = + unsafe { UserSlicePtr::new(data.commands as usize as *mut _, size).reader() }; + + for _i in 0..data.command_count { + let mut cmd: MaybeUninit<bindings::drm_asahi_command> = MaybeUninit::uninit(); + + // SAFETY: The size of `sync` is STRIDE + unsafe { reader.read_raw(cmd.as_mut_ptr() as *mut u8, STRIDE)? }; + + // SAFETY: All bit patterns in the struct are valid + commands.try_push(unsafe { cmd.assume_init() })?; + } + + let ret = queue + .lock() + .submit(id, in_syncs, out_syncs, result_buf, commands); + + match ret { + Err(ERESTARTSYS) => Err(ERESTARTSYS), + Err(e) => { + dev_info!( + device, + "[File {} Queue {}]: IOCTL: submit failed! (submission ID: {} err: {:?})\n", + file.id, + data.queue_id, + id, + e + ); + Err(e) + } + Ok(_) => Ok(0), + } + } + + /// Returns the unique file ID for this `File`. + pub(crate) fn file_id(&self) -> u64 { + self.id + } +} + +impl Drop for File { + fn drop(&mut self) { + mod_pr_debug!("[File {}]: Closing...\n", self.id); + } +} diff --git a/drivers/gpu/drm/asahi/float.rs b/drivers/gpu/drm/asahi/float.rs new file mode 100644 index 000000000000..e73b4b628cf9 --- /dev/null +++ b/drivers/gpu/drm/asahi/float.rs @@ -0,0 +1,381 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT + +//! Basic soft floating-point support +//! +//! The GPU firmware requires a large number of power-related configuration values, many of which +//! are IEEE 754 32-bit floating point values. These values change not only between GPU/SoC +//! variants, but also between specific hardware platforms using these SoCs, so they must be +//! derived from device tree properties. There are many redundant values computed from the same +//! inputs with simple add/sub/mul/div calculations, plus a few values that are actually specific +//! to each individual device depending on its binning and fused voltage configuration, so it +//! doesn't make sense to store the final values to be passed to the firmware in the device tree. +//! +//! Therefore, we need a way to perform floating-point calculations in the kernel. +//! +//! Using the actual FPU from kernel mode is asking for trouble, since there is no way to bound +//! the execution of FPU instructions to a controlled section of code without outright putting it +//! in its own compilation unit, which is quite painful for Rust. Since these calculations only +//! have to happen at initialization time and there is no need for performance, let's use a simple +//! software float implementation instead. +//! +//! This implementation makes no attempt to be fully IEEE754 compliant, but it's good enough and +//! gives bit-identical results to macOS in the vast majority of cases, with one or two exceptions +//! related to slightly non-compliant rounding. + +use core::ops; +use kernel::{of, prelude::*}; + +/// An IEEE754-compatible floating point number implemented in software. 
+#[derive(Default, Debug, Copy, Clone)] +pub(crate) struct F32(u32); + +#[derive(Default, Debug, Copy, Clone)] +struct F32U { + sign: bool, + exp: i32, + frac: i64, +} + +impl F32 { + /// Convert a raw 32-bit representation into an F32 + pub(crate) const fn from_bits(u: u32) -> F32 { + F32(u) + } + + // Convert a `f32` value into an F32 + // + // This must ONLY be used in const context. Use the `f32!{}` macro to do it safely. + #[doc(hidden)] + pub(crate) const fn from_f32(v: f32) -> F32 { + F32(unsafe { core::mem::transmute(v) }) + } + + // Convert an F32 into a `f32` value + // + // For testing only. + #[doc(hidden)] + #[cfg(test)] + pub(crate) fn to_f32(self) -> f32 { + f32::from_bits(self.0) + } + + const fn unpack(&self) -> F32U { + F32U { + sign: self.0 & (1 << 31) != 0, + exp: ((self.0 >> 23) & 0xff) as i32 - 127, + frac: (((self.0 & 0x7fffff) | 0x800000) as i64) << 9, + } + .norm() + } +} + +/// Safely construct an `F32` out of a constant floating-point value. +/// +/// This ensures that the conversion happens in const context, so no floating point operations are +/// emitted. +#[macro_export] +macro_rules! f32 { + ([$($val:expr),*]) => {{ + [$(f32!($val)),*] + }}; + ($val:expr) => {{ + const _K: $crate::float::F32 = $crate::float::F32::from_f32($val); + _K + }}; +} + +impl ops::Neg for F32 { + type Output = F32; + + fn neg(self) -> F32 { + F32(self.0 ^ (1 << 31)) + } +} + +impl ops::Add<F32> for F32 { + type Output = F32; + + fn add(self, rhs: F32) -> F32 { + self.unpack().add(rhs.unpack()).pack() + } +} + +impl ops::Sub<F32> for F32 { + type Output = F32; + + fn sub(self, rhs: F32) -> F32 { + self.unpack().add((-rhs).unpack()).pack() + } +} + +impl ops::Mul<F32> for F32 { + type Output = F32; + + fn mul(self, rhs: F32) -> F32 { + self.unpack().mul(rhs.unpack()).pack() + } +} + +impl ops::Div<F32> for F32 { + type Output = F32; + + fn div(self, rhs: F32) -> F32 { + self.unpack().div(rhs.unpack()).pack() + } +} + +macro_rules! 
from_ints { + ($u:ty, $i:ty) => { + impl From<$i> for F32 { + fn from(v: $i) -> F32 { + F32U::from_i64(v as i64).pack() + } + } + impl From<$u> for F32 { + fn from(v: $u) -> F32 { + F32U::from_u64(v as u64).pack() + } + } + }; +} + +from_ints!(u8, i8); +from_ints!(u16, i16); +from_ints!(u32, i32); +from_ints!(u64, i64); + +impl F32U { + const INFINITY: F32U = f32!(f32::INFINITY).unpack(); + const NEG_INFINITY: F32U = f32!(f32::NEG_INFINITY).unpack(); + + fn from_i64(v: i64) -> F32U { + F32U { + sign: v < 0, + exp: 32, + frac: v.abs(), + } + .norm() + } + + fn from_u64(mut v: u64) -> F32U { + let mut exp = 32; + if v >= (1 << 63) { + exp = 31; + v >>= 1; + } + F32U { + sign: false, + exp, + frac: v as i64, + } + .norm() + } + + fn shr(&mut self, shift: i32) { + if shift > 63 { + self.exp = 0; + self.frac = 0; + } else { + self.frac >>= shift; + } + } + + fn align(a: &mut F32U, b: &mut F32U) { + if a.exp > b.exp { + b.shr(a.exp - b.exp); + b.exp = a.exp; + } else { + a.shr(b.exp - a.exp); + a.exp = b.exp; + } + } + + fn mul(self, other: F32U) -> F32U { + F32U { + sign: self.sign != other.sign, + exp: self.exp + other.exp, + frac: ((self.frac >> 8) * (other.frac >> 8)) >> 16, + } + } + + fn div(self, other: F32U) -> F32U { + if other.frac == 0 || self.is_inf() { + if self.sign { + F32U::NEG_INFINITY + } else { + F32U::INFINITY + } + } else { + F32U { + sign: self.sign != other.sign, + exp: self.exp - other.exp, + frac: ((self.frac << 24) / (other.frac >> 8)), + } + } + } + + fn add(mut self, mut other: F32U) -> F32U { + F32U::align(&mut self, &mut other); + if self.sign == other.sign { + self.frac += other.frac; + } else { + self.frac -= other.frac; + } + if self.frac < 0 { + self.sign = !self.sign; + self.frac = -self.frac; + } + self + } + + const fn norm(mut self) -> F32U { + let lz = self.frac.leading_zeros() as i32; + if lz > 31 { + self.frac <<= lz - 31; + self.exp -= lz - 31; + } else if lz < 31 { + self.frac >>= 31 - lz; + self.exp += 31 - lz; + } + + if self.is_zero() { + return F32U { + sign: self.sign, + frac: 0, + exp: 0, + }; + } + self + } + + const fn is_zero(&self) -> bool { + self.frac == 0 || self.exp < -126 + } + + const fn is_inf(&self) -> bool { + self.exp > 127 + } + + const fn pack(mut self) -> F32 { + self = self.norm(); + if !self.is_zero() { + self.frac += 0x100; + self = self.norm(); + } + + if self.is_inf() { + if self.sign { + return f32!(f32::NEG_INFINITY); + } else { + return f32!(f32::INFINITY); + } + } else if self.is_zero() { + if self.sign { + return f32!(-0.0); + } else { + return f32!(0.0); + } + } + + F32(if self.sign { 1u32 << 31 } else { 0u32 } + | ((self.exp + 127) as u32) << 23 + | ((self.frac >> 9) & 0x7fffff) as u32) + } +} + +impl<'a> TryFrom<of::Property<'a>> for F32 { + type Error = Error; + + fn try_from(p: of::Property<'_>) -> core::result::Result<F32, Self::Error> { + let bits: u32 = p.try_into()?; + Ok(F32::from_bits(bits)) + } +} + +impl of::PropertyUnit for F32 { + const UNIT_SIZE: usize = 4; + + fn from_bytes(data: &[u8]) -> Result<Self> { + Ok(F32::from_bits(<u32 as of::PropertyUnit>::from_bytes(data)?)) + } +} + +// TODO: Make this an actual test and figure out how to make it run. 
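[Editor's note: as a concrete check of the layout handled above: the sign lives in bit 31, the exponent field occupies bits 23..31 with a bias of 127, and `unpack()` widens the 23-bit mantissa with the implicit leading 1 plus 9 bits of headroom. A small worked example for the constant 1.5, done host-side with plain integers rather than the driver's `F32` type; the (currently unwired) test module below exercises the same operations end to end.]

    // Worked example of the IEEE754 single-precision layout used by F32:
    // 1.5 = 1.1b * 2^0, so the bit pattern is 0x3FC00000.
    fn main() {
        let bits: u32 = 1.5f32.to_bits();
        assert_eq!(bits, 0x3FC0_0000);

        // The same field extraction that unpack() performs:
        let sign = bits & (1 << 31) != 0;
        let exp = ((bits >> 23) & 0xff) as i32 - 127; // remove the bias
        let frac = (((bits & 0x7f_ffff) | 0x80_0000) as i64) << 9; // implicit 1, 9 bits headroom

        assert!(!sign);
        assert_eq!(exp, 0);
        assert_eq!(frac, 0xC0_0000_i64 << 9); // 1.5 in 1.23 fixed point, shifted up by 9
    }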
+#[cfg(test)] +mod tests { + #[test] + fn test_all() { + fn add(a: f32, b: f32) { + println!( + "{} + {} = {} {}", + a, + b, + (F32::from_f32(a) + F32::from_f32(b)).to_f32(), + a + b + ); + } + fn sub(a: f32, b: f32) { + println!( + "{} - {} = {} {}", + a, + b, + (F32::from_f32(a) - F32::from_f32(b)).to_f32(), + a - b + ); + } + fn mul(a: f32, b: f32) { + println!( + "{} * {} = {} {}", + a, + b, + (F32::from_f32(a) * F32::from_f32(b)).to_f32(), + a * b + ); + } + fn div(a: f32, b: f32) { + println!( + "{} / {} = {} {}", + a, + b, + (F32::from_f32(a) / F32::from_f32(b)).to_f32(), + a / b + ); + } + + fn test(a: f32, b: f32) { + add(a, b); + sub(a, b); + mul(a, b); + div(a, b); + } + + test(1.123, 7.567); + test(1.123, 1.456); + test(7.567, 1.123); + test(1.123, -7.567); + test(1.123, -1.456); + test(7.567, -1.123); + test(-1.123, -7.567); + test(-1.123, -1.456); + test(-7.567, -1.123); + test(1000.123, 0.001); + test(1000.123, 0.0000001); + test(0.0012, 1000.123); + test(0.0000001, 1000.123); + test(0., 0.); + test(0., 1.); + test(1., 0.); + test(1., 1.); + test(2., f32::INFINITY); + test(2., f32::NEG_INFINITY); + test(f32::INFINITY, 2.); + test(f32::NEG_INFINITY, 2.); + test(f32::NEG_INFINITY, 2.); + test(f32::MAX, 2.); + test(f32::MIN, 2.); + test(f32::MIN_POSITIVE, 2.); + test(2., f32::MAX); + test(2., f32::MIN); + test(2., f32::MIN_POSITIVE); + } +} diff --git a/drivers/gpu/drm/asahi/fw/buffer.rs b/drivers/gpu/drm/asahi/fw/buffer.rs new file mode 100644 index 000000000000..a8a467879518 --- /dev/null +++ b/drivers/gpu/drm/asahi/fw/buffer.rs @@ -0,0 +1,170 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT + +//! GPU tiled vertex buffer control firmware structures + +use super::types::*; +use super::workqueue; +use crate::{default_zeroed, no_debug, trivial_gpustruct}; +use kernel::sync::Arc; + +pub(crate) mod raw { + use super::*; + + #[derive(Debug)] + #[repr(C)] + pub(crate) struct BlockControl { + pub(crate) total: AtomicU32, + pub(crate) wptr: AtomicU32, + pub(crate) unk: AtomicU32, + pub(crate) pad: Pad<0x34>, + } + default_zeroed!(BlockControl); + + #[derive(Debug)] + #[repr(C)] + pub(crate) struct Counter { + pub(crate) count: AtomicU32, + __pad: Pad<0x3c>, + } + default_zeroed!(Counter); + + #[derive(Debug, Default)] + #[repr(C)] + pub(crate) struct Stats { + pub(crate) max_pages: AtomicU32, + pub(crate) max_b: AtomicU32, + pub(crate) overflow_count: AtomicU32, + pub(crate) gpu_c: AtomicU32, + pub(crate) __pad0: Pad<0x10>, + pub(crate) reset: AtomicU32, + pub(crate) __pad1: Pad<0x1c>, + } + + #[versions(AGX)] + #[derive(Debug)] + #[repr(C)] + pub(crate) struct Info<'a> { + pub(crate) gpu_counter: u32, + pub(crate) unk_4: u32, + pub(crate) last_id: i32, + pub(crate) cur_id: i32, + pub(crate) unk_10: u32, + pub(crate) gpu_counter2: u32, + pub(crate) unk_18: u32, + + #[ver(V < V13_0B4)] + pub(crate) unk_1c: u32, + + pub(crate) page_list: GpuPointer<'a, &'a [u32]>, + pub(crate) page_list_size: u32, + pub(crate) page_count: AtomicU32, + pub(crate) max_blocks: u32, + pub(crate) block_count: AtomicU32, + pub(crate) unk_38: u32, + pub(crate) block_list: GpuPointer<'a, &'a [u32]>, + pub(crate) block_ctl: GpuPointer<'a, super::BlockControl>, + pub(crate) last_page: AtomicU32, + pub(crate) gpu_page_ptr1: u32, + pub(crate) gpu_page_ptr2: u32, + pub(crate) unk_58: u32, + pub(crate) block_size: u32, + pub(crate) unk_60: U64, + pub(crate) counter: GpuPointer<'a, super::Counter>, + pub(crate) unk_70: u32, + pub(crate) unk_74: u32, + pub(crate) unk_78: u32, + pub(crate) unk_7c: u32, + pub(crate) 
unk_80: u32, + pub(crate) max_pages: u32, + pub(crate) max_pages_nomemless: u32, + pub(crate) unk_8c: u32, + pub(crate) unk_90: Array<0x30, u8>, + } + + #[derive(Debug)] + #[repr(C)] + pub(crate) struct Scene<'a> { + pub(crate) pass_page_count: AtomicU32, + pub(crate) unk_4: u32, + pub(crate) unk_8: U64, + pub(crate) unk_10: U64, + pub(crate) user_buffer: GpuPointer<'a, &'a [u8]>, + pub(crate) unk_20: u32, + pub(crate) stats: GpuWeakPointer<super::Stats>, + pub(crate) total_page_count: AtomicU32, + pub(crate) unk_30: U64, // pad + pub(crate) unk_38: U64, // pad + } + + #[versions(AGX)] + #[derive(Debug)] + #[repr(C)] + pub(crate) struct InitBuffer<'a> { + pub(crate) tag: workqueue::CommandType, + pub(crate) vm_slot: u32, + pub(crate) buffer_slot: u32, + pub(crate) unk_c: u32, + pub(crate) block_count: u32, + pub(crate) buffer: GpuPointer<'a, super::Info::ver>, + pub(crate) stamp_value: EventValue, + } +} + +trivial_gpustruct!(BlockControl); +trivial_gpustruct!(Counter); +trivial_gpustruct!(Stats); + +#[versions(AGX)] +#[derive(Debug)] +pub(crate) struct Info { + pub(crate) block_ctl: GpuObject<BlockControl>, + pub(crate) counter: GpuObject<Counter>, + pub(crate) page_list: GpuArray<u32>, + pub(crate) block_list: GpuArray<u32>, +} + +#[versions(AGX)] +impl GpuStruct for Info::ver { + type Raw<'a> = raw::Info::ver<'a>; +} + +pub(crate) struct ClusterBuffers { + pub(crate) tilemaps: GpuArray<u8>, + pub(crate) meta: GpuArray<u8>, +} + +#[versions(AGX)] +pub(crate) struct Scene { + pub(crate) user_buffer: GpuArray<u8>, + pub(crate) buffer: crate::buffer::Buffer::ver, + pub(crate) tvb_heapmeta: GpuArray<u8>, + pub(crate) tvb_tilemap: GpuArray<u8>, + pub(crate) tpc: Arc<GpuArray<u8>>, + pub(crate) clustering: Option<ClusterBuffers>, + pub(crate) preempt_buf: GpuArray<u8>, + pub(crate) seq_buf: GpuArray<u64>, +} + +#[versions(AGX)] +no_debug!(Scene::ver); + +#[versions(AGX)] +impl GpuStruct for Scene::ver { + type Raw<'a> = raw::Scene<'a>; +} + +#[versions(AGX)] +pub(crate) struct InitBuffer { + pub(crate) scene: Arc<crate::buffer::Scene::ver>, +} + +#[versions(AGX)] +no_debug!(InitBuffer::ver); + +#[versions(AGX)] +impl workqueue::Command for InitBuffer::ver {} + +#[versions(AGX)] +impl GpuStruct for InitBuffer::ver { + type Raw<'a> = raw::InitBuffer::ver<'a>; +} diff --git a/drivers/gpu/drm/asahi/fw/channels.rs b/drivers/gpu/drm/asahi/fw/channels.rs new file mode 100644 index 000000000000..db5ac9a3ded5 --- /dev/null +++ b/drivers/gpu/drm/asahi/fw/channels.rs @@ -0,0 +1,385 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT + +//! 
GPU communication channel firmware structures (ring buffers) + +use super::types::*; +use crate::default_zeroed; +use core::sync::atomic::Ordering; + +pub(crate) mod raw { + use super::*; + + #[derive(Debug)] + #[repr(C)] + pub(crate) struct ChannelState<'a> { + pub(crate) read_ptr: AtomicU32, + __pad0: Pad<0x1c>, + pub(crate) write_ptr: AtomicU32, + __pad1: Pad<0xc>, + _p: PhantomData<&'a ()>, + } + default_zeroed!(<'a>, ChannelState<'a>); + + #[derive(Debug)] + #[repr(C)] + pub(crate) struct FwCtlChannelState<'a> { + pub(crate) read_ptr: AtomicU32, + __pad0: Pad<0xc>, + pub(crate) write_ptr: AtomicU32, + __pad1: Pad<0xc>, + _p: PhantomData<&'a ()>, + } + default_zeroed!(<'a>, FwCtlChannelState<'a>); +} + +pub(crate) trait RxChannelState: GpuStruct + Debug + Default +where + for<'a> <Self as GpuStruct>::Raw<'a>: Default + Zeroed, +{ + const SUB_CHANNELS: usize; + + fn wptr(raw: &Self::Raw<'_>, index: usize) -> u32; + fn set_rptr(raw: &Self::Raw<'_>, index: usize, rptr: u32); +} + +#[derive(Debug, Default)] +pub(crate) struct ChannelState {} + +impl GpuStruct for ChannelState { + type Raw<'a> = raw::ChannelState<'a>; +} + +impl RxChannelState for ChannelState { + const SUB_CHANNELS: usize = 1; + + fn wptr(raw: &Self::Raw<'_>, _index: usize) -> u32 { + raw.write_ptr.load(Ordering::Acquire) + } + + fn set_rptr(raw: &Self::Raw<'_>, _index: usize, rptr: u32) { + raw.read_ptr.store(rptr, Ordering::Release); + } +} + +#[derive(Debug, Default)] +pub(crate) struct FwLogChannelState {} + +impl GpuStruct for FwLogChannelState { + type Raw<'a> = Array<6, raw::ChannelState<'a>>; +} + +impl RxChannelState for FwLogChannelState { + const SUB_CHANNELS: usize = 6; + + fn wptr(raw: &Self::Raw<'_>, index: usize) -> u32 { + raw[index].write_ptr.load(Ordering::Acquire) + } + + fn set_rptr(raw: &Self::Raw<'_>, index: usize, rptr: u32) { + raw[index].read_ptr.store(rptr, Ordering::Release); + } +} + +#[derive(Debug, Default)] +pub(crate) struct FwCtlChannelState {} + +impl GpuStruct for FwCtlChannelState { + type Raw<'a> = raw::FwCtlChannelState<'a>; +} + +pub(crate) trait TxChannelState: GpuStruct + Debug + Default { + fn rptr(raw: &Self::Raw<'_>) -> u32; + fn set_wptr(raw: &Self::Raw<'_>, wptr: u32); +} + +impl TxChannelState for ChannelState { + fn rptr(raw: &Self::Raw<'_>) -> u32 { + raw.read_ptr.load(Ordering::Acquire) + } + + fn set_wptr(raw: &Self::Raw<'_>, wptr: u32) { + raw.write_ptr.store(wptr, Ordering::Release); + } +} + +impl TxChannelState for FwCtlChannelState { + fn rptr(raw: &Self::Raw<'_>) -> u32 { + raw.read_ptr.load(Ordering::Acquire) + } + + fn set_wptr(raw: &Self::Raw<'_>, wptr: u32) { + raw.write_ptr.store(wptr, Ordering::Release); + } +} + +#[derive(Debug, Copy, Clone, Default)] +#[repr(u32)] +pub(crate) enum PipeType { + #[default] + Vertex = 0, + Fragment = 1, + Compute = 2, +} + +#[versions(AGX)] +#[derive(Debug, Copy, Clone, Default)] +#[repr(C)] +pub(crate) struct RunWorkQueueMsg { + pub(crate) pipe_type: PipeType, + pub(crate) work_queue: Option<GpuWeakPointer<super::workqueue::QueueInfo::ver>>, + pub(crate) wptr: u32, + pub(crate) event_slot: u32, + pub(crate) is_new: bool, + #[ver(V >= V13_2 && G >= G14)] + pub(crate) __pad: Pad<0x2b>, + #[ver(V < V13_2 || G < G14)] + pub(crate) __pad: Pad<0x1b>, +} + +#[versions(AGX)] +pub(crate) type PipeMsg = RunWorkQueueMsg::ver; + +#[versions(AGX)] +pub(crate) const DEVICECONTROL_SZ: usize = { + #[ver(V < V13_2 || G < G14)] + { + 0x2c + } + #[ver(V >= V13_2 && G >= G14)] + { + 0x3c + } +}; + +// TODO: clean up when 
arbitrary_enum_discriminant is stable +// https://github.com/rust-lang/rust/issues/60553 + +#[versions(AGX)] +#[derive(Debug, Copy, Clone)] +#[repr(C, u32)] +#[allow(dead_code)] +pub(crate) enum DeviceControlMsg { + Unk00(Array<DEVICECONTROL_SZ::ver, u8>), + Unk01(Array<DEVICECONTROL_SZ::ver, u8>), + Unk02(Array<DEVICECONTROL_SZ::ver, u8>), + Unk03(Array<DEVICECONTROL_SZ::ver, u8>), + Unk04(Array<DEVICECONTROL_SZ::ver, u8>), + Unk05(Array<DEVICECONTROL_SZ::ver, u8>), + Unk06(Array<DEVICECONTROL_SZ::ver, u8>), + Unk07(Array<DEVICECONTROL_SZ::ver, u8>), + Unk08(Array<DEVICECONTROL_SZ::ver, u8>), + Unk09(Array<DEVICECONTROL_SZ::ver, u8>), + Unk0a(Array<DEVICECONTROL_SZ::ver, u8>), + Unk0b(Array<DEVICECONTROL_SZ::ver, u8>), + Unk0c(Array<DEVICECONTROL_SZ::ver, u8>), + Unk0d(Array<DEVICECONTROL_SZ::ver, u8>), + Unk0e(Array<DEVICECONTROL_SZ::ver, u8>), + Unk0f(Array<DEVICECONTROL_SZ::ver, u8>), + Unk10(Array<DEVICECONTROL_SZ::ver, u8>), + Unk11(Array<DEVICECONTROL_SZ::ver, u8>), + Unk12(Array<DEVICECONTROL_SZ::ver, u8>), + Unk13(Array<DEVICECONTROL_SZ::ver, u8>), + Unk14(Array<DEVICECONTROL_SZ::ver, u8>), + Unk15(Array<DEVICECONTROL_SZ::ver, u8>), + Unk16(Array<DEVICECONTROL_SZ::ver, u8>), + DestroyContext { + unk_4: u32, + ctx_23: u8, + __pad0: Pad<3>, + unk_c: u32, + unk_10: u32, + ctx_0: u8, + ctx_1: u8, + ctx_4: u8, + __pad1: Pad<1>, + unk_18: u32, + gpu_context: Option<GpuWeakPointer<super::workqueue::GpuContextData>>, + __pad2: Pad<{ DEVICECONTROL_SZ::ver - 0x20 }>, + }, + Unk18(Array<DEVICECONTROL_SZ::ver, u8>), + Initialize(Pad<DEVICECONTROL_SZ::ver>), +} + +#[versions(AGX)] +default_zeroed!(DeviceControlMsg::ver); + +#[derive(Copy, Clone, Default, Debug)] +#[repr(C)] +#[allow(dead_code)] +pub(crate) struct FwCtlMsg { + pub(crate) addr: U64, + pub(crate) unk_8: u32, + pub(crate) slot: u32, + pub(crate) page_count: u16, + pub(crate) unk_12: u16, +} + +pub(crate) const EVENT_SZ: usize = 0x34; + +#[derive(Debug, Copy, Clone)] +#[repr(C, u32)] +#[allow(dead_code)] +pub(crate) enum EventMsg { + Fault, + Flag { + firing: [u32; 4], + unk_14: u16, + }, + Unk2(Array<EVENT_SZ, u8>), + Unk3(Array<EVENT_SZ, u8>), + Timeout { + counter: u32, + unk_8: u32, + event_slot: u32, + }, // Max discriminant: 0x4 +} + +pub(crate) const EVENT_MAX: u32 = 0x4; + +#[derive(Copy, Clone)] +#[repr(C)] +pub(crate) union RawEventMsg { + pub(crate) raw: (u32, Array<EVENT_SZ, u8>), + pub(crate) msg: EventMsg, +} + +default_zeroed!(RawEventMsg); + +#[derive(Debug, Copy, Clone, Default)] +#[repr(C)] +pub(crate) struct RawFwLogMsg { + pub(crate) msg_type: u32, + __pad0: u32, + pub(crate) msg_index: U64, + __pad1: Pad<0x28>, +} + +#[derive(Debug, Copy, Clone, Default)] +#[repr(C)] +pub(crate) struct RawFwLogPayloadMsg { + pub(crate) msg_type: u32, + pub(crate) seq_no: u32, + pub(crate) timestamp: U64, + pub(crate) msg: Array<0xc8, u8>, +} + +#[derive(Debug, Copy, Clone, Default)] +#[repr(C)] +pub(crate) struct RawKTraceMsg { + pub(crate) msg_type: u32, + pub(crate) timestamp: U64, + pub(crate) args: Array<4, U64>, + pub(crate) code: u8, + pub(crate) channel: u8, + __pad: Pad<1>, + pub(crate) thread: u8, + pub(crate) unk_flag: U64, +} + +#[versions(AGX)] +pub(crate) const STATS_SZ: usize = { + #[ver(V < V13_0B4)] + { + 0x2c + } + #[ver(V >= V13_0B4)] + { + 0x3c + } +}; + +#[versions(AGX)] +#[derive(Debug, Copy, Clone)] +#[repr(C, u32)] +#[allow(dead_code)] +pub(crate) enum StatsMsg { + Power { + // 0x00 + __pad: Pad<0x18>, + power: U64, + }, + Unk1(Array<{ STATS_SZ::ver }, u8>), + PowerOn { + // 0x02 + off_time: U64, + }, + 
PowerOff { + // 0x03 + on_time: U64, + }, + Utilization { + // 0x04 + timestamp: U64, + util1: u32, + util2: u32, + util3: u32, + util4: u32, + }, + Unk5(Array<{ STATS_SZ::ver }, u8>), + Unk6(Array<{ STATS_SZ::ver }, u8>), + Unk7(Array<{ STATS_SZ::ver }, u8>), + Unk8(Array<{ STATS_SZ::ver }, u8>), + AvgPower { + // 0x09 + active_cs: U64, + unk2: u32, + unk3: u32, + unk4: u32, + avg_power: u32, + }, + Temperature { + // 0x0a + __pad: Pad<0x8>, + raw_value: u32, + scale: u32, + tmin: u32, + tmax: u32, + }, + PowerState { + // 0x0b + timestamp: U64, + last_busy_ts: U64, + active: u32, + poweroff: u32, + unk1: u32, + pstate: u32, + unk2: u32, + unk3: u32, + }, + FwBusy { + // 0x0c + timestamp: U64, + busy: u32, + }, + PState { + // 0x0d + __pad: Pad<0x8>, + ps_min: u32, + unk1: u32, + ps_max: u32, + unk2: u32, + }, + TempSensor { + // 0x0e + __pad: Pad<0x4>, + sensor_id: u32, + raw_value: u32, + scale: u32, + tmin: u32, + tmax: u32, + }, // Max discriminant: 0xe +} + +#[versions(AGX)] +pub(crate) const STATS_MAX: u32 = 0xe; + +#[versions(AGX)] +#[derive(Copy, Clone)] +#[repr(C)] +pub(crate) union RawStatsMsg { + pub(crate) raw: (u32, Array<{ STATS_SZ::ver }, u8>), + pub(crate) msg: StatsMsg::ver, +} + +#[versions(AGX)] +default_zeroed!(RawStatsMsg::ver); diff --git a/drivers/gpu/drm/asahi/fw/compute.rs b/drivers/gpu/drm/asahi/fw/compute.rs new file mode 100644 index 000000000000..0dbcd77c5e3e --- /dev/null +++ b/drivers/gpu/drm/asahi/fw/compute.rs @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT + +//! GPU compute job firmware structures + +use super::types::*; +use super::{event, job, workqueue}; +use crate::{microseq, mmu}; +use kernel::sync::Arc; + +pub(crate) mod raw { + use super::*; + + #[derive(Debug)] + #[repr(C)] + pub(crate) struct JobParameters1<'a> { + pub(crate) preempt_buf1: GpuPointer<'a, &'a [u8]>, + pub(crate) encoder: U64, + pub(crate) preempt_buf2: GpuPointer<'a, &'a [u8]>, + pub(crate) preempt_buf3: GpuPointer<'a, &'a [u8]>, + pub(crate) preempt_buf4: GpuPointer<'a, &'a [u8]>, + pub(crate) preempt_buf5: GpuPointer<'a, &'a [u8]>, + pub(crate) pipeline_base: U64, + pub(crate) unk_38: U64, + pub(crate) unk_40: u32, + pub(crate) unk_44: u32, + pub(crate) compute_layout_addr: U64, + pub(crate) unk_50: u32, + pub(crate) unk_54: u32, + pub(crate) unk_58: u32, + pub(crate) unk_5c: u32, + pub(crate) iogpu_unk_40: u32, + } + + #[versions(AGX)] + #[derive(Debug)] + #[repr(C)] + pub(crate) struct JobParameters2<'a> { + #[ver(V >= V13_0B4)] + pub(crate) unk_0_0: u32, + pub(crate) unk_0: Array<0x24, u8>, + pub(crate) preempt_buf1: GpuPointer<'a, &'a [u8]>, + pub(crate) encoder_end: U64, + pub(crate) unk_34: Array<0x28, u8>, + #[ver(V < V13_0B4)] + pub(crate) unk_5c: u32, + } + + #[versions(AGX)] + #[derive(Debug)] + #[repr(C)] + pub(crate) struct RunCompute<'a> { + pub(crate) tag: workqueue::CommandType, + + #[ver(V >= V13_0B4)] + pub(crate) counter: U64, + + pub(crate) unk_4: u32, + pub(crate) vm_slot: u32, + pub(crate) notifier: GpuPointer<'a, event::Notifier::ver>, + pub(crate) unk_pointee: Array<0x54, u8>, + pub(crate) job_params1: JobParameters1<'a>, + pub(crate) unk_b8: Array<0x11c, u8>, + pub(crate) microsequence: GpuPointer<'a, &'a [u8]>, + pub(crate) microsequence_size: u32, + pub(crate) job_params2: JobParameters2::ver<'a>, + pub(crate) encoder_params: job::raw::EncoderParams<'a>, + pub(crate) meta: job::raw::JobMeta, + pub(crate) cur_ts: U64, + pub(crate) start_ts: Option<GpuPointer<'a, AtomicU64>>, + pub(crate) end_ts: Option<GpuPointer<'a, AtomicU64>>, + 
pub(crate) unk_2c0: u32, + pub(crate) unk_2c4: u32, + pub(crate) unk_2c8: u32, + pub(crate) unk_2cc: u32, + pub(crate) client_sequence: u8, + pub(crate) pad_2d1: Array<3, u8>, + pub(crate) unk_2d4: u32, + pub(crate) unk_2d8: u8, + #[ver(V >= V13_0B4)] + pub(crate) unk_ts: U64, + #[ver(V >= V13_0B4)] + pub(crate) unk_2e1: Array<0x1c, u8>, + #[ver(V >= V13_0B4)] + pub(crate) unk_flag: U32, + #[ver(V >= V13_0B4)] + pub(crate) unk_pad: Array<0x10, u8>, + } +} + +#[versions(AGX)] +#[derive(Debug)] +pub(crate) struct RunCompute { + pub(crate) notifier: Arc<GpuObject<event::Notifier::ver>>, + pub(crate) preempt_buf: GpuArray<u8>, + pub(crate) seq_buf: GpuArray<u64>, + pub(crate) micro_seq: microseq::MicroSequence, + pub(crate) vm_bind: mmu::VmBind, + pub(crate) timestamps: Arc<GpuObject<job::JobTimestamps>>, +} + +#[versions(AGX)] +impl GpuStruct for RunCompute::ver { + type Raw<'a> = raw::RunCompute::ver<'a>; +} + +#[versions(AGX)] +impl workqueue::Command for RunCompute::ver {} diff --git a/drivers/gpu/drm/asahi/fw/event.rs b/drivers/gpu/drm/asahi/fw/event.rs new file mode 100644 index 000000000000..fbf65ab6d976 --- /dev/null +++ b/drivers/gpu/drm/asahi/fw/event.rs @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT + +//! GPU events control structures & stamps + +use super::types::*; +use crate::{default_zeroed, trivial_gpustruct}; +use core::sync::atomic::Ordering; + +pub(crate) mod raw { + use super::*; + + #[derive(Debug, Clone, Copy, Default)] + #[repr(C)] + pub(crate) struct LinkedListHead { + pub(crate) prev: Option<GpuWeakPointer<LinkedListHead>>, + pub(crate) next: Option<GpuWeakPointer<LinkedListHead>>, + } + + #[derive(Debug, Clone, Copy)] + #[repr(C)] + pub(crate) struct NotifierList { + pub(crate) list_head: LinkedListHead, + pub(crate) unkptr_10: U64, + } + default_zeroed!(NotifierList); + + #[versions(AGX)] + #[derive(Debug, Clone, Copy)] + #[repr(C)] + pub(crate) struct NotifierState { + unk_14: u32, + unk_18: U64, + unk_20: u32, + vm_slot: u32, + has_vtx: u32, + pstamp_vtx: Array<4, U64>, + has_frag: u32, + pstamp_frag: Array<4, U64>, + has_comp: u32, + pstamp_comp: Array<4, U64>, + #[ver(G >= G14 && V < V13_0B4)] + unk_98_g14_0: Array<0x14, u8>, + in_list: u32, + list_head: LinkedListHead, + #[ver(G >= G14 && V < V13_0B4)] + unk_a8_g14_0: Pad<4>, + #[ver(V >= V13_0B4)] + pub(crate) unk_buf: Array<0x8, u8>, // Init to all-ff + } + + #[versions(AGX)] + impl Default for NotifierState::ver { + fn default() -> Self { + #[allow(unused_mut)] + let mut s: Self = unsafe { core::mem::zeroed() }; + #[ver(V >= V13_0B4)] + s.unk_buf = Array::new([0xff; 0x8]); + s + } + } + + #[derive(Debug)] + #[repr(transparent)] + pub(crate) struct Threshold(AtomicU64); + default_zeroed!(Threshold); + + impl Threshold { + pub(crate) fn increment(&self) { + // We could use fetch_add, but the non-LSE atomic + // sequence Rust produces confuses the hypervisor. 
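+            // Note: a relaxed load followed by a separate store is not an
+            // atomic read-modify-write, so this presumably relies on threshold
+            // increments never racing with each other (a single writer per
+            // threshold); concurrent callers could otherwise lose an increment.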
+ let v = self.0.load(Ordering::Relaxed); + self.0.store(v + 1, Ordering::Relaxed); + } + } + + #[versions(AGX)] + #[derive(Debug)] + #[repr(C)] + pub(crate) struct Notifier<'a> { + pub(crate) threshold: GpuPointer<'a, super::Threshold>, + pub(crate) generation: AtomicU32, + pub(crate) cur_count: AtomicU32, + pub(crate) unk_10: AtomicU32, + pub(crate) state: NotifierState::ver, + } +} + +trivial_gpustruct!(Threshold); +trivial_gpustruct!(NotifierList); + +#[versions(AGX)] +#[derive(Debug)] +pub(crate) struct Notifier { + pub(crate) threshold: GpuObject<Threshold>, +} + +#[versions(AGX)] +impl GpuStruct for Notifier::ver { + type Raw<'a> = raw::Notifier::ver<'a>; +} diff --git a/drivers/gpu/drm/asahi/fw/fragment.rs b/drivers/gpu/drm/asahi/fw/fragment.rs new file mode 100644 index 000000000000..eca275efb967 --- /dev/null +++ b/drivers/gpu/drm/asahi/fw/fragment.rs @@ -0,0 +1,276 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT + +//! GPU fragment job firmware structures + +use super::types::*; +use super::{event, job, workqueue}; +use crate::{buffer, fw, microseq, mmu}; +use kernel::sync::Arc; + +pub(crate) mod raw { + use super::*; + + #[derive(Debug, Clone, Copy)] + #[repr(C)] + pub(crate) struct ClearPipelineBinding { + pub(crate) pipeline_bind: U64, + pub(crate) address: U64, + } + + #[derive(Debug, Clone, Copy, Default)] + #[repr(C)] + pub(crate) struct StorePipelineBinding { + pub(crate) unk_0: U64, + pub(crate) unk_8: u32, + pub(crate) pipeline_bind: u32, + pub(crate) unk_10: u32, + pub(crate) address: u32, + pub(crate) unk_18: u32, + pub(crate) unk_1c_padding: u32, + } + + impl StorePipelineBinding { + pub(crate) fn new(pipeline_bind: u32, address: u32) -> StorePipelineBinding { + StorePipelineBinding { + pipeline_bind, + address, + ..Default::default() + } + } + } + + #[derive(Debug)] + #[repr(C)] + pub(crate) struct ArrayAddr { + pub(crate) ptr: U64, + pub(crate) unk_padding: U64, + } + + #[versions(AGX)] + #[derive(Debug, Clone, Copy)] + #[repr(C)] + pub(crate) struct AuxFBInfo { + pub(crate) iogpu_unk_214: u32, + pub(crate) unk2: u32, + pub(crate) width: u32, + pub(crate) height: u32, + + #[ver(V >= V13_0B4)] + pub(crate) unk3: U64, + } + + #[versions(AGX)] + #[derive(Debug)] + #[repr(C)] + pub(crate) struct JobParameters1<'a> { + pub(crate) utile_config: u32, + pub(crate) unk_4: u32, + pub(crate) clear_pipeline: ClearPipelineBinding, + pub(crate) ppp_multisamplectl: U64, + pub(crate) scissor_array: U64, + pub(crate) depth_bias_array: U64, + pub(crate) aux_fb_info: AuxFBInfo::ver, + pub(crate) depth_dimensions: U64, + pub(crate) visibility_result_buffer: U64, + pub(crate) zls_ctrl: U64, + + #[ver(G >= G14)] + pub(crate) unk_58_g14_0: U64, + #[ver(G >= G14)] + pub(crate) unk_58_g14_8: U64, + + pub(crate) depth_buffer_ptr1: U64, + pub(crate) depth_buffer_ptr2: U64, + pub(crate) stencil_buffer_ptr1: U64, + pub(crate) stencil_buffer_ptr2: U64, + + #[ver(G >= G14)] + pub(crate) unk_68_g14_0: Array<0x20, u8>, + + pub(crate) unk_78: Array<0x4, U64>, + pub(crate) depth_meta_buffer_ptr1: U64, + pub(crate) unk_a0: U64, + pub(crate) depth_meta_buffer_ptr2: U64, + pub(crate) unk_b0: U64, + pub(crate) stencil_meta_buffer_ptr1: U64, + pub(crate) unk_c0: U64, + pub(crate) stencil_meta_buffer_ptr2: U64, + pub(crate) unk_d0: U64, + pub(crate) tvb_tilemap: GpuPointer<'a, &'a [u8]>, + pub(crate) tvb_heapmeta: GpuPointer<'a, &'a [u8]>, + pub(crate) mtile_stride_dwords: U64, + pub(crate) tvb_heapmeta_2: GpuPointer<'a, &'a [u8]>, + pub(crate) tile_config: U64, + pub(crate) aux_fb: GpuPointer<'a, &'a 
[u8]>, + pub(crate) unk_108: Array<0x6, U64>, + pub(crate) pipeline_base: U64, + pub(crate) unk_140: U64, + pub(crate) unk_148: U64, + pub(crate) unk_150: U64, + pub(crate) unk_158: U64, + pub(crate) unk_160: U64, + + #[ver(G < G14)] + pub(crate) unk_168_padding: Array<0x1d8, u8>, + #[ver(G >= G14)] + pub(crate) unk_168_padding: Array<0x1a8, u8>, + #[ver(V < V13_0B4)] + pub(crate) __pad0: Pad<0x8>, + } + + #[derive(Debug)] + #[repr(C)] + pub(crate) struct JobParameters2 { + pub(crate) store_pipeline_bind: u32, + pub(crate) store_pipeline_addr: u32, + pub(crate) unk_8: u32, + pub(crate) unk_c: u32, + pub(crate) merge_upper_x: F32, + pub(crate) merge_upper_y: F32, + pub(crate) unk_18: U64, + pub(crate) utiles_per_mtile_y: u16, + pub(crate) utiles_per_mtile_x: u16, + pub(crate) unk_24: u32, + pub(crate) tile_counts: u32, + pub(crate) iogpu_unk_212: u32, + pub(crate) isp_bgobjdepth: u32, + pub(crate) isp_bgobjvals: u32, + pub(crate) unk_38: u32, + pub(crate) unk_3c: u32, + pub(crate) unk_40: u32, + } + + #[versions(AGX)] + #[derive(Debug)] + #[repr(C)] + pub(crate) struct JobParameters3 { + pub(crate) unk_44_padding: Array<0xac, u8>, + pub(crate) depth_bias_array: ArrayAddr, + pub(crate) scissor_array: ArrayAddr, + pub(crate) visibility_result_buffer: U64, + pub(crate) unk_118: U64, + pub(crate) unk_120: Array<0x25, U64>, + pub(crate) unk_reload_pipeline: ClearPipelineBinding, + pub(crate) unk_258: U64, + pub(crate) unk_260: U64, + pub(crate) unk_268: U64, + pub(crate) unk_270: U64, + pub(crate) reload_pipeline: ClearPipelineBinding, + pub(crate) zls_ctrl: U64, + pub(crate) unk_290: U64, + pub(crate) depth_buffer_ptr1: U64, + pub(crate) unk_2a0: U64, + pub(crate) unk_2a8: U64, + pub(crate) depth_buffer_ptr2: U64, + pub(crate) depth_buffer_ptr3: U64, + pub(crate) depth_meta_buffer_ptr3: U64, + pub(crate) stencil_buffer_ptr1: U64, + pub(crate) unk_2d0: U64, + pub(crate) unk_2d8: U64, + pub(crate) stencil_buffer_ptr2: U64, + pub(crate) stencil_buffer_ptr3: U64, + pub(crate) stencil_meta_buffer_ptr3: U64, + pub(crate) unk_2f8: Array<2, U64>, + pub(crate) iogpu_unk_212: u32, + pub(crate) unk_30c: u32, + pub(crate) aux_fb_info: AuxFBInfo::ver, + pub(crate) unk_320_padding: Array<0x10, u8>, + pub(crate) unk_partial_store_pipeline: StorePipelineBinding, + pub(crate) partial_store_pipeline: StorePipelineBinding, + pub(crate) isp_bgobjdepth: u32, + pub(crate) isp_bgobjvals: u32, + pub(crate) iogpu_unk_49: u32, + pub(crate) unk_37c: u32, + pub(crate) unk_380: U64, + pub(crate) unk_388: U64, + + #[ver(V >= V13_0B4)] + pub(crate) unk_390_0: U64, + + pub(crate) depth_dimensions: U64, + } + + #[versions(AGX)] + #[derive(Debug)] + #[repr(C)] + pub(crate) struct RunFragment<'a> { + pub(crate) tag: workqueue::CommandType, + + #[ver(V >= V13_0B4)] + pub(crate) counter: U64, + + pub(crate) vm_slot: u32, + pub(crate) unk_8: u32, + pub(crate) microsequence: GpuPointer<'a, &'a [u8]>, + pub(crate) microsequence_size: u32, + pub(crate) notifier: GpuPointer<'a, event::Notifier::ver>, + pub(crate) buffer: GpuPointer<'a, fw::buffer::Info::ver>, + pub(crate) scene: GpuPointer<'a, fw::buffer::Scene::ver>, + pub(crate) unk_buffer_buf: GpuWeakPointer<[u8]>, + pub(crate) tvb_tilemap: GpuPointer<'a, &'a [u8]>, + pub(crate) ppp_multisamplectl: U64, + pub(crate) samples: u32, + pub(crate) tiles_per_mtile_y: u16, + pub(crate) tiles_per_mtile_x: u16, + pub(crate) unk_50: U64, + pub(crate) unk_58: U64, + pub(crate) merge_upper_x: F32, + pub(crate) merge_upper_y: F32, + pub(crate) unk_68: U64, + pub(crate) tile_count: U64, + 
pub(crate) job_params1: JobParameters1::ver<'a>, + pub(crate) job_params2: JobParameters2, + pub(crate) job_params3: JobParameters3::ver, + pub(crate) unk_758_flag: u32, + pub(crate) unk_75c_flag: u32, + pub(crate) unk_buf: Array<0x110, u8>, + pub(crate) busy_flag: u32, + pub(crate) tvb_overflow_count: u32, + pub(crate) unk_878: u32, + pub(crate) encoder_params: job::raw::EncoderParams<'a>, + pub(crate) process_empty_tiles: u32, + pub(crate) no_clear_pipeline_textures: u32, + pub(crate) unk_param: u32, + pub(crate) unk_pointee: u32, + pub(crate) meta: job::raw::JobMeta, + pub(crate) unk_after_meta: u32, + pub(crate) unk_buf_0: U64, + pub(crate) unk_buf_8: U64, + pub(crate) unk_buf_10: U64, + pub(crate) cur_ts: U64, + pub(crate) start_ts: Option<GpuPointer<'a, AtomicU64>>, + pub(crate) end_ts: Option<GpuPointer<'a, AtomicU64>>, + pub(crate) unk_914: u32, + pub(crate) unk_918: U64, + pub(crate) unk_920: u32, + pub(crate) client_sequence: u8, + pub(crate) pad_925: Array<3, u8>, + pub(crate) unk_928: u32, + pub(crate) unk_92c: u8, + + #[ver(V >= V13_0B4)] + pub(crate) unk_ts: U64, + + #[ver(V >= V13_0B4)] + pub(crate) unk_92d_8: Array<0x1b, u8>, + } +} + +#[versions(AGX)] +#[derive(Debug)] +pub(crate) struct RunFragment { + pub(crate) notifier: Arc<GpuObject<event::Notifier::ver>>, + pub(crate) scene: Arc<buffer::Scene::ver>, + pub(crate) micro_seq: microseq::MicroSequence, + pub(crate) vm_bind: mmu::VmBind, + pub(crate) aux_fb: GpuArray<u8>, + pub(crate) timestamps: Arc<GpuObject<job::RenderTimestamps>>, +} + +#[versions(AGX)] +impl GpuStruct for RunFragment::ver { + type Raw<'a> = raw::RunFragment::ver<'a>; +} + +#[versions(AGX)] +impl workqueue::Command for RunFragment::ver {} diff --git a/drivers/gpu/drm/asahi/fw/initdata.rs b/drivers/gpu/drm/asahi/fw/initdata.rs new file mode 100644 index 000000000000..44de0c1cccf3 --- /dev/null +++ b/drivers/gpu/drm/asahi/fw/initdata.rs @@ -0,0 +1,1264 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT + +//! 
GPU initialization / global structures + +use super::channels; +use super::types::*; +use crate::{default_zeroed, no_debug, trivial_gpustruct}; + +pub(crate) mod raw { + use super::*; + + #[derive(Debug, Default)] + #[repr(C)] + pub(crate) struct ChannelRing<T: GpuStruct + Debug + Default, U: Copy> { + pub(crate) state: Option<GpuWeakPointer<T>>, + pub(crate) ring: Option<GpuWeakPointer<[U]>>, + } + + #[versions(AGX)] + #[derive(Debug)] + #[repr(C)] + pub(crate) struct PipeChannels { + pub(crate) vtx: ChannelRing<channels::ChannelState, channels::PipeMsg::ver>, + pub(crate) frag: ChannelRing<channels::ChannelState, channels::PipeMsg::ver>, + pub(crate) comp: ChannelRing<channels::ChannelState, channels::PipeMsg::ver>, + } + #[versions(AGX)] + default_zeroed!(PipeChannels::ver); + + #[derive(Debug, Default)] + #[repr(C)] + pub(crate) struct FwStatusFlags { + pub(crate) halt_count: AtomicU32, + __pad0: Pad<0xc>, + pub(crate) halted: AtomicU32, + __pad1: Pad<0xc>, + pub(crate) resume: AtomicU32, + __pad2: Pad<0xc>, + pub(crate) unk_40: u32, + __pad3: Pad<0xc>, + pub(crate) unk_ctr: u32, + __pad4: Pad<0xc>, + pub(crate) unk_60: u32, + __pad5: Pad<0xc>, + pub(crate) unk_70: u32, + __pad6: Pad<0xc>, + } + + #[derive(Debug, Default)] + #[repr(C)] + pub(crate) struct FwStatus { + pub(crate) fwctl_channel: ChannelRing<channels::FwCtlChannelState, channels::FwCtlMsg>, + pub(crate) flags: FwStatusFlags, + } + + #[derive(Debug)] + #[repr(C)] + pub(crate) struct HwDataShared1 { + pub(crate) table: Array<16, i32>, + pub(crate) unk_44: Array<0x60, u8>, + pub(crate) unk_a4: u32, + pub(crate) unk_a8: u32, + } + default_zeroed!(HwDataShared1); + + #[derive(Debug, Default)] + #[repr(C)] + pub(crate) struct HwDataShared2Curve { + pub(crate) unk_0: u32, + pub(crate) unk_4: u32, + pub(crate) t1: Array<16, i16>, + pub(crate) t2: Array<16, i16>, + pub(crate) t3: Array<8, Array<16, i32>>, + } + + #[derive(Debug, Default)] + #[repr(C)] + pub(crate) struct HwDataShared2T8112 { + pub(crate) unk_0: Array<5, u32>, + pub(crate) unk_14: u32, + pub(crate) unk_18: Array<8, u32>, + pub(crate) curve1: HwDataShared2Curve, + pub(crate) curve2: HwDataShared2Curve, + } + + #[derive(Debug)] + #[repr(C)] + pub(crate) struct HwDataShared2 { + pub(crate) table: Array<10, i32>, + pub(crate) unk_28: Array<0x10, u8>, + pub(crate) t8112: HwDataShared2T8112, + pub(crate) unk_500: u32, + pub(crate) unk_504: u32, + pub(crate) unk_508: u32, + pub(crate) unk_50c: u32, + pub(crate) unk_510: u32, + } + default_zeroed!(HwDataShared2); + + #[derive(Debug)] + #[repr(C)] + pub(crate) struct HwDataShared3 { + pub(crate) unk_0: u32, + pub(crate) unk_4: u32, + pub(crate) unk_8: u32, + pub(crate) table: Array<16, u32>, + pub(crate) unk_4c: u32, + } + default_zeroed!(HwDataShared3); + + #[derive(Debug)] + #[repr(C)] + pub(crate) struct HwDataA130Extra { + pub(crate) unk_0: Array<0x38, u8>, + pub(crate) unk_38: u32, + pub(crate) unk_3c: u32, + pub(crate) unk_40: u32, + pub(crate) unk_44: u32, + pub(crate) unk_48: u32, + pub(crate) unk_4c: u32, + pub(crate) unk_50: u32, + pub(crate) unk_54: u32, + pub(crate) unk_58: u32, + pub(crate) unk_5c: u32, + pub(crate) unk_60: F32, + pub(crate) unk_64: F32, + pub(crate) unk_68: F32, + pub(crate) unk_6c: F32, + pub(crate) unk_70: F32, + pub(crate) unk_74: F32, + pub(crate) unk_78: F32, + pub(crate) unk_7c: F32, + pub(crate) unk_80: F32, + pub(crate) unk_84: F32, + pub(crate) unk_88: u32, + pub(crate) unk_8c: u32, + pub(crate) max_pstate_scaled_1: u32, + pub(crate) unk_94: u32, + pub(crate) unk_98: u32, + pub(crate) 
unk_9c: F32, + pub(crate) unk_a0: u32, + pub(crate) unk_a4: u32, + pub(crate) unk_a8: u32, + pub(crate) unk_ac: u32, + pub(crate) unk_b0: u32, + pub(crate) unk_b4: u32, + pub(crate) unk_b8: u32, + pub(crate) unk_bc: u32, + pub(crate) unk_c0: u32, + pub(crate) unk_c4: F32, + pub(crate) unk_c8: Array<0x4c, u8>, + pub(crate) unk_114: F32, + pub(crate) unk_118: u32, + pub(crate) unk_11c: u32, + pub(crate) unk_120: u32, + pub(crate) unk_124: u32, + pub(crate) max_pstate_scaled_2: u32, + pub(crate) unk_12c: Array<0x8c, u8>, + } + default_zeroed!(HwDataA130Extra); + + #[derive(Default)] + #[repr(C)] + pub(crate) struct T81xxData { + pub(crate) unk_d8c: u32, + pub(crate) unk_d90: u32, + pub(crate) unk_d94: u32, + pub(crate) unk_d98: u32, + pub(crate) unk_d9c: F32, + pub(crate) unk_da0: u32, + pub(crate) unk_da4: F32, + pub(crate) unk_da8: u32, + pub(crate) unk_dac: F32, + pub(crate) unk_db0: u32, + pub(crate) unk_db4: u32, + pub(crate) unk_db8: F32, + pub(crate) unk_dbc: F32, + pub(crate) unk_dc0: u32, + pub(crate) unk_dc4: u32, + pub(crate) unk_dc8: u32, + pub(crate) max_pstate_scaled: u32, + } + + #[versions(AGX)] + #[derive(Default, Copy, Clone)] + #[repr(C)] + pub(crate) struct PowerZone { + pub(crate) val: F32, + pub(crate) target: u32, + pub(crate) target_off: u32, + pub(crate) filter_tc_x4: u32, + pub(crate) filter_tc_xperiod: u32, + #[ver(V >= V13_0B4)] + pub(crate) unk_10: u32, + #[ver(V >= V13_0B4)] + pub(crate) unk_14: u32, + pub(crate) filter_a_neg: F32, + pub(crate) filter_a: F32, + pub(crate) pad: u32, + } + + #[versions(AGX)] + #[repr(C)] + pub(crate) struct HwDataA { + pub(crate) unk_0: u32, + pub(crate) clocks_per_period: u32, + + #[ver(V >= V13_0B4)] + pub(crate) clocks_per_period_2: u32, + + pub(crate) unk_8: u32, + pub(crate) pwr_status: AtomicU32, + pub(crate) unk_10: F32, + pub(crate) unk_14: u32, + pub(crate) unk_18: u32, + pub(crate) unk_1c: u32, + pub(crate) unk_20: u32, + pub(crate) unk_24: u32, + pub(crate) actual_pstate: u32, + pub(crate) tgt_pstate: u32, + pub(crate) unk_30: u32, + pub(crate) cur_pstate: u32, + pub(crate) unk_38: u32, + + #[ver(V >= V13_0B4)] + pub(crate) unk_3c_0: u32, + + pub(crate) base_pstate_scaled: u32, + pub(crate) unk_40: u32, + pub(crate) max_pstate_scaled: u32, + pub(crate) unk_48: u32, + pub(crate) min_pstate_scaled: u32, + pub(crate) freq_mhz: F32, + pub(crate) unk_54: Array<0x20, u8>, + + #[ver(V >= V13_0B4)] + pub(crate) unk_74_0: u32, + + pub(crate) sram_k: Array<0x10, F32>, + pub(crate) unk_b4: Array<0x100, u8>, + pub(crate) unk_1b4: u32, + pub(crate) temp_c: u32, + pub(crate) avg_power_mw: u32, + pub(crate) update_ts: U64, + pub(crate) unk_1c8: u32, + pub(crate) unk_1cc: Array<0x478, u8>, + pub(crate) pad_644: Pad<0x8>, + pub(crate) unk_64c: u32, + pub(crate) unk_650: u32, + pub(crate) pad_654: u32, + pub(crate) pwr_filter_a_neg: F32, + pub(crate) pad_65c: u32, + pub(crate) pwr_filter_a: F32, + pub(crate) pad_664: u32, + pub(crate) pwr_integral_gain: F32, + pub(crate) pad_66c: u32, + pub(crate) pwr_integral_min_clamp: F32, + pub(crate) max_power_1: F32, + pub(crate) pwr_proportional_gain: F32, + pub(crate) pad_67c: u32, + pub(crate) pwr_pstate_related_k: F32, + pub(crate) pwr_pstate_max_dc_offset: i32, + pub(crate) unk_688: u32, + pub(crate) max_pstate_scaled_2: u32, + pub(crate) pad_690: u32, + pub(crate) unk_694: u32, + pub(crate) max_power_2: u32, + pub(crate) pad_69c: Pad<0x18>, + pub(crate) unk_6b4: u32, + + #[ver(V >= V13_0B4)] + pub(crate) unk_6b8_0: Array<0x10, u8>, + + pub(crate) max_pstate_scaled_3: u32, + pub(crate) unk_6bc: 
u32, + pub(crate) pad_6c0: Pad<0x14>, + pub(crate) ppm_filter_tc_periods_x4: u32, + pub(crate) unk_6d8: u32, + pub(crate) pad_6dc: u32, + pub(crate) ppm_filter_a_neg: F32, + pub(crate) pad_6e4: u32, + pub(crate) ppm_filter_a: F32, + pub(crate) pad_6ec: u32, + pub(crate) ppm_ki_dt: F32, + pub(crate) pad_6f4: u32, + pub(crate) pwr_integral_min_clamp_2: u32, + pub(crate) unk_6fc: F32, + pub(crate) ppm_kp: F32, + pub(crate) pad_704: u32, + pub(crate) unk_708: u32, + pub(crate) pwr_min_duty_cycle: u32, + pub(crate) max_pstate_scaled_4: u32, + pub(crate) unk_714: u32, + pub(crate) pad_718: u32, + pub(crate) unk_71c: F32, + pub(crate) max_power_3: u32, + pub(crate) cur_power_mw_2: u32, + pub(crate) ppm_filter_tc_ms: u32, + pub(crate) unk_72c: u32, + + #[ver(V >= V13_0B4)] + pub(crate) ppm_filter_tc_clks: u32, + + #[ver(V >= V13_0B4)] + pub(crate) unk_730_4: u32, + + #[ver(V >= V13_0B4)] + pub(crate) unk_730_8: u32, + + #[ver(V >= V13_0B4)] + pub(crate) unk_730_c: u32, + + pub(crate) unk_730: F32, + pub(crate) unk_734: u32, + pub(crate) unk_738: u32, + pub(crate) unk_73c: u32, + pub(crate) unk_740: u32, + pub(crate) unk_744: u32, + pub(crate) unk_748: Array<0x4, F32>, + pub(crate) unk_758: u32, + pub(crate) perf_tgt_utilization: u32, + pub(crate) pad_760: u32, + pub(crate) perf_boost_min_util: u32, + pub(crate) perf_boost_ce_step: u32, + pub(crate) perf_reset_iters: u32, + pub(crate) pad_770: u32, + pub(crate) unk_774: u32, + pub(crate) unk_778: u32, + pub(crate) perf_filter_drop_threshold: u32, + pub(crate) perf_filter_a_neg: F32, + pub(crate) perf_filter_a2_neg: F32, + pub(crate) perf_filter_a: F32, + pub(crate) perf_filter_a2: F32, + pub(crate) perf_ki: F32, + pub(crate) perf_ki2: F32, + pub(crate) perf_integral_min_clamp: F32, + pub(crate) unk_79c: F32, + pub(crate) perf_kp: F32, + pub(crate) perf_kp2: F32, + pub(crate) boost_state_unk_k: F32, + pub(crate) base_pstate_scaled_2: u32, + pub(crate) max_pstate_scaled_5: u32, + pub(crate) base_pstate_scaled_3: u32, + pub(crate) pad_7b8: u32, + pub(crate) perf_cur_utilization: F32, + pub(crate) perf_tgt_utilization_2: u32, + pub(crate) pad_7c4: Pad<0x18>, + pub(crate) unk_7dc: u32, + + #[ver(V >= V13_0B4)] + pub(crate) unk_7e0_0: Array<0x10, u8>, + + pub(crate) base_pstate_scaled_4: u32, + pub(crate) pad_7e4: u32, + pub(crate) unk_7e8: Array<0x14, u8>, + pub(crate) unk_7fc: F32, + pub(crate) pwr_min_duty_cycle_2: F32, + pub(crate) max_pstate_scaled_6: F32, + pub(crate) max_freq_mhz: u32, + pub(crate) pad_80c: u32, + pub(crate) unk_810: u32, + pub(crate) pad_814: u32, + pub(crate) pwr_min_duty_cycle_3: u32, + pub(crate) unk_81c: u32, + pub(crate) pad_820: u32, + pub(crate) min_pstate_scaled_4: F32, + pub(crate) max_pstate_scaled_7: u32, + pub(crate) unk_82c: u32, + pub(crate) unk_alpha_neg: F32, + pub(crate) unk_alpha: F32, + pub(crate) unk_838: u32, + pub(crate) unk_83c: u32, + pub(crate) pad_840: Pad<0x2c>, + pub(crate) unk_86c: u32, + pub(crate) fast_die0_sensor_mask: U64, + pub(crate) fast_die0_release_temp_cc: u32, + pub(crate) unk_87c: i32, + pub(crate) unk_880: u32, + pub(crate) unk_884: u32, + pub(crate) pad_888: u32, + pub(crate) unk_88c: u32, + pub(crate) pad_890: u32, + pub(crate) unk_894: F32, + pub(crate) pad_898: u32, + pub(crate) fast_die0_ki_dt: F32, + pub(crate) pad_8a0: u32, + pub(crate) unk_8a4: u32, + pub(crate) unk_8a8: F32, + pub(crate) fast_die0_kp: F32, + pub(crate) pad_8b0: u32, + pub(crate) unk_8b4: u32, + pub(crate) pwr_min_duty_cycle_4: u32, + pub(crate) max_pstate_scaled_8: u32, + pub(crate) max_pstate_scaled_9: u32, + 
pub(crate) fast_die0_prop_tgt_delta: u32, + pub(crate) unk_8c8: u32, + pub(crate) unk_8cc: u32, + pub(crate) pad_8d0: Pad<0x14>, + + #[ver(V >= V13_0B4)] + pub(crate) unk_8e4_0: Array<0x10, u8>, + + pub(crate) unk_8e4: u32, + pub(crate) unk_8e8: u32, + pub(crate) max_pstate_scaled_10: u32, + pub(crate) unk_8f0: u32, + pub(crate) unk_8f4: u32, + pub(crate) pad_8f8: u32, + pub(crate) pad_8fc: u32, + pub(crate) unk_900: Array<0x24, u8>, + pub(crate) unk_coef_a1: Array<8, Array<8, F32>>, + pub(crate) unk_coef_a2: Array<8, Array<8, F32>>, + pub(crate) pad_b24: Pad<0x70>, + pub(crate) max_pstate_scaled_11: u32, + pub(crate) freq_with_off: u32, + pub(crate) unk_b9c: u32, + pub(crate) unk_ba0: U64, + pub(crate) unk_ba8: U64, + pub(crate) unk_bb0: u32, + pub(crate) unk_bb4: u32, + pub(crate) pad_bb8: Pad<0x74>, + pub(crate) unk_c2c: u32, + pub(crate) power_zone_count: u32, + pub(crate) max_power_4: u32, + pub(crate) max_power_5: u32, + pub(crate) max_power_6: u32, + pub(crate) unk_c40: u32, + pub(crate) unk_c44: F32, + pub(crate) avg_power_target_filter_a_neg: F32, + pub(crate) avg_power_target_filter_a: F32, + pub(crate) avg_power_target_filter_tc_x4: u32, + pub(crate) avg_power_target_filter_tc_xperiod: u32, + + #[ver(V >= V13_0B4)] + pub(crate) avg_power_target_filter_tc_clks: u32, + + #[ver(V >= V13_0B4)] + pub(crate) unk_c58_4: u32, + + pub(crate) power_zones: Array<5, PowerZone::ver>, + pub(crate) avg_power_filter_tc_periods_x4: u32, + pub(crate) unk_cfc: u32, + pub(crate) unk_d00: u32, + pub(crate) avg_power_filter_a_neg: F32, + pub(crate) unk_d08: u32, + pub(crate) avg_power_filter_a: F32, + pub(crate) unk_d10: u32, + pub(crate) avg_power_ki_dt: F32, + pub(crate) unk_d18: u32, + pub(crate) unk_d1c: u32, + pub(crate) unk_d20: F32, + pub(crate) avg_power_kp: F32, + pub(crate) unk_d28: u32, + pub(crate) unk_d2c: u32, + pub(crate) avg_power_min_duty_cycle: u32, + pub(crate) max_pstate_scaled_12: u32, + pub(crate) max_pstate_scaled_13: u32, + pub(crate) unk_d3c: u32, + pub(crate) max_power_7: F32, + pub(crate) max_power_8: u32, + pub(crate) unk_d48: u32, + pub(crate) avg_power_filter_tc_ms: u32, + pub(crate) unk_d50: u32, + + #[ver(V >= V13_0B4)] + pub(crate) avg_power_filter_tc_clks: u32, + + #[ver(V >= V13_0B4)] + pub(crate) unk_d54_4: Array<0xc, u8>, + + pub(crate) unk_d54: Array<0x10, u8>, + pub(crate) max_pstate_scaled_14: u32, + pub(crate) unk_d68: Array<0x24, u8>, + + pub(crate) t81xx_data: T81xxData, + + pub(crate) unk_dd0: Array<0x40, u8>, + + #[ver(V >= V13_2)] + pub(crate) unk_e10_pad: Array<0x10, u8>, + + #[ver(V >= V13_0B4)] + pub(crate) unk_e10_0: HwDataA130Extra, + + pub(crate) unk_e10: Array<0xc, u8>, + pub(crate) fast_die0_sensor_mask_2: U64, + pub(crate) unk_e24: u32, + pub(crate) unk_e28: u32, + pub(crate) unk_e2c: Pad<0x1c>, + pub(crate) unk_coef_b1: Array<8, Array<8, F32>>, + pub(crate) unk_coef_b2: Array<8, Array<8, F32>>, + pub(crate) pad_1048: Pad<0x5e4>, + pub(crate) fast_die0_sensor_mask_alt: U64, + #[ver(V < V13_0B4)] + pub(crate) fast_die0_sensor_present: U64, + + pub(crate) unk_163c: u32, + + pub(crate) unk_1640: Array<0x2000, u8>, + pub(crate) unk_3640: u32, + pub(crate) unk_3644: u32, + pub(crate) hws1: HwDataShared1, + + #[ver(V >= V13_0B4)] + pub(crate) unk_pad1: Pad<0x20>, + + pub(crate) hws2: HwDataShared2, + pub(crate) unk_3c04: u32, + pub(crate) hws3: HwDataShared3, + pub(crate) unk_3c58: Array<0x3c, u8>, + pub(crate) unk_3c94: u32, + pub(crate) unk_3c98: U64, + pub(crate) unk_3ca0: U64, + pub(crate) unk_3ca8: U64, + pub(crate) unk_3cb0: U64, + pub(crate) 
ts_last_idle: U64, + pub(crate) ts_last_poweron: U64, + pub(crate) ts_last_poweroff: U64, + pub(crate) unk_3cd0: U64, + pub(crate) unk_3cd8: U64, + + #[ver(V >= V13_0B4)] + pub(crate) unk_3ce0_0: u32, + + pub(crate) unk_3ce0: u32, + pub(crate) unk_3ce4: u32, + pub(crate) unk_3ce8: u32, + pub(crate) unk_3cec: u32, + pub(crate) unk_3cf0: u32, + pub(crate) core_leak_coef: Array<8, F32>, + pub(crate) sram_leak_coef: Array<8, F32>, + pub(crate) unk_3d34: Array<0x38, u8>, + + #[ver(V >= V13_0B4)] + pub(crate) unk_3d6c: Array<0x38, u8>, + } + #[versions(AGX)] + default_zeroed!(HwDataA::ver); + #[versions(AGX)] + no_debug!(HwDataA::ver); + + #[derive(Debug, Default, Clone, Copy)] + #[repr(C)] + pub(crate) struct IOMapping { + pub(crate) phys_addr: U64, + pub(crate) virt_addr: U64, + pub(crate) size: u32, + pub(crate) range_size: u32, + pub(crate) readwrite: U64, + } + + #[versions(AGX)] + #[derive(Debug)] + #[repr(C)] + pub(crate) struct HwDataB { + #[ver(V < V13_0B4)] + pub(crate) unk_0: U64, + + pub(crate) unk_8: U64, + + #[ver(V < V13_0B4)] + pub(crate) unk_10: U64, + + pub(crate) unk_18: U64, + pub(crate) unk_20: U64, + pub(crate) unk_28: U64, + pub(crate) unk_30: U64, + pub(crate) unkptr_38: U64, + pub(crate) pad_40: Pad<0x20>, + + #[ver(V < V13_0B4)] + pub(crate) yuv_matrices: Array<0xf, Array<3, Array<4, i16>>>, + + #[ver(V >= V13_0B4)] + pub(crate) yuv_matrices: Array<0x3f, Array<3, Array<4, i16>>>, + + pub(crate) pad_1c8: Pad<0x8>, + pub(crate) io_mappings: Array<0x14, IOMapping>, + + #[ver(V >= V13_0B4)] + pub(crate) unk_450_0: Array<0x68, u8>, + + pub(crate) chip_id: u32, + pub(crate) unk_454: u32, + pub(crate) unk_458: u32, + pub(crate) unk_45c: u32, + pub(crate) unk_460: u32, + pub(crate) unk_464: u32, + pub(crate) unk_468: u32, + pub(crate) unk_46c: u32, + pub(crate) unk_470: u32, + pub(crate) unk_474: u32, + pub(crate) unk_478: u32, + pub(crate) unk_47c: u32, + pub(crate) unk_480: u32, + pub(crate) unk_484: u32, + pub(crate) unk_488: u32, + pub(crate) unk_48c: u32, + pub(crate) base_clock_khz: u32, + pub(crate) power_sample_period: u32, + pub(crate) pad_498: Pad<0x4>, + pub(crate) unk_49c: u32, + pub(crate) unk_4a0: u32, + pub(crate) unk_4a4: u32, + pub(crate) pad_4a8: Pad<0x4>, + pub(crate) unk_4ac: u32, + pub(crate) pad_4b0: Pad<0x8>, + pub(crate) unk_4b8: u32, + pub(crate) unk_4bc: Array<0x4, u8>, + pub(crate) unk_4c0: u32, + pub(crate) unk_4c4: u32, + pub(crate) unk_4c8: u32, + pub(crate) unk_4cc: u32, + pub(crate) unk_4d0: u32, + pub(crate) unk_4d4: u32, + pub(crate) unk_4d8: Array<0x4, u8>, + pub(crate) unk_4dc: u32, + pub(crate) unk_4e0: U64, + pub(crate) unk_4e8: u32, + pub(crate) unk_4ec: u32, + pub(crate) unk_4f0: u32, + pub(crate) unk_4f4: u32, + pub(crate) unk_4f8: u32, + pub(crate) unk_4fc: u32, + pub(crate) unk_500: u32, + + #[ver(V >= V13_0B4)] + pub(crate) unk_504_0: u32, + + pub(crate) unk_504: u32, + pub(crate) unk_508: u32, + pub(crate) unk_50c: u32, + pub(crate) unk_510: u32, + pub(crate) unk_514: u32, + pub(crate) unk_518: u32, + pub(crate) unk_51c: u32, + pub(crate) unk_520: u32, + pub(crate) unk_524: u32, + pub(crate) unk_528: u32, + pub(crate) unk_52c: u32, + pub(crate) unk_530: u32, + pub(crate) unk_534: u32, + pub(crate) unk_538: u32, + + #[ver(V >= V13_0B4)] + pub(crate) unk_53c_0: u32, + + pub(crate) num_frags: u32, + pub(crate) unk_540: u32, + pub(crate) unk_544: u32, + pub(crate) unk_548: u32, + pub(crate) unk_54c: u32, + pub(crate) unk_550: u32, + pub(crate) unk_554: u32, + pub(crate) uat_ttb_base: U64, + pub(crate) gpu_core_id: u32, + pub(crate) 
gpu_rev_id: u32, + pub(crate) num_cores: u32, + pub(crate) max_pstate: u32, + + #[ver(V < V13_0B4)] + pub(crate) num_pstates: u32, + + pub(crate) frequencies: Array<0x10, u32>, + pub(crate) voltages: Array<0x10, [u32; 0x8]>, + pub(crate) voltages_sram: Array<0x10, [u32; 0x8]>, + pub(crate) sram_k: Array<0x10, F32>, + pub(crate) unk_9f4: Array<0x10, u32>, + pub(crate) rel_max_powers: Array<0x10, u32>, + pub(crate) rel_boost_freqs: Array<0x10, u32>, + + #[ver(V < V13_0B4)] + pub(crate) min_sram_volt: u32, + + #[ver(V < V13_0B4)] + pub(crate) unk_ab8: u32, + + #[ver(V < V13_0B4)] + pub(crate) unk_abc: u32, + + #[ver(V < V13_0B4)] + pub(crate) unk_ac0: u32, + + #[ver(V >= V13_0B4)] + pub(crate) unk_ac4_0: Array<0x1f0, u8>, + + pub(crate) pad_ac4: Pad<0x8>, + pub(crate) unk_acc: u32, + pub(crate) unk_ad0: u32, + pub(crate) pad_ad4: Pad<0x10>, + pub(crate) unk_ae4: Array<0x4, u32>, + pub(crate) pad_af4: Pad<0x4>, + pub(crate) unk_af8: u32, + pub(crate) pad_afc: Pad<0x8>, + pub(crate) unk_b04: u32, + pub(crate) unk_b08: u32, + pub(crate) unk_b0c: u32, + pub(crate) unk_b10: u32, + pub(crate) pad_b14: Pad<0x8>, + pub(crate) unk_b1c: u32, + pub(crate) unk_b20: u32, + pub(crate) unk_b24: u32, + pub(crate) unk_b28: u32, + pub(crate) unk_b2c: u32, + pub(crate) unk_b30: u32, + pub(crate) unk_b34: u32, + + #[ver(V >= V13_0B4)] + pub(crate) unk_b38_0: u32, + + #[ver(V >= V13_0B4)] + pub(crate) unk_b38_4: u32, + + pub(crate) unk_b38: Array<0xc, u32>, + pub(crate) unk_b68: u32, + + #[ver(V >= V13_0B4)] + pub(crate) unk_b6c: Array<0xd0, u8>, + + #[ver(V >= V13_0B4)] + pub(crate) unk_c3c: u32, + } + #[versions(AGX)] + default_zeroed!(HwDataB::ver); + + #[derive(Debug, Clone, Copy)] + #[repr(C, packed)] + pub(crate) struct GpuQueueStatsVtx { + pub(crate) busy: u32, + pub(crate) unk_4: u32, + pub(crate) cur_cmdqueue: U64, + pub(crate) cur_count: u32, + pub(crate) unk_14: u32, + } + default_zeroed!(GpuQueueStatsVtx); + + #[versions(AGX)] + #[derive(Debug, Default, Clone, Copy)] + #[repr(C, packed)] + pub(crate) struct GpuStatsVtx { + pub(crate) unk_4: u32, + pub(crate) queues: Array<0x4, GpuQueueStatsVtx>, + pub(crate) unk_68: Array<0x8, u8>, + pub(crate) unk_70: u32, + pub(crate) unk_74: u32, + pub(crate) unk_timestamp: U64, + pub(crate) unk_80: Array<0x40, u8>, + } + + #[derive(Debug, Default, Clone, Copy)] + #[repr(C, packed)] + pub(crate) struct GpuQueueStatsFrag { + pub(crate) busy: u32, + pub(crate) cur_cmdqueue: U64, + pub(crate) unk_c: u32, + pub(crate) unk_10: u32, + pub(crate) unk_14: Array<0x14, u8>, + } + + #[versions(AGX)] + #[derive(Debug)] + #[repr(C)] + pub(crate) struct GpuStatsFrag { + pub(crate) unk_0: Array<0x18, u8>, + pub(crate) queues: Array<0x4, GpuQueueStatsFrag>, + pub(crate) unk_d0: Array<0x38, u8>, + pub(crate) tvb_overflows_1: u32, + pub(crate) tvb_overflows_2: u32, + pub(crate) unk_f8: u32, + pub(crate) unk_fc: u32, + pub(crate) cur_stamp_id: i32, + pub(crate) unk_104: Array<0x14, u8>, + pub(crate) unk_118: i32, + pub(crate) unk_11c: u32, + pub(crate) unk_120: u32, + pub(crate) unk_124: u32, + pub(crate) unk_128: u32, + pub(crate) unk_12c: u32, + pub(crate) unk_timestamp: U64, + pub(crate) unk_134: Array<0x8c, u8>, + } + #[versions(AGX)] + default_zeroed!(GpuStatsFrag::ver); + + #[versions(AGX)] + #[derive(Debug)] + #[repr(C)] + pub(crate) struct GpuGlobalStatsVtx { + pub(crate) total_cmds: u32, + pub(crate) stats: GpuStatsVtx::ver, + #[ver(V >= V13_0B4)] + pub(crate) unk_pad: Array<0x5c4, u8>, + } + #[versions(AGX)] + default_zeroed!(GpuGlobalStatsVtx::ver); + + #[versions(AGX)] + 
#[derive(Debug)] + #[repr(C)] + pub(crate) struct GpuGlobalStatsFrag { + pub(crate) total_cmds: u32, + pub(crate) unk_4: u32, + pub(crate) stats: GpuStatsFrag::ver, + #[ver(V >= V13_0B4)] + pub(crate) unk_pad: Array<0x580, u8>, + } + #[versions(AGX)] + default_zeroed!(GpuGlobalStatsFrag::ver); + + #[derive(Debug)] + #[repr(C)] + pub(crate) struct GpuStatsComp { + pub(crate) unk: Array<0x140, u8>, + } + default_zeroed!(GpuStatsComp); + + #[derive(Debug)] + #[repr(C)] + pub(crate) struct RuntimeScratch { + pub(crate) unk_280: Array<0x6800, u8>, + pub(crate) unk_6a80: u32, + pub(crate) gpu_idle: u32, + pub(crate) unkpad_6a88: Pad<0x14>, + pub(crate) unk_6a9c: u32, + pub(crate) unk_ctr0: u32, + pub(crate) unk_ctr1: u32, + pub(crate) unk_6aa8: u32, + pub(crate) unk_6aac: u32, + pub(crate) unk_ctr2: u32, + pub(crate) unk_6ab4: u32, + pub(crate) unk_6ab8: u32, + pub(crate) unk_6abc: u32, + pub(crate) unk_6ac0: u32, + pub(crate) unk_6ac4: u32, + pub(crate) unk_ctr3: u32, + pub(crate) unk_6acc: u32, + pub(crate) unk_6ad0: u32, + pub(crate) unk_6ad4: u32, + pub(crate) unk_6ad8: u32, + pub(crate) unk_6adc: u32, + pub(crate) unk_6ae0: u32, + pub(crate) unk_6ae4: u32, + pub(crate) unk_6ae8: u32, + pub(crate) unk_6aec: u32, + pub(crate) unk_6af0: u32, + pub(crate) unk_ctr4: u32, + pub(crate) unk_ctr5: u32, + pub(crate) unk_6afc: u32, + pub(crate) pad_6b00: Pad<0x38>, + pub(crate) unk_6b38: u32, + pub(crate) pad_6b3c: Pad<0x84>, + } + default_zeroed!(RuntimeScratch); + + pub(crate) type BufferMgrCtl = Array<4, u32>; + + #[versions(AGX)] + #[repr(C)] + pub(crate) struct RuntimePointers<'a> { + pub(crate) pipes: Array<4, PipeChannels::ver>, + + pub(crate) device_control: + ChannelRing<channels::ChannelState, channels::DeviceControlMsg::ver>, + pub(crate) event: ChannelRing<channels::ChannelState, channels::RawEventMsg>, + pub(crate) fw_log: ChannelRing<channels::FwLogChannelState, channels::RawFwLogMsg>, + pub(crate) ktrace: ChannelRing<channels::ChannelState, channels::RawKTraceMsg>, + pub(crate) stats: ChannelRing<channels::ChannelState, channels::RawStatsMsg::ver>, + + pub(crate) __pad0: Pad<0x50>, + pub(crate) unk_160: U64, + pub(crate) unk_168: U64, + pub(crate) stats_vtx: GpuPointer<'a, super::GpuGlobalStatsVtx::ver>, + pub(crate) stats_frag: GpuPointer<'a, super::GpuGlobalStatsFrag::ver>, + pub(crate) stats_comp: GpuPointer<'a, super::GpuStatsComp>, + pub(crate) hwdata_a: GpuPointer<'a, super::HwDataA::ver>, + pub(crate) unkptr_190: GpuPointer<'a, &'a [u8]>, + pub(crate) unkptr_198: GpuPointer<'a, &'a [u8]>, + pub(crate) hwdata_b: GpuPointer<'a, super::HwDataB::ver>, + pub(crate) hwdata_b_2: GpuPointer<'a, super::HwDataB::ver>, + pub(crate) fwlog_buf: Option<GpuWeakPointer<[channels::RawFwLogPayloadMsg]>>, + pub(crate) unkptr_1b8: GpuPointer<'a, &'a [u8]>, + pub(crate) unkptr_1c0: GpuPointer<'a, &'a [u8]>, + pub(crate) unkptr_1c8: GpuPointer<'a, &'a [u8]>, + pub(crate) unk_1d0: u32, + pub(crate) unk_1d4: u32, + pub(crate) unk_1d8: Array<0x3c, u8>, + pub(crate) buffer_mgr_ctl: GpuPointer<'a, &'a [BufferMgrCtl]>, + pub(crate) buffer_mgr_ctl_2: GpuPointer<'a, &'a [BufferMgrCtl]>, + pub(crate) __pad1: Pad<0x5c>, + pub(crate) gpu_scratch: RuntimeScratch, + } + #[versions(AGX)] + no_debug!(RuntimePointers::ver<'_>); + + #[derive(Debug)] + #[repr(C)] + pub(crate) struct PendingStamp { + pub(crate) info: AtomicU32, + pub(crate) wait_value: AtomicU32, + } + default_zeroed!(PendingStamp); + + #[derive(Debug, Clone, Copy)] + #[repr(C, packed)] + pub(crate) struct FaultInfo { + pub(crate) unk_0: u32, + 
pub(crate) unk_4: u32, + pub(crate) queue_uuid: u32, + pub(crate) unk_c: u32, + pub(crate) unk_10: u32, + pub(crate) unk_14: u32, + } + default_zeroed!(FaultInfo); + + #[versions(AGX)] + #[derive(Debug, Clone, Copy)] + #[repr(C, packed)] + pub(crate) struct GlobalsSub { + pub(crate) unk_54: u16, + pub(crate) unk_56: u16, + pub(crate) unk_58: u16, + pub(crate) unk_5a: U32, + pub(crate) unk_5e: U32, + pub(crate) unk_62: U32, + + #[ver(V >= V13_0B4)] + pub(crate) unk_66_0: Array<0xc, u8>, + + pub(crate) unk_66: U32, + pub(crate) unk_6a: Array<0x16, u8>, + } + #[versions(AGX)] + default_zeroed!(GlobalsSub::ver); + + #[derive(Debug, Clone, Copy)] + #[repr(C)] + pub(crate) struct PowerZoneGlobal { + pub(crate) target: u32, + pub(crate) target_off: u32, + pub(crate) filter_tc: u32, + } + default_zeroed!(PowerZoneGlobal); + + #[versions(AGX)] + #[derive(Debug)] + #[repr(C)] + pub(crate) struct Globals { + pub(crate) ktrace_enable: u32, + pub(crate) unk_4: Array<0x20, u8>, + + #[ver(V >= V13_2)] + pub(crate) unk_24_0: u32, + + pub(crate) unk_24: u32, + + #[ver(V >= V13_0B4)] + pub(crate) unk_28_0: u32, + + pub(crate) unk_28: u32, + + #[ver(V >= V13_0B4)] + pub(crate) unk_2c_0: u32, + + pub(crate) unk_2c: u32, + pub(crate) unk_30: u32, + pub(crate) unk_34: u32, + pub(crate) unk_38: Array<0x1c, u8>, + + pub(crate) sub: GlobalsSub::ver, + + pub(crate) unk_80: Array<0xf80, u8>, + pub(crate) unk_1000: Array<0x7000, u8>, + pub(crate) unk_8000: Array<0x900, u8>, + + #[ver(V >= V13_0B4 && V < V13_2)] + pub(crate) unk_8900_0: u32, + + pub(crate) unk_8900: u32, + pub(crate) pending_submissions: AtomicU32, + pub(crate) max_power: u32, + pub(crate) max_pstate_scaled: u32, + pub(crate) max_pstate_scaled_2: u32, + pub(crate) unk_8914: u32, + pub(crate) unk_8918: u32, + pub(crate) max_pstate_scaled_3: u32, + pub(crate) unk_8920: u32, + pub(crate) power_zone_count: u32, + pub(crate) avg_power_filter_tc_periods: u32, + pub(crate) avg_power_ki_dt: F32, + pub(crate) avg_power_kp: F32, + pub(crate) avg_power_min_duty_cycle: u32, + pub(crate) avg_power_target_filter_tc: u32, + pub(crate) power_zones: Array<5, PowerZoneGlobal>, + pub(crate) unk_8978: Array<0x44, u8>, + + #[ver(V >= V13_0B4)] + pub(crate) unk_89bc_0: Array<0x3c, u8>, + + pub(crate) unk_89bc: u32, + pub(crate) fast_die0_release_temp: u32, + pub(crate) unk_89c4: i32, + pub(crate) fast_die0_prop_tgt_delta: u32, + pub(crate) fast_die0_kp: F32, + pub(crate) fast_die0_ki_dt: F32, + pub(crate) unk_89d4: Array<0xc, u8>, + pub(crate) unk_89e0: u32, + pub(crate) max_power_2: u32, + pub(crate) ppm_kp: F32, + pub(crate) ppm_ki_dt: F32, + pub(crate) unk_89f0: u32, + + #[ver(V >= V13_0B4)] + pub(crate) unk_89f4_0: Array<0x8, u8>, + + #[ver(V >= V13_0B4)] + pub(crate) unk_89f4_8: u32, + + #[ver(V >= V13_0B4)] + pub(crate) unk_89f4_c: Array<0x50, u8>, + + pub(crate) unk_89f4: u32, + pub(crate) hws1: HwDataShared1, + pub(crate) hws2: HwDataShared2, + + #[ver(V >= V13_0B4)] + pub(crate) unk_hws2_0: Array<0x28, u8>, + + pub(crate) hws3: HwDataShared3, + pub(crate) unk_9004: Array<8, u8>, + pub(crate) unk_900c: u32, + + #[ver(V >= V13_0B4)] + pub(crate) unk_9010_0: u32, + + #[ver(V >= V13_0B4)] + pub(crate) unk_9010_4: Array<0x14, u8>, + + pub(crate) unk_9010: Array<0x2c, u8>, + pub(crate) unk_903c: u32, + pub(crate) unk_9040: Array<0xc0, u8>, + pub(crate) unk_9100: Array<0x6f00, u8>, + pub(crate) unk_10000: Array<0xe50, u8>, + pub(crate) unk_10e50: u32, + pub(crate) unk_10e54: Array<0x2c, u8>, + pub(crate) fault_control: u32, + pub(crate) do_init: u32, + pub(crate) 
unk_10e88: Array<0x188, u8>, + pub(crate) idle_ts: U64, + pub(crate) idle_unk: U64, + pub(crate) unk_11020: u32, + pub(crate) unk_11024: u32, + pub(crate) unk_11028: u32, + + #[ver(V >= V13_0B4)] + pub(crate) unk_1102c_0: u32, + + #[ver(V >= V13_0B4)] + pub(crate) unk_1102c_4: u32, + + #[ver(V >= V13_0B4)] + pub(crate) unk_1102c_8: u32, + + #[ver(V >= V13_0B4)] + pub(crate) unk_1102c_c: u32, + + #[ver(V >= V13_0B4)] + pub(crate) unk_1102c_10: u32, + + pub(crate) unk_1102c: u32, + pub(crate) idle_off_delay_ms: AtomicU32, + pub(crate) fender_idle_off_delay_ms: u32, + pub(crate) fw_early_wake_timeout_ms: u32, + pub(crate) pending_stamps: Array<0x110, PendingStamp>, + pub(crate) unk_117bc: u32, + pub(crate) fault_info: FaultInfo, + pub(crate) counter: u32, + pub(crate) unk_118dc: u32, + + #[ver(V >= V13_0B4)] + pub(crate) unk_118e0_0: Array<0x9c, u8>, + + pub(crate) unk_118e0: u32, + + #[ver(V >= V13_0B4)] + pub(crate) unk_118e4_0: u32, + + pub(crate) unk_118e4: u32, + pub(crate) unk_118e8: u32, + pub(crate) unk_118ec: Array<0x15, u8>, + pub(crate) unk_11901: Array<0x43f, u8>, + + #[ver(V >= V13_0B4)] + pub(crate) unk_11d40: Array<0x19c, u8>, + + #[ver(V >= V13_0B4)] + pub(crate) unk_11edc: u32, + + #[ver(V >= V13_0B4)] + pub(crate) unk_11ee0: Array<0x1c, u8>, + + #[ver(V >= V13_0B4)] + pub(crate) unk_11efc: u32, + } + #[versions(AGX)] + default_zeroed!(Globals::ver); + + #[derive(Debug, Default, Clone, Copy)] + #[repr(C, packed)] + pub(crate) struct UatLevelInfo { + pub(crate) unk_3: u8, + pub(crate) unk_1: u8, + pub(crate) unk_2: u8, + pub(crate) index_shift: u8, + pub(crate) num_entries: u16, + pub(crate) unk_4: u16, + pub(crate) unk_8: U64, + pub(crate) unk_10: U64, + pub(crate) index_mask: U64, + } + + #[versions(AGX)] + #[derive(Debug)] + #[repr(C)] + pub(crate) struct InitData<'a> { + #[ver(V >= V13_0B4)] + pub(crate) ver_info: Array<0x4, u16>, + + pub(crate) unk_buf: GpuPointer<'a, &'a [u8]>, + pub(crate) unk_8: u32, + pub(crate) unk_c: u32, + pub(crate) runtime_pointers: GpuPointer<'a, super::RuntimePointers::ver>, + pub(crate) globals: GpuPointer<'a, super::Globals::ver>, + pub(crate) fw_status: GpuPointer<'a, super::FwStatus>, + pub(crate) uat_page_size: u16, + pub(crate) uat_page_bits: u8, + pub(crate) uat_num_levels: u8, + pub(crate) uat_level_info: Array<0x3, UatLevelInfo>, + pub(crate) __pad0: Pad<0x14>, + pub(crate) host_mapped_fw_allocations: u32, + pub(crate) unk_ac: u32, + pub(crate) unk_b0: u32, + pub(crate) unk_b4: u32, + pub(crate) unk_b8: u32, + } +} + +#[derive(Debug)] +pub(crate) struct ChannelRing<T: GpuStruct + Debug + Default, U: Copy> +where + for<'a> <T as GpuStruct>::Raw<'a>: Debug, +{ + pub(crate) state: GpuObject<T>, + pub(crate) ring: GpuArray<U>, +} + +impl<T: GpuStruct + Debug + Default, U: Copy> ChannelRing<T, U> +where + for<'a> <T as GpuStruct>::Raw<'a>: Debug, +{ + pub(crate) fn to_raw(&self) -> raw::ChannelRing<T, U> { + raw::ChannelRing { + state: Some(self.state.weak_pointer()), + ring: Some(self.ring.weak_pointer()), + } + } +} + +trivial_gpustruct!(FwStatus); + +#[versions(AGX)] +#[derive(Debug, Default)] +pub(crate) struct GpuGlobalStatsVtx {} + +#[versions(AGX)] +impl GpuStruct for GpuGlobalStatsVtx::ver { + type Raw<'a> = raw::GpuGlobalStatsVtx::ver; +} + +#[versions(AGX)] +#[derive(Debug, Default)] +pub(crate) struct GpuGlobalStatsFrag {} + +#[versions(AGX)] +impl GpuStruct for GpuGlobalStatsFrag::ver { + type Raw<'a> = raw::GpuGlobalStatsFrag::ver; +} + +#[derive(Debug, Default)] +pub(crate) struct GpuStatsComp {} + +impl GpuStruct for 
GpuStatsComp { + type Raw<'a> = raw::GpuStatsComp; +} + +#[versions(AGX)] +#[derive(Debug, Default)] +pub(crate) struct HwDataA {} + +#[versions(AGX)] +impl GpuStruct for HwDataA::ver { + type Raw<'a> = raw::HwDataA::ver; +} + +#[versions(AGX)] +#[derive(Debug, Default)] +pub(crate) struct HwDataB {} + +#[versions(AGX)] +impl GpuStruct for HwDataB::ver { + type Raw<'a> = raw::HwDataB::ver; +} + +#[versions(AGX)] +#[derive(Debug)] +pub(crate) struct Stats { + pub(crate) vtx: GpuObject<GpuGlobalStatsVtx::ver>, + pub(crate) frag: GpuObject<GpuGlobalStatsFrag::ver>, + pub(crate) comp: GpuObject<GpuStatsComp>, +} + +#[versions(AGX)] +#[derive(Debug)] +pub(crate) struct RuntimePointers { + pub(crate) stats: Stats::ver, + + pub(crate) hwdata_a: GpuObject<HwDataA::ver>, + pub(crate) unkptr_190: GpuArray<u8>, + pub(crate) unkptr_198: GpuArray<u8>, + pub(crate) hwdata_b: GpuObject<HwDataB::ver>, + + pub(crate) unkptr_1b8: GpuArray<u8>, + pub(crate) unkptr_1c0: GpuArray<u8>, + pub(crate) unkptr_1c8: GpuArray<u8>, + + pub(crate) buffer_mgr_ctl: GpuArray<raw::BufferMgrCtl>, +} + +#[versions(AGX)] +impl GpuStruct for RuntimePointers::ver { + type Raw<'a> = raw::RuntimePointers::ver<'a>; +} + +#[versions(AGX)] +#[derive(Debug, Default)] +pub(crate) struct Globals {} + +#[versions(AGX)] +impl GpuStruct for Globals::ver { + type Raw<'a> = raw::Globals::ver; +} + +#[versions(AGX)] +#[derive(Debug)] +pub(crate) struct InitData { + pub(crate) unk_buf: GpuArray<u8>, + pub(crate) runtime_pointers: GpuObject<RuntimePointers::ver>, + pub(crate) globals: GpuObject<Globals::ver>, + pub(crate) fw_status: GpuObject<FwStatus>, +} + +#[versions(AGX)] +impl GpuStruct for InitData::ver { + type Raw<'a> = raw::InitData::ver<'a>; +} diff --git a/drivers/gpu/drm/asahi/fw/job.rs b/drivers/gpu/drm/asahi/fw/job.rs new file mode 100644 index 000000000000..a0bbf67b1b1d --- /dev/null +++ b/drivers/gpu/drm/asahi/fw/job.rs @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT + +//! Common GPU job firmware structures + +use super::types::*; +use crate::{default_zeroed, trivial_gpustruct}; + +pub(crate) mod raw { + use super::*; + + #[derive(Debug, Clone, Copy)] + #[repr(C)] + pub(crate) struct JobMeta { + pub(crate) unk_4: u32, + pub(crate) stamp: GpuWeakPointer<Stamp>, + pub(crate) fw_stamp: GpuWeakPointer<FwStamp>, + pub(crate) stamp_value: EventValue, + pub(crate) stamp_slot: u32, + pub(crate) evctl_index: u32, + pub(crate) flush_stamps: u32, + pub(crate) uuid: u32, + pub(crate) cmd_seq: u32, + } + + #[derive(Debug)] + #[repr(C)] + pub(crate) struct EncoderParams<'a> { + pub(crate) unk_8: u32, + pub(crate) unk_c: u32, + pub(crate) unk_10: u32, + pub(crate) encoder_id: u32, + pub(crate) unk_18: u32, + pub(crate) iogpu_compute_unk44: u32, + pub(crate) seq_buffer: GpuPointer<'a, &'a [u64]>, + pub(crate) unk_28: U64, + } + + #[derive(Debug)] + #[repr(C)] + pub(crate) struct JobTimestamps { + pub(crate) start: AtomicU64, + pub(crate) end: AtomicU64, + } + default_zeroed!(JobTimestamps); + + #[derive(Debug)] + #[repr(C)] + pub(crate) struct RenderTimestamps { + pub(crate) vtx: JobTimestamps, + pub(crate) frag: JobTimestamps, + } + default_zeroed!(RenderTimestamps); +} + +trivial_gpustruct!(JobTimestamps); +trivial_gpustruct!(RenderTimestamps); diff --git a/drivers/gpu/drm/asahi/fw/microseq.rs b/drivers/gpu/drm/asahi/fw/microseq.rs new file mode 100644 index 000000000000..8deea3fb9914 --- /dev/null +++ b/drivers/gpu/drm/asahi/fw/microseq.rs @@ -0,0 +1,384 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT + +//! 
GPU firmware microsequence operations + +use super::types::*; +use super::{buffer, compute, fragment, initdata, vertex, workqueue}; +use crate::default_zeroed; + +pub(crate) trait Operation {} + +#[derive(Debug, Copy, Clone)] +#[repr(u32)] +enum OpCode { + WaitForIdle = 0x01, + RetireStamp = 0x18, + #[allow(dead_code)] + Timestamp = 0x19, + StartVertex = 0x22, + FinalizeVertex = 0x23, + StartFragment = 0x24, + FinalizeFragment = 0x25, + StartCompute = 0x29, + FinalizeCompute = 0x2a, +} + +#[derive(Debug, Copy, Clone)] +#[repr(u32)] +pub(crate) enum Pipe { + Vertex = 1 << 0, + Fragment = 1 << 8, + Compute = 1 << 15, +} + +pub(crate) const MAX_ATTACHMENTS: usize = 16; + +#[derive(Debug, Clone, Copy)] +#[repr(C)] +pub(crate) struct Attachment { + pub(crate) address: U64, + pub(crate) size: u32, + pub(crate) unk_c: u16, + pub(crate) unk_e: u16, +} +default_zeroed!(Attachment); + +#[derive(Debug, Clone, Copy, Default)] +#[repr(C)] +pub(crate) struct Attachments { + pub(crate) list: Array<MAX_ATTACHMENTS, Attachment>, + pub(crate) count: u32, +} + +#[derive(Debug, Copy, Clone)] +#[repr(transparent)] +pub(crate) struct OpHeader(u32); + +impl OpHeader { + const fn new(opcode: OpCode) -> OpHeader { + OpHeader(opcode as u32) + } + const fn with_args(opcode: OpCode, args: u32) -> OpHeader { + OpHeader(opcode as u32 | args) + } +} + +macro_rules! simple_op { + ($name:ident) => { + #[derive(Debug, Copy, Clone)] + pub(crate) struct $name(OpHeader); + + impl $name { + pub(crate) const HEADER: $name = $name(OpHeader::new(OpCode::$name)); + } + }; +} + +pub(crate) mod op { + use super::*; + + simple_op!(StartVertex); + simple_op!(FinalizeVertex); + simple_op!(StartFragment); + simple_op!(FinalizeFragment); + simple_op!(StartCompute); + simple_op!(FinalizeCompute); + + #[derive(Debug, Copy, Clone)] + pub(crate) struct RetireStamp(OpHeader); + impl RetireStamp { + pub(crate) const HEADER: RetireStamp = + RetireStamp(OpHeader::with_args(OpCode::RetireStamp, 0x40000000)); + } + + #[derive(Debug, Copy, Clone)] + pub(crate) struct WaitForIdle(OpHeader); + impl WaitForIdle { + pub(crate) const fn new(pipe: Pipe) -> WaitForIdle { + WaitForIdle(OpHeader::with_args(OpCode::WaitForIdle, (pipe as u32) << 8)) + } + } + + #[derive(Debug, Copy, Clone)] + pub(crate) struct Timestamp(OpHeader); + impl Timestamp { + #[allow(dead_code)] + pub(crate) const fn new(flag: bool) -> Timestamp { + Timestamp(OpHeader::with_args(OpCode::Timestamp, (flag as u32) << 31)) + } + } +} + +#[derive(Debug)] +#[repr(C)] +pub(crate) struct WaitForIdle { + pub(crate) header: op::WaitForIdle, +} + +impl Operation for WaitForIdle {} + +#[derive(Debug)] +#[repr(C)] +pub(crate) struct RetireStamp { + pub(crate) header: op::RetireStamp, +} + +impl Operation for RetireStamp {} + +#[versions(AGX)] +#[derive(Debug)] +#[repr(C)] +pub(crate) struct Timestamp<'a> { + pub(crate) header: op::Timestamp, + pub(crate) cur_ts: GpuWeakPointer<U64>, + pub(crate) start_ts: GpuWeakPointer<Option<GpuPointer<'a, AtomicU64>>>, + pub(crate) update_ts: GpuWeakPointer<Option<GpuPointer<'a, AtomicU64>>>, + pub(crate) work_queue: GpuWeakPointer<workqueue::QueueInfo::ver>, + pub(crate) unk_24: U64, + + #[ver(V >= V13_0B4)] + pub(crate) unk_ts: GpuWeakPointer<U64>, + + pub(crate) uuid: u32, + pub(crate) unk_30_padding: u32, +} + +#[versions(AGX)] +impl<'a> Operation for Timestamp::ver<'a> {} + +#[versions(AGX)] +#[derive(Debug)] +#[repr(C)] +pub(crate) struct StartVertex<'a> { + pub(crate) header: op::StartVertex, + pub(crate) tiling_params: 
GpuWeakPointer<vertex::raw::TilingParameters>, + pub(crate) job_params1: GpuWeakPointer<vertex::raw::JobParameters1::ver<'a>>, + pub(crate) buffer: GpuWeakPointer<buffer::Info::ver>, + pub(crate) scene: GpuWeakPointer<buffer::Scene::ver>, + pub(crate) stats: GpuWeakPointer<initdata::raw::GpuStatsVtx::ver>, + pub(crate) work_queue: GpuWeakPointer<workqueue::QueueInfo::ver>, + pub(crate) vm_slot: u32, + pub(crate) unk_38: u32, + pub(crate) event_generation: u32, + pub(crate) buffer_slot: u32, + pub(crate) unk_44: u32, + pub(crate) cmd_seq: U64, + pub(crate) unk_50: u32, + pub(crate) unk_pointer: GpuWeakPointer<u32>, + pub(crate) unk_job_buf: GpuWeakPointer<U64>, + pub(crate) unk_64: u32, + pub(crate) unk_68: u32, + pub(crate) uuid: u32, + pub(crate) unk_70: u32, + pub(crate) unk_74: Array<0x1d, U64>, + pub(crate) unk_15c: u32, + pub(crate) unk_160: U64, + pub(crate) unk_168: u32, + pub(crate) unk_16c: u32, + pub(crate) unk_170: U64, + + #[ver(V >= V13_0B4)] + pub(crate) counter: U64, + + #[ver(V >= V13_0B4)] + pub(crate) notifier_buf: GpuWeakPointer<Array<0x8, u8>>, + + pub(crate) unk_178: u32, +} + +#[versions(AGX)] +impl<'a> Operation for StartVertex::ver<'a> {} + +#[versions(AGX)] +#[derive(Debug)] +#[repr(C)] +pub(crate) struct FinalizeVertex { + pub(crate) header: op::FinalizeVertex, + pub(crate) scene: GpuWeakPointer<buffer::Scene::ver>, + pub(crate) buffer: GpuWeakPointer<buffer::Info::ver>, + pub(crate) stats: GpuWeakPointer<initdata::raw::GpuStatsVtx::ver>, + pub(crate) work_queue: GpuWeakPointer<workqueue::QueueInfo::ver>, + pub(crate) vm_slot: u32, + pub(crate) unk_28: u32, + pub(crate) unk_pointer: GpuWeakPointer<u32>, + pub(crate) unk_34: u32, + pub(crate) uuid: u32, + pub(crate) fw_stamp: GpuWeakPointer<FwStamp>, + pub(crate) stamp_value: EventValue, + pub(crate) unk_48: U64, + pub(crate) unk_50: u32, + pub(crate) unk_54: u32, + pub(crate) unk_58: U64, + pub(crate) unk_60: u32, + pub(crate) unk_64: u32, + pub(crate) unk_68: u32, + + #[ver(G >= G14 && V < V13_0B4)] + pub(crate) unk_68_g14: U64, + + pub(crate) restart_branch_offset: i32, + pub(crate) unk_70: u32, + + #[ver(V >= V13_0B4)] + pub(crate) unk_74: Array<0x10, u8>, +} + +#[versions(AGX)] +impl Operation for FinalizeVertex::ver {} + +#[versions(AGX)] +#[derive(Debug)] +#[repr(C)] +pub(crate) struct StartFragment<'a> { + pub(crate) header: op::StartFragment, + pub(crate) job_params2: GpuWeakPointer<fragment::raw::JobParameters2>, + pub(crate) job_params1: GpuWeakPointer<fragment::raw::JobParameters1::ver<'a>>, + pub(crate) scene: GpuPointer<'a, buffer::Scene::ver>, + pub(crate) stats: GpuWeakPointer<initdata::raw::GpuStatsFrag::ver>, + pub(crate) busy_flag: GpuWeakPointer<u32>, + pub(crate) tvb_overflow_count: GpuWeakPointer<u32>, + pub(crate) unk_pointer: GpuWeakPointer<u32>, + pub(crate) work_queue: GpuWeakPointer<workqueue::QueueInfo::ver>, + pub(crate) work_item: GpuWeakPointer<fragment::RunFragment::ver>, + pub(crate) vm_slot: u32, + pub(crate) unk_50: u32, + pub(crate) event_generation: u32, + pub(crate) buffer_slot: u32, + pub(crate) unk_5c: u32, + pub(crate) cmd_seq: U64, + pub(crate) unk_68: u32, + pub(crate) unk_758_flag: GpuWeakPointer<u32>, + pub(crate) unk_job_buf: GpuWeakPointer<U64>, + pub(crate) unk_7c: u32, + pub(crate) unk_80: u32, + pub(crate) unk_84: u32, + pub(crate) uuid: u32, + pub(crate) attachments: Attachments, + pub(crate) unk_190: u32, + + #[ver(V >= V13_0B4)] + pub(crate) counter: U64, + + #[ver(V >= V13_0B4)] + pub(crate) notifier_buf: GpuWeakPointer<Array<0x8, u8>>, +} + +#[versions(AGX)] 
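A worked example of the `OpHeader` encoding defined above (an illustrative aside, not new driver code): each header word is simply the opcode OR'd with shifted argument bits, so with the values in this file the constructors produce:

// WaitForIdle::new(Pipe::Fragment): 0x01 | ((1 << 8) << 8)  == 0x0001_0001
// RetireStamp::HEADER:              0x18 | 0x4000_0000      == 0x4000_0018
// Timestamp::new(true):             0x19 | (1 << 31)        == 0x8000_0019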
+impl<'a> Operation for StartFragment::ver<'a> {} + +#[versions(AGX)] +#[derive(Debug)] +#[repr(C)] +pub(crate) struct FinalizeFragment { + pub(crate) header: op::FinalizeFragment, + pub(crate) uuid: u32, + pub(crate) unk_8: u32, + pub(crate) fw_stamp: GpuWeakPointer<FwStamp>, + pub(crate) stamp_value: EventValue, + pub(crate) unk_18: u32, + pub(crate) scene: GpuWeakPointer<buffer::Scene::ver>, + pub(crate) buffer: GpuWeakPointer<buffer::Info::ver>, + pub(crate) unk_2c: U64, + pub(crate) stats: GpuWeakPointer<initdata::raw::GpuStatsFrag::ver>, + pub(crate) unk_pointer: GpuWeakPointer<u32>, + pub(crate) busy_flag: GpuWeakPointer<u32>, + pub(crate) work_queue: GpuWeakPointer<workqueue::QueueInfo::ver>, + pub(crate) work_item: GpuWeakPointer<fragment::RunFragment::ver>, + pub(crate) vm_slot: u32, + pub(crate) unk_60: u32, + pub(crate) unk_758_flag: GpuWeakPointer<u32>, + pub(crate) unk_6c: U64, + pub(crate) unk_74: U64, + pub(crate) unk_7c: U64, + pub(crate) unk_84: U64, + pub(crate) unk_8c: U64, + + #[ver(G == G14 && V < V13_0B4)] + pub(crate) unk_8c_g14: U64, + + pub(crate) restart_branch_offset: i32, + pub(crate) unk_98: u32, + + #[ver(V >= V13_0B4)] + pub(crate) unk_9c: Array<0x10, u8>, +} + +#[versions(AGX)] +impl Operation for FinalizeFragment::ver {} + +#[versions(AGX)] +#[derive(Debug)] +#[repr(C)] +pub(crate) struct StartCompute<'a> { + pub(crate) header: op::StartCompute, + pub(crate) unk_pointer: GpuWeakPointer<Array<0x54, u8>>, + pub(crate) job_params1: GpuWeakPointer<compute::raw::JobParameters1<'a>>, + pub(crate) stats: GpuWeakPointer<initdata::GpuStatsComp>, + pub(crate) work_queue: GpuWeakPointer<workqueue::QueueInfo::ver>, + pub(crate) vm_slot: u32, + pub(crate) unk_28: u32, + pub(crate) event_generation: u32, + pub(crate) cmd_seq: U64, + pub(crate) unk_38: u32, + pub(crate) job_params2: GpuWeakPointer<compute::raw::JobParameters2::ver<'a>>, + pub(crate) unk_44: u32, + pub(crate) uuid: u32, + pub(crate) attachments: Attachments, + pub(crate) padding: u32, + + #[ver(V >= V13_0B4)] + pub(crate) unk_flag: GpuWeakPointer<U32>, + + #[ver(V >= V13_0B4)] + pub(crate) counter: U64, + + #[ver(V >= V13_0B4)] + pub(crate) notifier_buf: GpuWeakPointer<Array<0x8, u8>>, +} + +#[versions(AGX)] +impl<'a> Operation for StartCompute::ver<'a> {} + +#[versions(AGX)] +#[derive(Debug)] +#[repr(C)] +pub(crate) struct FinalizeCompute<'a> { + pub(crate) header: op::FinalizeCompute, + pub(crate) stats: GpuWeakPointer<initdata::GpuStatsComp>, + pub(crate) work_queue: GpuWeakPointer<workqueue::QueueInfo::ver>, + pub(crate) vm_slot: u32, + #[ver(V < V13_0B4)] + pub(crate) unk_18: u32, + pub(crate) job_params2: GpuWeakPointer<compute::raw::JobParameters2::ver<'a>>, + pub(crate) unk_24: u32, + pub(crate) uuid: u32, + pub(crate) fw_stamp: GpuWeakPointer<FwStamp>, + pub(crate) stamp_value: EventValue, + pub(crate) unk_38: u32, + pub(crate) unk_3c: u32, + pub(crate) unk_40: u32, + pub(crate) unk_44: u32, + pub(crate) unk_48: u32, + pub(crate) unk_4c: u32, + pub(crate) unk_50: u32, + pub(crate) unk_54: u32, + pub(crate) unk_58: u32, + + #[ver(G == G14 && V < V13_0B4)] + pub(crate) unk_5c_g14: U64, + + pub(crate) restart_branch_offset: i32, + pub(crate) unk_60: u32, + + #[ver(V >= V13_0B4)] + pub(crate) unk_64: Array<0xd, u8>, + + #[ver(V >= V13_0B4)] + pub(crate) unk_flag: GpuWeakPointer<U32>, + + #[ver(V >= V13_0B4)] + pub(crate) unk_79: Array<0x7, u8>, +} + +#[versions(AGX)] +impl<'a> Operation for FinalizeCompute::ver<'a> {} diff --git a/drivers/gpu/drm/asahi/fw/mod.rs b/drivers/gpu/drm/asahi/fw/mod.rs new 
file mode 100644 index 000000000000..a5649aa20d3a --- /dev/null +++ b/drivers/gpu/drm/asahi/fw/mod.rs @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT + +//! Firmware structures for Apple AGX GPUs + +pub(crate) mod buffer; +pub(crate) mod channels; +pub(crate) mod compute; +pub(crate) mod event; +pub(crate) mod fragment; +pub(crate) mod initdata; +pub(crate) mod job; +pub(crate) mod microseq; +pub(crate) mod types; +pub(crate) mod vertex; +pub(crate) mod workqueue; diff --git a/drivers/gpu/drm/asahi/fw/types.rs b/drivers/gpu/drm/asahi/fw/types.rs new file mode 100644 index 000000000000..c1a07be1e047 --- /dev/null +++ b/drivers/gpu/drm/asahi/fw/types.rs @@ -0,0 +1,233 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT + +//! Common types for firmware structure definitions + +use crate::{alloc, object}; +use core::fmt; +use core::ops::{Deref, DerefMut, Index, IndexMut}; + +pub(crate) use crate::event::EventValue; +pub(crate) use crate::object::{GpuPointer, GpuStruct, GpuWeakPointer}; +pub(crate) use crate::{f32, float::F32}; + +pub(crate) use ::alloc::boxed::Box; +pub(crate) use core::fmt::Debug; +pub(crate) use core::marker::PhantomData; +pub(crate) use core::sync::atomic::{AtomicI32, AtomicU32, AtomicU64}; +pub(crate) use kernel::macros::versions; + +// Make the trait visible +pub(crate) use crate::alloc::Allocator as _Allocator; + +/// General allocator type used for the driver +pub(crate) type Allocator = alloc::DefaultAllocator; + +/// General GpuObject type used for the driver +pub(crate) type GpuObject<T> = + object::GpuObject<T, alloc::GenericAlloc<T, alloc::DefaultAllocation>>; + +/// General GpuArray type used for the driver +pub(crate) type GpuArray<T> = object::GpuArray<T, alloc::GenericAlloc<T, alloc::DefaultAllocation>>; + +/// General GpuOnlyArray type used for the driver +pub(crate) type GpuOnlyArray<T> = + object::GpuOnlyArray<T, alloc::GenericAlloc<T, alloc::DefaultAllocation>>; + +/// A stamp slot that is shared between firmware and the driver. +#[derive(Debug, Default)] +#[repr(transparent)] +pub(crate) struct Stamp(pub(crate) AtomicU32); + +/// A stamp slot that is for private firmware use. +/// +/// This is a separate type to guard against pointer type confusion. +#[derive(Debug, Default)] +#[repr(transparent)] +pub(crate) struct FwStamp(pub(crate) AtomicU32); + +/// An unaligned u64 type. +/// +/// This is useful to avoid having to pack firmware structures entirely, since that is incompatible +/// with `#[derive(Debug)]` and atomics. +#[derive(Copy, Clone, Default)] +#[repr(C, packed(1))] +pub(crate) struct U64(pub(crate) u64); + +unsafe impl Zeroed for U64 {} + +impl fmt::Debug for U64 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let v = self.0; + f.write_fmt(format_args!("{:#x}", v)) + } +} + +/// An unaligned u32 type. +/// +/// This is useful to avoid having to pack firmware structures entirely, since that is incompatible +/// with `#[derive(Debug)]` and atomics. 
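To make the rationale above concrete, here is a minimal sketch (illustrative only, with made-up field names) of how a raw firmware struct can use `U64` for a value the firmware places at an unaligned offset, keeping the `#[repr(C)]` layout without packing the whole struct:

#[derive(Debug, Default, Clone, Copy)]
#[repr(C)]
struct ExampleRaw {
    flags: u32,     // offset 0x0
    timestamp: U64, // offset 0x4: a plain u64 here would be padded up to 0x8
    counter: u32,   // offset 0xc
}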
+#[derive(Copy, Clone, Default)] +#[repr(C, packed(1))] +pub(crate) struct U32(pub(crate) u32); + +unsafe impl Zeroed for U32 {} + +impl fmt::Debug for U32 { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let v = self.0; + f.write_fmt(format_args!("{:#x}", v)) + } +} + +unsafe impl Zeroed for u8 {} +unsafe impl Zeroed for u16 {} +unsafe impl Zeroed for u32 {} +unsafe impl Zeroed for u64 {} +unsafe impl Zeroed for i8 {} +unsafe impl Zeroed for i16 {} +unsafe impl Zeroed for i32 {} +unsafe impl Zeroed for i64 {} + +/// Create a dummy `Debug` implementation, for when we need it but it's too painful to write by +/// hand or not very useful. +#[macro_export] +macro_rules! no_debug { + ($type:ty) => { + impl Debug for $type { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + write!(f, "...") + } + } + }; +} + +/// Types which can be safely initialized with an all-zero bit pattern. +/// +/// See: https://github.com/rust-lang/rfcs/issues/2626 +/// +/// # Safety +/// +/// This trait must only be implemented if a type only contains primitive types which can be +/// zero-initialized, FFI structs intended to be zero-initialized, or other types which impl Zeroed. +pub(crate) unsafe trait Zeroed: Default { + fn zeroed() -> Self { + // SAFETY: The user is responsible for ensuring this is safe. + unsafe { core::mem::zeroed() } + } +} + +/// Implement Zeroed for a given type (and Default along with it). +/// +/// # Safety +/// +/// This macro must only be used if a type only contains primitive types which can be +/// zero-initialized, FFI structs intended to be zero-initialized, or other types which impl Zeroed. +#[macro_export] +macro_rules! default_zeroed { + (<$($lt:lifetime),*>, $type:ty) => { + impl<$($lt),*> Default for $type { + fn default() -> $type { + Zeroed::zeroed() + } + } + // SAFETY: The user is responsible for ensuring this is safe. + unsafe impl<$($lt),*> Zeroed for $type {} + }; + ($type:ty) => { + impl Default for $type { + fn default() -> $type { + Zeroed::zeroed() + } + } + // SAFETY: The user is responsible for ensuring this is safe. + unsafe impl Zeroed for $type {} + }; +} + +/// A convenience type for a number of padding bytes. Hidden from Debug formatting. +#[derive(Copy, Clone)] +#[repr(C, packed)] +pub(crate) struct Pad<const N: usize>([u8; N]); + +/// SAFETY: Primitive type, safe to zero-init. +unsafe impl<const N: usize> Zeroed for Pad<N> {} + +impl<const N: usize> Default for Pad<N> { + fn default() -> Self { + Zeroed::zeroed() + } +} + +impl<const N: usize> fmt::Debug for Pad<N> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_fmt(format_args!("<pad>")) + } +} + +/// A convenience type for a fixed-sized array with Default/Zeroed impls. +#[derive(Copy, Clone)] +#[repr(C)] +pub(crate) struct Array<const N: usize, T>([T; N]); + +impl<const N: usize, T> Array<N, T> { + pub(crate) fn new(data: [T; N]) -> Self { + Self(data) + } +} + +// SAFETY: Arrays of Zeroed values can be safely Zeroed. 
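For reference, `default_zeroed!(FaultInfo);` used earlier in this patch expands to roughly the following (a sketch of the macro above, not additional code):

impl Default for FaultInfo {
    fn default() -> FaultInfo {
        Zeroed::zeroed() // ultimately core::mem::zeroed()
    }
}
// SAFETY: FaultInfo contains only plain integers, so the all-zero bit pattern
// is a valid value.
unsafe impl Zeroed for FaultInfo {}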
+unsafe impl<const N: usize, T: Zeroed> Zeroed for Array<N, T> {} + +impl<const N: usize, T: Zeroed> Default for Array<N, T> { + fn default() -> Self { + Zeroed::zeroed() + } +} + +impl<const N: usize, T> Index<usize> for Array<N, T> { + type Output = T; + + fn index(&self, index: usize) -> &Self::Output { + &self.0[index] + } +} + +impl<const N: usize, T> IndexMut<usize> for Array<N, T> { + fn index_mut(&mut self, index: usize) -> &mut Self::Output { + &mut self.0[index] + } +} + +impl<const N: usize, T> Deref for Array<N, T> { + type Target = [T; N]; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl<const N: usize, T> DerefMut for Array<N, T> { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +impl<const N: usize, T: Sized + fmt::Debug> fmt::Debug for Array<N, T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.0.fmt(f) + } +} + +/// Convenience macro to define an identically-named trivial GpuStruct with no inner fields for a +/// given raw type name. +#[macro_export] +macro_rules! trivial_gpustruct { + ($type:ident) => { + #[derive(Debug, Default)] + pub(crate) struct $type {} + + impl GpuStruct for $type { + type Raw<'a> = raw::$type; + } + }; +} diff --git a/drivers/gpu/drm/asahi/fw/vertex.rs b/drivers/gpu/drm/asahi/fw/vertex.rs new file mode 100644 index 000000000000..959a0913e693 --- /dev/null +++ b/drivers/gpu/drm/asahi/fw/vertex.rs @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT + +//! GPU vertex job firmware structures + +use super::types::*; +use super::{event, job, workqueue}; +use crate::{buffer, fw, microseq, mmu}; +use kernel::sync::Arc; + +pub(crate) mod raw { + use super::*; + + #[derive(Debug, Default, Copy, Clone)] + #[repr(C)] + pub(crate) struct TilingParameters { + pub(crate) rgn_size: u32, + pub(crate) unk_4: u32, + pub(crate) ppp_ctrl: u32, + pub(crate) x_max: u16, + pub(crate) y_max: u16, + pub(crate) te_screen: u32, + pub(crate) te_mtile1: u32, + pub(crate) te_mtile2: u32, + pub(crate) tiles_per_mtile: u32, + pub(crate) tpc_stride: u32, + pub(crate) unk_24: u32, + pub(crate) unk_28: u32, + } + + #[versions(AGX)] + #[derive(Debug)] + #[repr(C)] + pub(crate) struct JobParameters1<'a> { + pub(crate) unk_0: U64, + pub(crate) unk_8: F32, + pub(crate) unk_c: F32, + pub(crate) tvb_tilemap: GpuPointer<'a, &'a [u8]>, + #[ver(G < G14)] + pub(crate) tvb_cluster_tilemaps: Option<GpuPointer<'a, &'a [u8]>>, + pub(crate) tpc: GpuPointer<'a, &'a [u8]>, + pub(crate) tvb_heapmeta: GpuPointer<'a, &'a [u8]>, + pub(crate) iogpu_unk_54: u32, + pub(crate) iogpu_unk_55: u32, + pub(crate) iogpu_unk_56: U64, + #[ver(G < G14)] + pub(crate) tvb_cluster_meta1: Option<GpuPointer<'a, &'a [u8]>>, + pub(crate) utile_config: u32, + pub(crate) unk_4c: u32, + pub(crate) ppp_multisamplectl: U64, + pub(crate) tvb_heapmeta_2: GpuPointer<'a, &'a [u8]>, + #[ver(G < G14)] + pub(crate) unk_60: U64, + #[ver(G < G14)] + pub(crate) core_mask: Array<2, u32>, + pub(crate) preempt_buf1: GpuPointer<'a, &'a [u8]>, + pub(crate) preempt_buf2: GpuPointer<'a, &'a [u8]>, + pub(crate) unk_80: U64, + pub(crate) preempt_buf3: GpuPointer<'a, &'a [u8]>, + pub(crate) encoder_addr: U64, + #[ver(G < G14)] + pub(crate) tvb_cluster_meta2: Option<GpuPointer<'a, &'a [u8]>>, + #[ver(G < G14)] + pub(crate) tvb_cluster_meta3: Option<GpuPointer<'a, &'a [u8]>>, + #[ver(G < G14)] + pub(crate) tiling_control: u32, + #[ver(G < G14)] + pub(crate) unk_ac: u32, + pub(crate) unk_b0: Array<6, U64>, + pub(crate) pipeline_base: U64, + #[ver(G < G14)] + pub(crate) 
tvb_cluster_meta4: Option<GpuPointer<'a, &'a [u8]>>, + #[ver(G < G14)] + pub(crate) unk_f0: U64, + pub(crate) unk_f8: U64, + pub(crate) unk_100: Array<3, U64>, + pub(crate) unk_118: u32, + #[ver(G >= G14)] + pub(crate) __pad: Pad<{ 8 * 9 }>, + } + + #[derive(Debug)] + #[repr(C)] + pub(crate) struct JobParameters2<'a> { + pub(crate) unk_480: Array<4, u32>, + pub(crate) unk_498: U64, + pub(crate) unk_4a0: u32, + pub(crate) preempt_buf1: GpuPointer<'a, &'a [u8]>, + pub(crate) unk_4ac: u32, + pub(crate) unk_4b0: U64, + pub(crate) unk_4b8: u32, + pub(crate) unk_4bc: U64, + pub(crate) unk_4c4_padding: Array<0x48, u8>, + pub(crate) unk_50c: u32, + pub(crate) unk_510: U64, + pub(crate) unk_518: U64, + pub(crate) unk_520: U64, + } + + #[versions(AGX)] + #[derive(Debug)] + #[repr(C)] + pub(crate) struct RunVertex<'a> { + pub(crate) tag: workqueue::CommandType, + + #[ver(V >= V13_0B4)] + pub(crate) counter: U64, + + pub(crate) vm_slot: u32, + pub(crate) unk_8: u32, + pub(crate) notifier: GpuPointer<'a, event::Notifier::ver>, + pub(crate) buffer_slot: u32, + pub(crate) unk_1c: u32, + pub(crate) buffer: GpuPointer<'a, fw::buffer::Info::ver>, + pub(crate) scene: GpuPointer<'a, fw::buffer::Scene::ver>, + pub(crate) unk_buffer_buf: GpuWeakPointer<[u8]>, + pub(crate) unk_34: u32, + pub(crate) job_params1: JobParameters1::ver<'a>, + pub(crate) unk_154: Array<0x268, u8>, + pub(crate) tiling_params: TilingParameters, + pub(crate) unk_3e8: Array<0x74, u8>, + pub(crate) tpc: GpuPointer<'a, &'a [u8]>, + pub(crate) tpc_size: U64, + pub(crate) microsequence: GpuPointer<'a, &'a [u8]>, + pub(crate) microsequence_size: u32, + pub(crate) fragment_stamp_slot: u32, + pub(crate) fragment_stamp_value: EventValue, + pub(crate) unk_pointee: u32, + pub(crate) unk_pad: u32, + pub(crate) job_params2: JobParameters2<'a>, + pub(crate) encoder_params: job::raw::EncoderParams<'a>, + pub(crate) unk_55c: u32, + pub(crate) unk_560: u32, + pub(crate) memoryless_rts_used: u32, + pub(crate) unk_568: u32, + pub(crate) unk_56c: u32, + pub(crate) meta: job::raw::JobMeta, + pub(crate) unk_after_meta: u32, + pub(crate) unk_buf_0: U64, + pub(crate) unk_buf_8: U64, + pub(crate) unk_buf_10: U64, + pub(crate) cur_ts: U64, + pub(crate) start_ts: Option<GpuPointer<'a, AtomicU64>>, + pub(crate) end_ts: Option<GpuPointer<'a, AtomicU64>>, + pub(crate) unk_5c4: u32, + pub(crate) unk_5c8: u32, + pub(crate) unk_5cc: u32, + pub(crate) unk_5d0: u32, + pub(crate) client_sequence: u8, + pub(crate) pad_5d5: Array<3, u8>, + pub(crate) unk_5d8: u32, + pub(crate) unk_5dc: u8, + + #[ver(V >= V13_0B4)] + pub(crate) unk_ts: U64, + + #[ver(V >= V13_0B4)] + pub(crate) unk_5dd_8: Array<0x1b, u8>, + } +} + +#[versions(AGX)] +#[derive(Debug)] +pub(crate) struct RunVertex { + pub(crate) notifier: Arc<GpuObject<event::Notifier::ver>>, + pub(crate) scene: Arc<buffer::Scene::ver>, + pub(crate) micro_seq: microseq::MicroSequence, + pub(crate) vm_bind: mmu::VmBind, + pub(crate) timestamps: Arc<GpuObject<job::RenderTimestamps>>, +} + +#[versions(AGX)] +impl GpuStruct for RunVertex::ver { + type Raw<'a> = raw::RunVertex::ver<'a>; +} + +#[versions(AGX)] +impl workqueue::Command for RunVertex::ver {} diff --git a/drivers/gpu/drm/asahi/fw/workqueue.rs b/drivers/gpu/drm/asahi/fw/workqueue.rs new file mode 100644 index 000000000000..e81025b6c014 --- /dev/null +++ b/drivers/gpu/drm/asahi/fw/workqueue.rs @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT + +//! 
GPU work queue firmware structures + +use super::event; +use super::types::*; +use crate::event::EventValue; +use crate::{default_zeroed, trivial_gpustruct}; +use kernel::sync::Arc; + +#[derive(Debug)] +#[repr(u32)] +pub(crate) enum CommandType { + RunVertex = 0, + RunFragment = 1, + #[allow(dead_code)] + RunBlitter = 2, + RunCompute = 3, + Barrier = 4, + InitBuffer = 6, +} + +pub(crate) trait Command: GpuStruct + Send + Sync {} + +pub(crate) mod raw { + use super::*; + + #[derive(Debug)] + #[repr(C)] + pub(crate) struct Barrier { + pub(crate) tag: CommandType, + pub(crate) wait_stamp: GpuWeakPointer<FwStamp>, + pub(crate) wait_value: EventValue, + pub(crate) wait_slot: u32, + pub(crate) stamp_self: EventValue, + pub(crate) uuid: u32, + pub(crate) unk: u32, + } + + #[derive(Debug, Clone, Copy)] + #[repr(C)] + pub(crate) struct GpuContextData { + pub(crate) unk_0: u8, + pub(crate) unk_1: u8, + unk_2: Array<0x2, u8>, + pub(crate) unk_4: u8, + pub(crate) unk_5: u8, + unk_6: Array<0x18, u8>, + pub(crate) unk_1e: u8, + pub(crate) unk_1f: u8, + unk_20: Array<0x3, u8>, + pub(crate) unk_23: u8, + unk_24: Array<0x1c, u8>, + } + + impl Default for GpuContextData { + fn default() -> Self { + Self { + unk_0: 0xff, + unk_1: 0xff, + unk_2: Default::default(), + unk_4: 0, + unk_5: 1, + unk_6: Default::default(), + unk_1e: 0xff, + unk_1f: 0, + unk_20: Default::default(), + unk_23: 2, + unk_24: Default::default(), + } + } + } + + #[derive(Debug)] + #[repr(C)] + pub(crate) struct RingState { + pub(crate) gpu_doneptr: AtomicU32, + __pad0: Pad<0xc>, + pub(crate) unk_10: AtomicU32, + __pad1: Pad<0xc>, + pub(crate) unk_20: AtomicU32, + __pad2: Pad<0xc>, + pub(crate) gpu_rptr: AtomicU32, + __pad3: Pad<0xc>, + pub(crate) cpu_wptr: AtomicU32, + __pad4: Pad<0xc>, + pub(crate) rb_size: u32, + __pad5: Pad<0xc>, + // This isn't part of the structure, but it's here as a + // debugging hack so we can inspect what ring position + // the driver considered complete and freeable.
+ pub(crate) cpu_freeptr: AtomicU32, + __pad6: Pad<0xc>, + } + default_zeroed!(RingState); + + #[derive(Debug, Clone, Copy)] + #[repr(C)] + pub(crate) struct Priority(u32, u32, U64, u32, u32, u32); + + pub(crate) const PRIORITY: [Priority; 4] = [ + Priority(0, 0, U64(0xffff_ffff_ffff_0000), 1, 0, 1), + Priority(1, 1, U64(0xffff_ffff_0000_0000), 0, 0, 0), + Priority(2, 2, U64(0xffff_0000_0000_0000), 0, 0, 2), + Priority(3, 3, U64(0x0000_0000_0000_0000), 0, 0, 3), + ]; + + impl Default for Priority { + fn default() -> Priority { + PRIORITY[2] + } + } + + #[versions(AGX)] + #[derive(Debug)] + #[repr(C)] + pub(crate) struct QueueInfo<'a> { + pub(crate) state: GpuPointer<'a, super::RingState>, + pub(crate) ring: GpuPointer<'a, &'a [u64]>, + pub(crate) notifier_list: GpuPointer<'a, event::NotifierList>, + pub(crate) gpu_buf: GpuPointer<'a, &'a [u8]>, + pub(crate) gpu_rptr1: AtomicU32, + pub(crate) gpu_rptr2: AtomicU32, + pub(crate) gpu_rptr3: AtomicU32, + pub(crate) event_id: AtomicI32, + pub(crate) priority: Priority, + pub(crate) unk_4c: i32, + pub(crate) uuid: u32, + pub(crate) unk_54: i32, + pub(crate) unk_58: U64, + pub(crate) busy: AtomicU32, + pub(crate) __pad: Pad<0x20>, + pub(crate) unk_84_state: AtomicU32, + pub(crate) unk_88: u32, + pub(crate) unk_8c: u32, + pub(crate) unk_90: u32, + pub(crate) unk_94: u32, + pub(crate) pending: AtomicU32, + pub(crate) unk_9c: u32, + #[ver(V >= V13_2)] + pub(crate) unk_a0_0: u32, + pub(crate) gpu_context: GpuPointer<'a, super::GpuContextData>, + pub(crate) unk_a8: U64, + #[ver(V >= V13_2)] + pub(crate) unk_b0: u32, + } +} + +trivial_gpustruct!(Barrier); +trivial_gpustruct!(GpuContextData); +trivial_gpustruct!(RingState); + +impl Command for Barrier {} + +#[versions(AGX)] +#[derive(Debug)] +pub(crate) struct QueueInfo { + pub(crate) state: GpuObject<RingState>, + pub(crate) ring: GpuArray<u64>, + pub(crate) gpu_buf: GpuArray<u8>, + pub(crate) notifier_list: Arc<GpuObject<event::NotifierList>>, + pub(crate) gpu_context: Arc<crate::workqueue::GpuContext>, +} + +#[versions(AGX)] +impl GpuStruct for QueueInfo::ver { + type Raw<'a> = raw::QueueInfo::ver<'a>; +} diff --git a/drivers/gpu/drm/asahi/gem.rs b/drivers/gpu/drm/asahi/gem.rs new file mode 100644 index 000000000000..b334bebb0e8e --- /dev/null +++ b/drivers/gpu/drm/asahi/gem.rs @@ -0,0 +1,301 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT + +//! Asahi driver GEM object implementation +//! +//! Basic wrappers and adaptations between generic GEM shmem objects and this driver's +//! view of what a GPU buffer object is. It is in charge of keeping track of all mappings for +//! each GEM object so we can remove them when a client (File) or a Vm are destroyed, as well as +//! implementing RTKit buffers on top of GEM objects for firmware use. + +use kernel::{ + bindings, + drm::{gem, gem::shmem}, + error::Result, + prelude::*, + soc::apple::rtkit, + sync::smutex::Mutex, +}; + +use kernel::drm::gem::BaseObject; + +use core::sync::atomic::{AtomicU64, Ordering}; + +use crate::debug::*; +use crate::driver::AsahiDevice; +use crate::file::DrmFile; + +const DEBUG_CLASS: DebugFlags = DebugFlags::Gem; + +/// Represents the inner data of a GEM object for this driver. +pub(crate) struct DriverObject { + /// Whether this is a kernel-created object. + kernel: bool, + /// Object creation flags. + flags: u32, + /// VM ID for VM-private objects. 
+ vm_id: Option<u64>, + /// Locked list of mapping tuples: (file_id, vm_id, mapping) + mappings: Mutex<Vec<(u64, u64, crate::mmu::Mapping)>>, + /// ID for debug + id: u64, +} + +/// Type alias for the shmem GEM object type for this driver. +pub(crate) type Object = shmem::Object<DriverObject>; + +/// Type alias for the SGTable type for this driver. +pub(crate) type SGTable = shmem::SGTable<DriverObject>; + +/// A shared reference to a GEM object for this driver. +pub(crate) struct ObjectRef { + /// The underlying GEM object reference + pub(crate) gem: gem::ObjectRef<shmem::Object<DriverObject>>, + /// The kernel-side VMap of this object, if needed + vmap: Option<shmem::VMap<DriverObject>>, +} + +static GEM_ID: AtomicU64 = AtomicU64::new(0); + +impl DriverObject { + /// Drop all object mappings for a given file ID. + /// + /// Used on file close. + fn drop_file_mappings(&self, file_id: u64) { + let mut mappings = self.mappings.lock(); + for (index, (mapped_fid, _mapped_vmid, _mapping)) in mappings.iter().enumerate() { + if *mapped_fid == file_id { + mappings.swap_remove(index); + return; + } + } + } + + /// Drop all object mappings for a given VM ID. + /// + /// Used on VM destroy. + fn drop_vm_mappings(&self, vm_id: u64) { + let mut mappings = self.mappings.lock(); + for (index, (_mapped_fid, mapped_vmid, _mapping)) in mappings.iter().enumerate() { + if *mapped_vmid == vm_id { + mappings.swap_remove(index); + return; + } + } + } +} + +impl ObjectRef { + /// Create a new wrapper for a raw GEM object reference. + pub(crate) fn new(gem: gem::ObjectRef<shmem::Object<DriverObject>>) -> ObjectRef { + ObjectRef { gem, vmap: None } + } + + /// Return the `VMap` for this object, creating it if necessary. + pub(crate) fn vmap(&mut self) -> Result<&mut shmem::VMap<DriverObject>> { + if self.vmap.is_none() { + self.vmap = Some(self.gem.vmap()?); + } + Ok(self.vmap.as_mut().unwrap()) + } + + /// Return the IOVA of this object at which it is mapped in a given `Vm` identified by its ID, + /// if it is mapped in that `Vm`. + pub(crate) fn iova(&self, vm_id: u64) -> Option<usize> { + let mappings = self.gem.mappings.lock(); + for (_mapped_fid, mapped_vmid, mapping) in mappings.iter() { + if *mapped_vmid == vm_id { + return Some(mapping.iova()); + } + } + + None + } + + /// Returns the size of an object in bytes + pub(crate) fn size(&self) -> usize { + self.gem.size() + } + + /// Maps an object into a given `Vm` at any free address. + /// + /// Returns Err(EBUSY) if there is already a mapping. + pub(crate) fn map_into(&mut self, vm: &crate::mmu::Vm) -> Result<usize> { + let vm_id = vm.id(); + + if self.gem.vm_id.is_some() && self.gem.vm_id != Some(vm_id) { + return Err(EINVAL); + } + + let mut mappings = self.gem.mappings.lock(); + for (_mapped_fid, mapped_vmid, _mapping) in mappings.iter() { + if *mapped_vmid == vm_id { + return Err(EBUSY); + } + } + + let sgt = self.gem.sg_table()?; + let new_mapping = vm.map(self.gem.size(), sgt)?; + + let iova = new_mapping.iova(); + mappings.try_push((vm.file_id(), vm_id, new_mapping))?; + Ok(iova) + } + + /// Maps an object into a given `Vm` at any free address within a given range. + /// + /// Returns Err(EBUSY) if there is already a mapping. 
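A short usage sketch for `map_into()` above (hypothetical caller; `obj` is an `ObjectRef` that is not VM-private, and `vm_a`/`vm_b` are distinct `mmu::Vm` instances):

let iova_a = obj.map_into(&vm_a)?;     // first mapping in vm_a
let iova_b = obj.map_into(&vm_b)?;     // mapping the same object into another VM is fine
assert!(obj.map_into(&vm_a).is_err()); // a second mapping in the same VM returns EBUSY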
+ pub(crate) fn map_into_range( + &mut self, + vm: &crate::mmu::Vm, + start: u64, + end: u64, + alignment: u64, + prot: u32, + guard: bool, + ) -> Result<usize> { + let vm_id = vm.id(); + + if self.gem.vm_id.is_some() && self.gem.vm_id != Some(vm_id) { + return Err(EINVAL); + } + + let mut mappings = self.gem.mappings.lock(); + for (_mapped_fid, mapped_vmid, _mapping) in mappings.iter() { + if *mapped_vmid == vm_id { + return Err(EBUSY); + } + } + + let sgt = self.gem.sg_table()?; + let new_mapping = + vm.map_in_range(self.gem.size(), sgt, alignment, start, end, prot, guard)?; + + let iova = new_mapping.iova(); + mappings.try_push((vm.file_id(), vm_id, new_mapping))?; + Ok(iova) + } + + /// Maps an object into a given `Vm` at a specific address. + /// + /// Returns Err(EBUSY) if there is already a mapping. + /// Returns Err(ENOSPC) if the requested address is already busy. + pub(crate) fn map_at( + &mut self, + vm: &crate::mmu::Vm, + addr: u64, + prot: u32, + guard: bool, + ) -> Result { + let vm_id = vm.id(); + + if self.gem.vm_id.is_some() && self.gem.vm_id != Some(vm_id) { + return Err(EINVAL); + } + + let mut mappings = self.gem.mappings.lock(); + for (_mapped_fid, mapped_vmid, _mapping) in mappings.iter() { + if *mapped_vmid == vm_id { + return Err(EBUSY); + } + } + + let sgt = self.gem.sg_table()?; + let new_mapping = vm.map_at(addr, self.gem.size(), sgt, prot, guard)?; + + let iova = new_mapping.iova(); + assert!(iova == addr as usize); + mappings.try_push((vm.file_id(), vm_id, new_mapping))?; + Ok(()) + } + + /// Drop all mappings for this object owned by a given `Vm` identified by its ID. + pub(crate) fn drop_vm_mappings(&mut self, vm_id: u64) { + self.gem.drop_vm_mappings(vm_id); + } + + /// Drop all mappings for this object owned by a given `File` identified by its ID. + pub(crate) fn drop_file_mappings(&mut self, file_id: u64) { + self.gem.drop_file_mappings(file_id); + } +} + +/// Create a new kernel-owned GEM object. +pub(crate) fn new_kernel_object(dev: &AsahiDevice, size: usize) -> Result<ObjectRef> { + let mut gem = shmem::Object::<DriverObject>::new(dev, size)?; + gem.kernel = true; + gem.flags = 0; + + gem.set_exportable(false); + + mod_pr_debug!("DriverObject new kernel object id={}\n", gem.id); + Ok(ObjectRef::new(gem.into_ref())) +} + +/// Create a new user-owned GEM object with the given flags. +pub(crate) fn new_object( + dev: &AsahiDevice, + size: usize, + flags: u32, + vm_id: Option<u64>, +) -> Result<ObjectRef> { + let mut gem = shmem::Object::<DriverObject>::new(dev, size)?; + gem.kernel = false; + gem.flags = flags; + gem.vm_id = vm_id; + + gem.set_exportable(vm_id.is_none()); + gem.set_wc(flags & bindings::ASAHI_GEM_WRITEBACK == 0); + + mod_pr_debug!( + "DriverObject new user object: vm_id={:?} id={}\n", + vm_id, + gem.id + ); + Ok(ObjectRef::new(gem.into_ref())) +} + +/// Look up a GEM object handle for a `File` and return an `ObjectRef` for it. 
+pub(crate) fn lookup_handle(file: &DrmFile, handle: u32) -> Result<ObjectRef> { + Ok(ObjectRef::new(shmem::Object::lookup_handle(file, handle)?)) +} + +impl gem::BaseDriverObject<Object> for DriverObject { + /// Callback to create the inner data of a GEM object + fn new(_dev: &AsahiDevice, _size: usize) -> Result<DriverObject> { + let id = GEM_ID.fetch_add(1, Ordering::Relaxed); + mod_pr_debug!("DriverObject::new id={}\n", id); + Ok(DriverObject { + kernel: false, + flags: 0, + vm_id: None, + mappings: Mutex::new(Vec::new()), + id, + }) + } + + /// Callback to drop all mappings for a GEM object owned by a given `File` + fn close(obj: &Object, file: &DrmFile) { + mod_pr_debug!("DriverObject::close vm_id={:?} id={}\n", obj.vm_id, obj.id); + obj.drop_file_mappings(file.file_id()); + } +} + +impl Drop for DriverObject { + fn drop(&mut self) { + mod_pr_debug!("DriverObject::drop vm_id={:?} id={}\n", self.vm_id, self.id); + } +} + +impl shmem::DriverObject for DriverObject { + type Driver = crate::driver::AsahiDriver; +} + +impl rtkit::Buffer for ObjectRef { + fn iova(&self) -> Result<usize> { + self.iova(0).ok_or(EIO) + } + fn buf(&mut self) -> Result<&mut [u8]> { + let vmap = self.vmap.as_mut().ok_or(ENOMEM)?; + Ok(vmap.as_mut_slice()) + } +} diff --git a/drivers/gpu/drm/asahi/gpu.rs b/drivers/gpu/drm/asahi/gpu.rs new file mode 100644 index 000000000000..1384b52d11a2 --- /dev/null +++ b/drivers/gpu/drm/asahi/gpu.rs @@ -0,0 +1,1088 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT + +//! Top-level GPU manager +//! +//! This module is the root of all GPU firmware management for a given driver instance. It is +//! responsible for initialization, owning the top-level managers (events, UAT, etc.), and +//! communicating with the raw RtKit endpoints to send and receive messages to/from the GPU +//! firmware. +//! +//! It is also the point where diverging driver firmware/GPU variants (using the versions macro) +//! are unified, so that the top level of the driver itself (in `driver`) does not have to concern +//! itself with version dependence. + +use core::any::Any; +use core::sync::atomic::{AtomicBool, AtomicU64, Ordering}; +use core::time::Duration; + +use kernel::{ + delay::coarse_sleep, + error::code::*, + macros::versions, + prelude::*, + soc::apple::rtkit, + sync::{smutex::Mutex, Arc, Guard, UniqueArc}, + time, + types::ForeignOwnable, +}; + +use crate::alloc::Allocator; +use crate::box_in_place; +use crate::debug::*; +use crate::driver::AsahiDevice; +use crate::fw::channels::PipeType; +use crate::fw::types::U64; +use crate::{ + alloc, buffer, channel, event, fw, gem, hw, initdata, mem, mmu, queue, regs, workqueue, +}; + +const DEBUG_CLASS: DebugFlags = DebugFlags::Gpu; + +/// Firmware endpoint for init & incoming notifications. +const EP_FIRMWARE: u8 = 0x20; + +/// Doorbell endpoint for work/message submissions. +const EP_DOORBELL: u8 = 0x21; + +/// Initialize the GPU firmware. +const MSG_INIT: u64 = 0x81 << 48; +const INIT_DATA_MASK: u64 = (1 << 44) - 1; + +/// TX channel doorbell. +const MSG_TX_DOORBELL: u64 = 0x83 << 48; +/// Firmware control channel doorbell. +const MSG_FWCTL: u64 = 0x84 << 48; +// /// Halt the firmware (?). +// const MSG_HALT: u64 = 0x85 << 48; + +/// Receive channel doorbell notification. +const MSG_RX_DOORBELL: u64 = 0x42 << 48; + +/// Doorbell number for firmware kicks/wakeups. +const DOORBELL_KICKFW: u64 = 0x10; +/// Doorbell number for device control channel kicks. +const DOORBELL_DEVCTRL: u64 = 0x11; + +// Upper kernel half VA address ranges. 
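As a sanity check on the kernel-half layout, the carve-out sizes implied by the VA range constants that follow (inclusive tops) work out as below; a compile-time sketch, not part of the patch itself:

const _: () = {
    assert!(IOVA_KERN_PRIV_TOP - IOVA_KERN_PRIV_BASE + 1 == 32 << 30); // 32 GiB private
    assert!(IOVA_KERN_SHARED_TOP - IOVA_KERN_SHARED_BASE + 1 == 8 << 30); // 8 GiB shared
    assert!(IOVA_KERN_SHARED_RO_TOP - IOVA_KERN_SHARED_RO_BASE + 1 == 8 << 30); // 8 GiB shared RO
    assert!(IOVA_KERN_GPU_TOP - IOVA_KERN_GPU_BASE + 1 == 4 << 30); // 4 GiB GPU/FW shared
};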
+/// Private (cached) firmware structure VA range base. +const IOVA_KERN_PRIV_BASE: u64 = 0xffffffa000000000; +/// Private (cached) firmware structure VA range top. +const IOVA_KERN_PRIV_TOP: u64 = 0xffffffa7ffffffff; +/// Shared (uncached) firmware structure VA range base. +const IOVA_KERN_SHARED_BASE: u64 = 0xffffffa800000000; +/// Shared (uncached) firmware structure VA range top. +const IOVA_KERN_SHARED_TOP: u64 = 0xffffffa9ffffffff; +/// Shared (uncached) read-only firmware structure VA range base. +const IOVA_KERN_SHARED_RO_BASE: u64 = 0xffffffaa00000000; +/// Shared (uncached) read-only firmware structure VA range top. +const IOVA_KERN_SHARED_RO_TOP: u64 = 0xffffffabffffffff; +/// GPU/FW shared structure VA range base. +const IOVA_KERN_GPU_BASE: u64 = 0xffffffaf00000000; +/// GPU/FW shared structure VA range top. +const IOVA_KERN_GPU_TOP: u64 = 0xffffffafffffffff; + +/// Timeout for entering the halt state after a fault or request. +const HALT_ENTER_TIMEOUT_MS: u64 = 100; + +/// Global allocators used for kernel-half structures. +pub(crate) struct KernelAllocators { + pub(crate) private: alloc::DefaultAllocator, + pub(crate) shared: alloc::DefaultAllocator, + pub(crate) shared_ro: alloc::DefaultAllocator, + pub(crate) gpu: alloc::DefaultAllocator, +} + +/// Receive (GPU->driver) ring buffer channels. +#[versions(AGX)] +struct RxChannels { + event: channel::EventChannel, + fw_log: channel::FwLogChannel, + ktrace: channel::KTraceChannel, + stats: channel::StatsChannel::ver, +} + +/// GPU work submission pipe channels (driver->GPU). +#[versions(AGX)] +struct PipeChannels { + pub(crate) vtx: Vec<Mutex<channel::PipeChannel::ver>>, + pub(crate) frag: Vec<Mutex<channel::PipeChannel::ver>>, + pub(crate) comp: Vec<Mutex<channel::PipeChannel::ver>>, +} + +/// Misc command transmit (driver->GPU) channels. +#[versions(AGX)] +struct TxChannels { + pub(crate) device_control: channel::DeviceControlChannel::ver, +} + +/// Number of work submission pipes per type, one for each priority level. +const NUM_PIPES: usize = 4; + +/// A generic monotonically incrementing ID used to uniquely identify object instances within the +/// driver. +pub(crate) struct ID(AtomicU64); + +impl ID { + /// Create a new ID counter with a given value. + fn new(val: u64) -> ID { + ID(AtomicU64::new(val)) + } + + /// Fetch the next unique ID. + pub(crate) fn next(&self) -> u64 { + self.0.fetch_add(1, Ordering::Relaxed) + } +} + +impl Default for ID { + /// IDs default to starting at 2, as 0/1 are considered reserved for the system. + fn default() -> Self { + Self::new(2) + } +} + +/// A guard representing one active submission on the GPU. When dropped, decrements the active +/// submission count. +pub(crate) struct OpGuard(Arc<dyn GpuManagerPriv>); + +impl Drop for OpGuard { + fn drop(&mut self) { + self.0.end_op(); + } +} + +/// Set of global sequence IDs used in the driver. +#[derive(Default)] +pub(crate) struct SequenceIDs { + /// `File` instance ID. + pub(crate) file: ID, + /// `Vm` instance ID. + pub(crate) vm: ID, + /// Submission instance ID. + pub(crate) submission: ID, + /// `Queue` instance ID. + pub(crate) queue: ID, +} + +/// Top-level GPU manager that owns all the global state relevant to the driver instance. 
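The `OpGuard` above ties an active submission to a value's lifetime; a usage sketch (hypothetical caller, using `start_op()` and `run_job()` defined further down in this file):

let op = gpu.start_op()?; // bumps `pending_submissions` and kicks the firmware awake
gpu.run_job(job)?;        // submit work while the guard is held
drop(op);                 // OpGuard::drop() calls end_op(), decrementing the counter again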
+#[versions(AGX)] +pub(crate) struct GpuManager { + dev: AsahiDevice, + cfg: &'static hw::HwConfig, + dyncfg: Box<hw::DynConfig>, + pub(crate) initdata: Box<fw::types::GpuObject<fw::initdata::InitData::ver>>, + uat: Box<mmu::Uat>, + crashed: AtomicBool, + alloc: Mutex<KernelAllocators>, + io_mappings: Vec<mmu::Mapping>, + rtkit: Mutex<Option<Box<rtkit::RtKit<GpuManager::ver>>>>, + rx_channels: Mutex<Box<RxChannels::ver>>, + tx_channels: Mutex<Box<TxChannels::ver>>, + fwctl_channel: Mutex<Box<channel::FwCtlChannel>>, + pipes: PipeChannels::ver, + event_manager: Arc<event::EventManager>, + buffer_mgr: buffer::BufferManager, + ids: SequenceIDs, +} + +/// Trait used to abstract the firmware/GPU-dependent variants of the GpuManager. +pub(crate) trait GpuManager: Send + Sync { + /// Cast as an Any type. + fn as_any(&self) -> &dyn Any; + /// Cast Arc<Self> as an Any type. + fn arc_as_any(self: Arc<Self>) -> Arc<dyn Any + Sync + Send>; + /// Initialize the GPU. + fn init(&self) -> Result; + /// Update the GPU globals from global info + /// + /// TODO: Unclear what can and cannot be updated like this. + fn update_globals(&self); + /// Get a reference to the KernelAllocators. + fn alloc(&self) -> Guard<'_, Mutex<KernelAllocators>>; + /// Create a new `Vm` given a unique `File` ID. + fn new_vm(&self, file_id: u64) -> Result<mmu::Vm>; + /// Bind a `Vm` to an available slot and return the `VmBind`. + fn bind_vm(&self, vm: &mmu::Vm) -> Result<mmu::VmBind>; + /// Create a new user command queue. + fn new_queue( + &self, + vm: mmu::Vm, + ualloc: Arc<Mutex<alloc::DefaultAllocator>>, + ualloc_priv: Arc<Mutex<alloc::DefaultAllocator>>, + priority: u32, + caps: u32, + ) -> Result<Box<dyn queue::Queue>>; + /// Return a reference to the global `SequenceIDs` instance. + fn ids(&self) -> &SequenceIDs; + /// Kick the firmware (wake it up if asleep). + /// + /// This should be useful to reduce latency on work submission, so we can ask the firmware to + /// wake up while we do some preparatory work for the work submission. + fn kick_firmware(&self) -> Result; + /// Invalidate a GPU scheduler context. Must be called before the relevant structures are freed. + fn invalidate_context( + &self, + context: &fw::types::GpuObject<fw::workqueue::GpuContextData>, + ) -> Result; + /// Flush the entire firmware cache. + /// + /// TODO: Does this actually work? + fn flush_fw_cache(&self) -> Result; + /// Handle a GPU work timeout event. + fn handle_timeout(&self, counter: u32, event_slot: u32); + /// Handle a GPU fault event. + fn handle_fault(&self); + /// Wait for the GPU to become idle and power off. + fn wait_for_poweroff(&self, timeout: usize) -> Result; + /// Send a firmware control command (secure cache flush). + fn fwctl(&self, msg: fw::channels::FwCtlMsg) -> Result; + /// Get the static GPU configuration for this SoC. + fn get_cfg(&self) -> &'static hw::HwConfig; + /// Get the dynamic GPU configuration for this SoC. + fn get_dyncfg(&self) -> &hw::DynConfig; +} + +/// Private generic trait for functions that don't need to escape this module. +trait GpuManagerPriv { + /// Decrement the pending submission counter. 
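Referring back to `MSG_INIT` and `INIT_DATA_MASK` defined earlier in this file, and used by the `init()` implementation further down: only the low 44 bits of the InitData GPU VA travel in the message, with the 0x81 message type in bits 48..55. A worked example with a hypothetical address:

// initdata VA (hypothetical):   0xffff_ffa0_0001_0000
// initdata & INIT_DATA_MASK:    0x0000_0fa0_0001_0000
// MSG_INIT | (initdata & mask): 0x0081_0fa0_0001_0000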
+ fn end_op(&self); +} + +#[versions(AGX)] +#[vtable] +impl rtkit::Operations for GpuManager::ver { + type Data = Arc<GpuManager::ver>; + type Buffer = gem::ObjectRef; + + fn recv_message(data: <Self::Data as ForeignOwnable>::Borrowed<'_>, ep: u8, msg: u64) { + let dev = &data.dev; + //dev_info!(dev, "RtKit message: {:#x}:{:#x}\n", ep, msg); + + if ep != EP_FIRMWARE || msg != MSG_RX_DOORBELL { + dev_err!(dev, "Unknown message: {:#x}:{:#x}\n", ep, msg); + return; + } + + let mut ch = data.rx_channels.lock(); + + ch.fw_log.poll(); + ch.ktrace.poll(); + ch.stats.poll(); + ch.event.poll(); + } + + fn crashed(data: <Self::Data as ForeignOwnable>::Borrowed<'_>) { + let dev = &data.dev; + dev_err!(dev, "GPU firmware crashed, failing all jobs\n"); + + data.crashed.store(true, Ordering::Relaxed); + data.event_manager.fail_all(workqueue::WorkError::NoDevice); + } + + fn shmem_alloc( + data: <Self::Data as ForeignOwnable>::Borrowed<'_>, + size: usize, + ) -> Result<Self::Buffer> { + let dev = &data.dev; + mod_dev_dbg!(dev, "shmem_alloc() {:#x} bytes\n", size); + + let mut obj = gem::new_kernel_object(dev, size)?; + obj.vmap()?; + let iova = obj.map_into(data.uat.kernel_vm())?; + mod_dev_dbg!(dev, "shmem_alloc() -> VA {:#x}\n", iova); + Ok(obj) + } +} + +#[versions(AGX)] +impl GpuManager::ver { + /// Create a new GpuManager of this version/GPU combination. + #[inline(never)] + pub(crate) fn new( + dev: &AsahiDevice, + res: ®s::Resources, + cfg: &'static hw::HwConfig, + ) -> Result<Arc<GpuManager::ver>> { + let uat = Self::make_uat(dev, cfg)?; + let dyncfg = Self::make_dyncfg(dev, res, cfg, &uat)?; + + let mut alloc = KernelAllocators { + private: alloc::DefaultAllocator::new( + dev, + uat.kernel_vm(), + IOVA_KERN_PRIV_BASE, + IOVA_KERN_PRIV_TOP, + 0x80, + mmu::PROT_FW_PRIV_RW, + 1024 * 1024, + true, + fmt!("Kernel Private"), + true, + )?, + shared: alloc::DefaultAllocator::new( + dev, + uat.kernel_vm(), + IOVA_KERN_SHARED_BASE, + IOVA_KERN_SHARED_TOP, + 0x80, + mmu::PROT_FW_SHARED_RW, + 1024 * 1024, + true, + fmt!("Kernel Shared"), + false, + )?, + shared_ro: alloc::DefaultAllocator::new( + dev, + uat.kernel_vm(), + IOVA_KERN_SHARED_RO_BASE, + IOVA_KERN_SHARED_RO_TOP, + 0x80, + mmu::PROT_FW_SHARED_RO, + 64 * 1024, + true, + fmt!("Kernel RO Shared"), + false, + )?, + gpu: alloc::DefaultAllocator::new( + dev, + uat.kernel_vm(), + IOVA_KERN_GPU_BASE, + IOVA_KERN_GPU_TOP, + 0x80, + mmu::PROT_GPU_FW_SHARED_RW, + 64 * 1024, + true, + fmt!("Kernel GPU Shared"), + false, + )?, + }; + + let event_manager = Self::make_event_manager(&mut alloc)?; + let initdata = Self::make_initdata(cfg, &dyncfg, &mut alloc)?; + let mut mgr = Self::make_mgr(dev, cfg, dyncfg, uat, alloc, event_manager, initdata)?; + + { + let fwctl = mgr.fwctl_channel.lock(); + let p_fwctl = fwctl.to_raw(); + core::mem::drop(fwctl); + + mgr.initdata.fw_status.with_mut(|raw, _inner| { + raw.fwctl_channel = p_fwctl; + }); + } + + { + let txc = mgr.tx_channels.lock(); + let p_device_control = txc.device_control.to_raw(); + core::mem::drop(txc); + + let rxc = mgr.rx_channels.lock(); + let p_event = rxc.event.to_raw(); + let p_fw_log = rxc.fw_log.to_raw(); + let p_ktrace = rxc.ktrace.to_raw(); + let p_stats = rxc.stats.to_raw(); + let p_fwlog_buf = rxc.fw_log.get_buf(); + core::mem::drop(rxc); + + mgr.initdata.runtime_pointers.with_mut(|raw, _inner| { + raw.device_control = p_device_control; + raw.event = p_event; + raw.fw_log = p_fw_log; + raw.ktrace = p_ktrace; + raw.stats = p_stats; + raw.fwlog_buf = Some(p_fwlog_buf); + }); + } + + let mut 
p_pipes: Vec<fw::initdata::raw::PipeChannels::ver> = Vec::new(); + + for ((v, f), c) in mgr + .pipes + .vtx + .iter() + .zip(&mgr.pipes.frag) + .zip(&mgr.pipes.comp) + { + p_pipes.try_push(fw::initdata::raw::PipeChannels::ver { + vtx: v.lock().to_raw(), + frag: f.lock().to_raw(), + comp: c.lock().to_raw(), + })?; + } + + mgr.initdata.runtime_pointers.with_mut(|raw, _inner| { + for (i, p) in p_pipes.into_iter().enumerate() { + raw.pipes[i].vtx = p.vtx; + raw.pipes[i].frag = p.frag; + raw.pipes[i].comp = p.comp; + } + }); + + for (i, map) in cfg.io_mappings.iter().enumerate() { + if let Some(map) = map.as_ref() { + mgr.iomap(i, map)?; + } + } + + let mgr = Arc::from(mgr); + + let rtkit = Box::try_new(rtkit::RtKit::<GpuManager::ver>::new( + dev, + None, + 0, + mgr.clone(), + )?)?; + + *mgr.rtkit.lock() = Some(rtkit); + + { + let mut rxc = mgr.rx_channels.lock(); + rxc.event.set_manager(mgr.clone()); + } + + Ok(mgr) + } + + /// Build the entire GPU InitData structure tree and return it as a boxed GpuObject. + fn make_initdata( + cfg: &'static hw::HwConfig, + dyncfg: &hw::DynConfig, + alloc: &mut KernelAllocators, + ) -> Result<Box<fw::types::GpuObject<fw::initdata::InitData::ver>>> { + let mut builder = initdata::InitDataBuilder::ver::new(alloc, cfg, dyncfg); + builder.build() + } + + /// Create a fresh boxed Uat instance. + /// + /// Force disable inlining to avoid blowing up the stack. + #[inline(never)] + fn make_uat(dev: &AsahiDevice, cfg: &'static hw::HwConfig) -> Result<Box<mmu::Uat>> { + Ok(Box::try_new(mmu::Uat::new(dev, cfg)?)?) + } + + /// Actually create the final GpuManager instance, as a UniqueArc. + /// + /// Force disable inlining to avoid blowing up the stack. + #[inline(never)] + fn make_mgr( + dev: &AsahiDevice, + cfg: &'static hw::HwConfig, + dyncfg: Box<hw::DynConfig>, + uat: Box<mmu::Uat>, + mut alloc: KernelAllocators, + event_manager: Arc<event::EventManager>, + initdata: Box<fw::types::GpuObject<fw::initdata::InitData::ver>>, + ) -> Result<UniqueArc<GpuManager::ver>> { + let mut pipes = PipeChannels::ver { + vtx: Vec::new(), + frag: Vec::new(), + comp: Vec::new(), + }; + + for _i in 0..=NUM_PIPES - 1 { + pipes + .vtx + .try_push(Mutex::new(channel::PipeChannel::ver::new(dev, &mut alloc)?))?; + pipes + .frag + .try_push(Mutex::new(channel::PipeChannel::ver::new(dev, &mut alloc)?))?; + pipes + .comp + .try_push(Mutex::new(channel::PipeChannel::ver::new(dev, &mut alloc)?))?; + } + + UniqueArc::try_new(GpuManager::ver { + dev: dev.clone(), + cfg, + dyncfg, + initdata, + uat, + io_mappings: Vec::new(), + rtkit: Mutex::new(None), + crashed: AtomicBool::new(false), + rx_channels: Mutex::new(box_in_place!(RxChannels::ver { + event: channel::EventChannel::new(dev, &mut alloc, event_manager.clone())?, + fw_log: channel::FwLogChannel::new(dev, &mut alloc)?, + ktrace: channel::KTraceChannel::new(dev, &mut alloc)?, + stats: channel::StatsChannel::ver::new(dev, &mut alloc)?, + })?), + tx_channels: Mutex::new(Box::try_new(TxChannels::ver { + device_control: channel::DeviceControlChannel::ver::new(dev, &mut alloc)?, + })?), + fwctl_channel: Mutex::new(Box::try_new(channel::FwCtlChannel::new(dev, &mut alloc)?)?), + pipes, + event_manager, + buffer_mgr: buffer::BufferManager::new()?, + alloc: Mutex::new(alloc), + ids: Default::default(), + }) + } + + /// Fetch and validate the GPU dynamic configuration from the device tree and hardware. + /// + /// Force disable inlining to avoid blowing up the stack. 
+ #[inline(never)] + fn make_dyncfg( + dev: &AsahiDevice, + res: ®s::Resources, + cfg: &'static hw::HwConfig, + uat: &mmu::Uat, + ) -> Result<Box<hw::DynConfig>> { + let gpu_id = res.get_gpu_id()?; + + dev_info!(dev, "GPU Information:\n"); + dev_info!( + dev, + " Type: {:?}{:?}\n", + gpu_id.gpu_gen, + gpu_id.gpu_variant + ); + dev_info!(dev, " Max dies: {}\n", gpu_id.max_dies); + dev_info!(dev, " Clusters: {}\n", gpu_id.num_clusters); + dev_info!( + dev, + " Cores: {} ({})\n", + gpu_id.num_cores, + gpu_id.num_cores * gpu_id.num_clusters + ); + dev_info!( + dev, + " Frags: {} ({})\n", + gpu_id.num_frags, + gpu_id.num_frags * gpu_id.num_clusters + ); + dev_info!( + dev, + " GPs: {} ({})\n", + gpu_id.num_gps, + gpu_id.num_gps * gpu_id.num_clusters + ); + dev_info!(dev, " Core masks: {:#x?}\n", gpu_id.core_masks); + dev_info!(dev, " Active cores: {}\n", gpu_id.total_active_cores); + + dev_info!(dev, "Getting configuration from device tree...\n"); + let pwr_cfg = hw::PwrConfig::load(dev, cfg)?; + dev_info!(dev, "Dynamic configuration fetched\n"); + + if gpu_id.gpu_gen != cfg.gpu_gen || gpu_id.gpu_variant != cfg.gpu_variant { + dev_err!( + dev, + "GPU type mismatch (expected {:?}{:?}, found {:?}{:?})\n", + cfg.gpu_gen, + cfg.gpu_variant, + gpu_id.gpu_gen, + gpu_id.gpu_variant + ); + return Err(EIO); + } + if gpu_id.num_clusters > cfg.max_num_clusters { + dev_err!( + dev, + "Too many clusters ({} > {})\n", + gpu_id.num_clusters, + cfg.max_num_clusters + ); + return Err(EIO); + } + if gpu_id.num_cores > cfg.max_num_cores { + dev_err!( + dev, + "Too many cores ({} > {})\n", + gpu_id.num_cores, + cfg.max_num_cores + ); + return Err(EIO); + } + if gpu_id.num_frags > cfg.max_num_frags { + dev_err!( + dev, + "Too many frags ({} > {})\n", + gpu_id.num_frags, + cfg.max_num_frags + ); + return Err(EIO); + } + if gpu_id.num_gps > cfg.max_num_gps { + dev_err!( + dev, + "Too many GPs ({} > {})\n", + gpu_id.num_gps, + cfg.max_num_gps + ); + return Err(EIO); + } + + Ok(Box::try_new(hw::DynConfig { + pwr: pwr_cfg, + uat_ttb_base: uat.ttb_base(), + id: gpu_id, + })?) + } + + /// Create the global GPU event manager, and return an `Arc<>` to it. + fn make_event_manager(alloc: &mut KernelAllocators) -> Result<Arc<event::EventManager>> { + Arc::try_new(event::EventManager::new(alloc)?) + } + + /// Create a new MMIO mapping and add it to the mappings list in initdata at the specified + /// index. + fn iomap(&mut self, index: usize, map: &hw::IOMapping) -> Result { + let off = map.base & mmu::UAT_PGMSK; + let base = map.base - off; + let end = (map.base + map.size + mmu::UAT_PGMSK) & !mmu::UAT_PGMSK; + let mapping = self + .uat + .kernel_vm() + .map_io(base, end - base, map.writable)?; + + self.initdata.runtime_pointers.hwdata_b.with_mut(|raw, _| { + raw.io_mappings[index] = fw::initdata::raw::IOMapping { + phys_addr: U64(map.base as u64), + virt_addr: U64((mapping.iova() + off) as u64), + size: map.size as u32, + range_size: map.range_size as u32, + readwrite: U64(map.writable as u64), + }; + }); + + self.io_mappings.try_push(mapping)?; + Ok(()) + } + + /// Mark work associated with currently in-progress event slots as failed, after a fault or + /// timeout. 
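Worked numbers for the page rounding in `iomap()` above (illustrative only; this assumes 16 KiB UAT pages, i.e. `mmu::UAT_PGMSK == 0x3fff`, and a made-up MMIO range):

// map.base = 0x2_0000_4010, map.size = 0x20
// off  = 0x2_0000_4010 & 0x3fff                    = 0x10
// base = 0x2_0000_4010 - 0x10                      = 0x2_0000_4000
// end  = (0x2_0000_4010 + 0x20 + 0x3fff) & !0x3fff = 0x2_0000_8000
// map_io() therefore covers one 16 KiB page, and the recorded virt_addr is
// mapping.iova() + 0x10, preserving the sub-page offset.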
+ fn mark_pending_events(&self, culprit_slot: Option<u32>, error: workqueue::WorkError) { + dev_err!(self.dev, " Pending events:\n"); + + self.initdata.globals.with(|raw, _inner| { + for i in raw.pending_stamps.iter() { + let info = i.info.load(Ordering::Relaxed); + let wait_value = i.wait_value.load(Ordering::Relaxed); + + if info & 1 != 0 { + let slot = info >> 3; + let flags = info & 0x7; + dev_err!( + self.dev, + " [{}] flags={} value={:#x}\n", + slot, + flags, + wait_value + ); + let error = if culprit_slot.is_some() && culprit_slot != Some(slot) { + workqueue::WorkError::Killed + } else { + error + }; + self.event_manager.mark_error(slot, wait_value, error); + i.info.store(0, Ordering::Relaxed); + i.wait_value.store(0, Ordering::Relaxed); + } + } + }); + } + + /// Fetch the GPU MMU fault information from the hardware registers. + fn get_fault_info(&self) -> Option<regs::FaultInfo> { + let data = self.dev.data(); + + let res = match data.resources() { + Some(res) => res, + None => { + dev_err!(self.dev, " Failed to acquire resources\n"); + return None; + } + }; + + let info = res.get_fault_info(); + if info.is_some() { + dev_err!(self.dev, " Fault info: {:#x?}\n", info.as_ref().unwrap()); + } + info + } + + /// Resume the GPU firmware after it halts (due to a timeout, fault, or request). + fn recover(&self) { + self.initdata.fw_status.with(|raw, _inner| { + let halt_count = raw.flags.halt_count.load(Ordering::Relaxed); + let mut halted = raw.flags.halted.load(Ordering::Relaxed); + dev_err!(self.dev, " Halt count: {}\n", halt_count); + dev_err!(self.dev, " Halted: {}\n", halted); + + if halted == 0 { + let timeout = time::ktime_get() + Duration::from_millis(HALT_ENTER_TIMEOUT_MS); + while time::ktime_get() < timeout { + halted = raw.flags.halted.load(Ordering::Relaxed); + if halted != 0 { + break; + } + mem::sync(); + } + halted = raw.flags.halted.load(Ordering::Relaxed); + } + + if debug_enabled(DebugFlags::NoGpuRecovery) { + dev_crit!(self.dev, " GPU recovery is disabled, wedging forever!\n"); + } else if halted != 0 { + dev_err!(self.dev, " Attempting recovery...\n"); + raw.flags.halted.store(0, Ordering::SeqCst); + raw.flags.resume.store(1, Ordering::SeqCst); + } else { + dev_err!(self.dev, " Cannot recover.\n"); + } + }); + } + + /// Return the packed GPU enabled core masks. + // Only used for some versions + #[allow(dead_code)] + pub(crate) fn core_masks_packed(&self) -> &[u32] { + self.dyncfg.id.core_masks_packed.as_slice() + } + + /// Kick a submission pipe for a submitted job to tell the firmware to start processing it. 
+ pub(crate) fn run_job(&self, job: workqueue::JobSubmission::ver<'_>) -> Result { + mod_dev_dbg!(self.dev, "GPU: run_job\n"); + + let pipe_type = job.pipe_type(); + mod_dev_dbg!(self.dev, "GPU: run_job: pipe_type={:?}\n", pipe_type); + + let pipes = match pipe_type { + PipeType::Vertex => &self.pipes.vtx, + PipeType::Fragment => &self.pipes.frag, + PipeType::Compute => &self.pipes.comp, + }; + + let index: usize = job.priority() as usize; + let mut pipe = pipes.get(index).ok_or(EIO)?.lock(); + + mod_dev_dbg!(self.dev, "GPU: run_job: run()\n"); + job.run(&mut pipe); + mod_dev_dbg!(self.dev, "GPU: run_job: ring doorbell\n"); + + let mut guard = self.rtkit.lock(); + let rtk = guard.as_mut().unwrap(); + rtk.send_message( + EP_DOORBELL, + MSG_TX_DOORBELL | pipe_type as u64 | ((index as u64) << 2), + )?; + mod_dev_dbg!(self.dev, "GPU: run_job: done\n"); + + Ok(()) + } + + pub(crate) fn is_crashed(&self) -> bool { + self.crashed.load(Ordering::Relaxed) + } + + pub(crate) fn start_op(self: &Arc<GpuManager::ver>) -> Result<OpGuard> { + if self.is_crashed() { + return Err(ENODEV); + } + + let val = self + .initdata + .globals + .with(|raw, _inner| raw.pending_submissions.fetch_add(1, Ordering::Acquire)); + + mod_dev_dbg!(self.dev, "OP start (pending: {})\n", val + 1); + self.kick_firmware()?; + Ok(OpGuard(self.clone())) + } +} + +#[versions(AGX)] +impl GpuManager for GpuManager::ver { + fn as_any(&self) -> &dyn Any { + self + } + + fn arc_as_any(self: Arc<Self>) -> Arc<dyn Any + Sync + Send> { + self as Arc<dyn Any + Sync + Send> + } + + fn init(&self) -> Result { + self.tx_channels.lock().device_control.send( + &fw::channels::DeviceControlMsg::ver::Initialize(Default::default()), + ); + + let initdata = self.initdata.gpu_va().get(); + let mut guard = self.rtkit.lock(); + let rtk = guard.as_mut().unwrap(); + + rtk.boot()?; + rtk.start_endpoint(EP_FIRMWARE)?; + rtk.start_endpoint(EP_DOORBELL)?; + rtk.send_message(EP_FIRMWARE, MSG_INIT | (initdata & INIT_DATA_MASK))?; + rtk.send_message(EP_DOORBELL, MSG_TX_DOORBELL | DOORBELL_DEVCTRL)?; + core::mem::drop(guard); + + self.kick_firmware()?; + Ok(()) + } + + fn update_globals(&self) { + let mut timeout: u32 = 2; + if debug_enabled(DebugFlags::WaitForPowerOff) { + timeout = 0; + } else if debug_enabled(DebugFlags::KeepGpuPowered) { + timeout = 5000; + } + + self.initdata.globals.with(|raw, _inner| { + raw.idle_off_delay_ms.store(timeout, Ordering::Relaxed); + }); + } + + fn alloc(&self) -> Guard<'_, Mutex<KernelAllocators>> { + let mut guard = self.alloc.lock(); + let (garbage_count, garbage_bytes) = guard.private.garbage(); + if garbage_bytes > 1024 * 1024 { + mod_dev_dbg!( + self.dev, + "Collecting kalloc garbage ({} objects, {} bytes)\n", + garbage_count, + garbage_bytes + ); + if self.flush_fw_cache().is_err() { + dev_err!(self.dev, "Failed to flush FW cache\n"); + } else { + guard.private.collect_garbage(garbage_count); + } + } + + guard + } + + fn new_vm(&self, file_id: u64) -> Result<mmu::Vm> { + self.uat.new_vm(self.ids.vm.next(), file_id) + } + + fn bind_vm(&self, vm: &mmu::Vm) -> Result<mmu::VmBind> { + self.uat.bind(vm) + } + + fn new_queue( + &self, + vm: mmu::Vm, + ualloc: Arc<Mutex<alloc::DefaultAllocator>>, + ualloc_priv: Arc<Mutex<alloc::DefaultAllocator>>, + priority: u32, + caps: u32, + ) -> Result<Box<dyn queue::Queue>> { + let mut kalloc = self.alloc(); + let id = self.ids.queue.next(); + Ok(Box::try_new(queue::Queue::ver::new( + &self.dev, + vm, + &mut kalloc, + ualloc, + ualloc_priv, + self.event_manager.clone(), + &self.buffer_mgr, 
+ id, + priority, + caps, + )?)?) + } + + fn kick_firmware(&self) -> Result { + if self.is_crashed() { + return Err(ENODEV); + } + + let mut guard = self.rtkit.lock(); + let rtk = guard.as_mut().unwrap(); + rtk.send_message(EP_DOORBELL, MSG_TX_DOORBELL | DOORBELL_KICKFW)?; + + Ok(()) + } + + fn invalidate_context( + &self, + context: &fw::types::GpuObject<fw::workqueue::GpuContextData>, + ) -> Result { + mod_dev_dbg!( + self.dev, + "Invalidating GPU context @ {:?}\n", + context.weak_pointer() + ); + + if self.is_crashed() { + return Err(ENODEV); + } + + let mut guard = self.alloc.lock(); + let (garbage_count, _) = guard.private.garbage(); + + let dc = context.with( + |raw, _inner| fw::channels::DeviceControlMsg::ver::DestroyContext { + unk_4: 0, + ctx_23: raw.unk_23, + __pad0: Default::default(), + unk_c: 0, + unk_10: 0, + ctx_0: raw.unk_0, + ctx_1: raw.unk_1, + ctx_4: raw.unk_4, + __pad1: Default::default(), + unk_18: 0, + gpu_context: Some(context.weak_pointer()), + __pad2: Default::default(), + }, + ); + + mod_dev_dbg!(self.dev, "Context invalidation command: {:?}\n", &dc); + + let mut txch = self.tx_channels.lock(); + + let token = txch.device_control.send(&dc); + + { + let mut guard = self.rtkit.lock(); + let rtk = guard.as_mut().unwrap(); + rtk.send_message(EP_DOORBELL, MSG_TX_DOORBELL | DOORBELL_DEVCTRL)?; + } + + txch.device_control.wait_for(token)?; + + mod_dev_dbg!( + self.dev, + "GPU context invalidated: {:?}\n", + context.weak_pointer() + ); + + // The invalidation does a cache flush, so it is okay to collect garbage + guard.private.collect_garbage(garbage_count); + + Ok(()) + } + + fn flush_fw_cache(&self) -> Result { + mod_dev_dbg!(self.dev, "Flushing coprocessor data cache\n"); + + if self.is_crashed() { + return Err(ENODEV); + } + + // ctx_0 == 0xff or ctx_1 == 0xff cause no effect on context, + // but this command does a full cache flush too, so abuse it + // for that. + + let dc = fw::channels::DeviceControlMsg::ver::DestroyContext { + unk_4: 0, + ctx_23: 0, + __pad0: Default::default(), + unk_c: 0, + unk_10: 0, + ctx_0: 0xff, + ctx_1: 0xff, + ctx_4: 0, + __pad1: Default::default(), + unk_18: 0, + gpu_context: None, + __pad2: Default::default(), + }; + + let mut txch = self.tx_channels.lock(); + + let token = txch.device_control.send(&dc); + { + let mut guard = self.rtkit.lock(); + let rtk = guard.as_mut().unwrap(); + rtk.send_message(EP_DOORBELL, MSG_TX_DOORBELL | DOORBELL_DEVCTRL)?; + } + + txch.device_control.wait_for(token)?; + Ok(()) + } + + fn ids(&self) -> &SequenceIDs { + &self.ids + } + + fn handle_timeout(&self, counter: u32, event_slot: u32) { + dev_err!(self.dev, " (\\________/) \n"); + dev_err!(self.dev, " | | \n"); + dev_err!(self.dev, "'.| \\ , / |.'\n"); + dev_err!(self.dev, "--| / (( \\ |--\n"); + dev_err!(self.dev, ".'| _-_- |'.\n"); + dev_err!(self.dev, " |________| \n"); + dev_err!(self.dev, "** GPU timeout nya~!!!!! **\n"); + dev_err!(self.dev, " Event slot: {}\n", event_slot); + dev_err!(self.dev, " Timeout count: {}\n", counter); + + // If we have fault info, consider it a fault. 
+ let error = match self.get_fault_info() { + Some(info) => workqueue::WorkError::Fault(info), + None => workqueue::WorkError::Timeout, + }; + self.mark_pending_events(Some(event_slot), error); + self.recover(); + } + + fn handle_fault(&self) { + dev_err!(self.dev, " (\\________/) \n"); + dev_err!(self.dev, " | | \n"); + dev_err!(self.dev, "'.| \\ , / |.'\n"); + dev_err!(self.dev, "--| / (( \\ |--\n"); + dev_err!(self.dev, ".'| _-_- |'.\n"); + dev_err!(self.dev, " |________| \n"); + dev_err!(self.dev, "GPU fault nya~!!!!!\n"); + let error = match self.get_fault_info() { + Some(info) => workqueue::WorkError::Fault(info), + None => workqueue::WorkError::Unknown, + }; + self.mark_pending_events(None, error); + self.recover(); + } + + fn wait_for_poweroff(&self, timeout: usize) -> Result { + self.initdata.runtime_pointers.hwdata_a.with(|raw, _inner| { + for _i in 0..timeout { + if raw.pwr_status.load(Ordering::Relaxed) == 4 { + return Ok(()); + } + coarse_sleep(Duration::from_millis(1)); + } + Err(ETIMEDOUT) + }) + } + + fn fwctl(&self, msg: fw::channels::FwCtlMsg) -> Result { + if self.is_crashed() { + return Err(ENODEV); + } + + let mut fwctl = self.fwctl_channel.lock(); + let token = fwctl.send(&msg); + { + let mut guard = self.rtkit.lock(); + let rtk = guard.as_mut().unwrap(); + rtk.send_message(EP_DOORBELL, MSG_FWCTL)?; + } + fwctl.wait_for(token)?; + Ok(()) + } + + fn get_cfg(&self) -> &'static hw::HwConfig { + self.cfg + } + + fn get_dyncfg(&self) -> &hw::DynConfig { + &self.dyncfg + } +} + +#[versions(AGX)] +impl GpuManagerPriv for GpuManager::ver { + fn end_op(&self) { + let val = self + .initdata + .globals + .with(|raw, _inner| raw.pending_submissions.fetch_sub(1, Ordering::Release)); + + mod_dev_dbg!(self.dev, "OP end (pending: {})\n", val - 1); + } +} diff --git a/drivers/gpu/drm/asahi/hw/mod.rs b/drivers/gpu/drm/asahi/hw/mod.rs new file mode 100644 index 000000000000..a92bb70aeae8 --- /dev/null +++ b/drivers/gpu/drm/asahi/hw/mod.rs @@ -0,0 +1,522 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT + +//! Per-SoC hardware configuration structures +//! +//! This module contains the definitions used to store per-GPU and per-SoC configuration data. + +use crate::driver::AsahiDevice; +use crate::fw::types::*; +use alloc::vec::Vec; +use kernel::c_str; +use kernel::device::RawDevice; +use kernel::prelude::*; + +const MAX_POWERZONES: usize = 5; + +pub(crate) mod t600x; +pub(crate) mod t8103; +pub(crate) mod t8112; + +/// GPU generation enumeration. Note: Part of the UABI. +#[derive(Debug, PartialEq, Copy, Clone)] +#[repr(u32)] +pub(crate) enum GpuGen { + G13 = 13, + G14 = 14, +} + +/// GPU variant enumeration. Note: Part of the UABI. +#[derive(Debug, PartialEq, Copy, Clone)] +#[repr(u32)] +pub(crate) enum GpuVariant { + P = 'P' as u32, + G = 'G' as u32, + S = 'S' as u32, + C = 'C' as u32, + D = 'D' as u32, +} + +/// GPU revision enumeration. Note: Part of the UABI. +#[derive(Debug, PartialEq, Copy, Clone)] +#[repr(u32)] +pub(crate) enum GpuRevision { + A0 = 0x00, + A1 = 0x01, + B0 = 0x10, + B1 = 0x11, + C0 = 0x20, + C1 = 0x21, +} + +/// GPU core type enumeration. Note: Part of the firmware ABI. +#[derive(Debug, Copy, Clone)] +#[repr(u32)] +pub(crate) enum GpuCore { + // Unknown = 0, + // G5P = 1, + // G5G = 2, + // G9P = 3, + // G9G = 4, + // G10P = 5, + // G11P = 6, + // G11M = 7, + // G11G = 8, + // G12P = 9, + // G13P = 10, + G13G = 11, + G13S = 12, + G13C = 13, + // G14P = 14, + G14G = 15, +} + +/// GPU revision ID. Note: Part of the firmware ABI. 
+#[derive(Debug, PartialEq, Copy, Clone)] +#[repr(u32)] +pub(crate) enum GpuRevisionID { + // Unknown = 0, + A0 = 1, + A1 = 2, + B0 = 3, + B1 = 4, + C0 = 5, + C1 = 6, +} + +/// GPU driver/hardware features, from the UABI. +pub(crate) mod feat { + /// Backwards-compatible features. + pub(crate) mod compat {} + + /// Backwards-incompatible features. + pub(crate) mod incompat { + use kernel::bindings; + + /// Hardware requires Z/S compression to be mandatorily enabled. + pub(crate) const MANDATORY_ZS_COMPRESSION: u64 = + bindings::drm_asahi_feat_incompat_DRM_ASAHI_FEAT_MANDATORY_ZS_COMPRESSION as u64; + } +} + +/// A single performance state of the GPU. +#[derive(Debug)] +pub(crate) struct PState { + /// Voltage in millivolts, per GPU cluster. + pub(crate) volt_mv: Vec<u32>, + /// Frequency in hertz. + pub(crate) freq_hz: u32, + /// Maximum power consumption of the GPU at this pstate, in milliwatts. + pub(crate) pwr_mw: u32, +} + +/// A power zone definition (we have no idea what this is but Apple puts them in the DT). +#[allow(missing_docs)] +#[derive(Debug, Copy, Clone)] +pub(crate) struct PowerZone { + pub(crate) target: u32, + pub(crate) target_offset: u32, + pub(crate) filter_tc: u32, +} + +/// An MMIO mapping used by the firmware. +#[derive(Debug, Copy, Clone)] +pub(crate) struct IOMapping { + /// Base physical address of the mapping. + pub(crate) base: usize, + /// Size of the mapping. + pub(crate) size: usize, + /// Range size of the mapping (for arrays?) + pub(crate) range_size: usize, + /// Whether the mapping should be writable. + pub(crate) writable: bool, +} + +impl IOMapping { + /// Convenience constructor for a new IOMapping. + pub(crate) const fn new( + base: usize, + size: usize, + range_size: usize, + writable: bool, + ) -> IOMapping { + IOMapping { + base, + size, + range_size, + writable, + } + } +} + +/// Unknown HwConfigA fields that vary from SoC to SoC. +#[allow(missing_docs)] +#[derive(Debug, Copy, Clone)] +pub(crate) struct HwConfigA { + pub(crate) unk_87c: i32, + pub(crate) unk_8cc: u32, + pub(crate) unk_e24: u32, +} + +/// Unknown HwConfigB fields that vary from SoC to SoC. +#[allow(missing_docs)] +#[derive(Debug, Copy, Clone)] +pub(crate) struct HwConfigB { + pub(crate) unk_4e0: u64, + pub(crate) unk_534: u32, + pub(crate) unk_ab8: u32, + pub(crate) unk_abc: u32, + pub(crate) unk_b30: u32, +} + +/// Render command configs that vary from SoC to SoC. +#[derive(Debug, Copy, Clone)] +pub(crate) struct HwRenderConfig { + /// Vertex/tiling-related configuration register (lsb: disable clustering) + pub(crate) tiling_control: u32, +} + +/// Static hardware configuration for a given SoC model. +#[derive(Debug)] +pub(crate) struct HwConfig { + /// Chip ID in hex format (e.g. 0x8103 for t8103). + pub(crate) chip_id: u32, + /// GPU generation. + pub(crate) gpu_gen: GpuGen, + /// GPU variant type. + pub(crate) gpu_variant: GpuVariant, + /// GPU core type ID (as known by the firmware). + pub(crate) gpu_core: GpuCore, + /// Compatible feature bitmask for this GPU. + pub(crate) gpu_feat_compat: u64, + /// Incompatible feature bitmask for this GPU. + pub(crate) gpu_feat_incompat: u64, + + /// Base clock used used for timekeeping. + pub(crate) base_clock_hz: u32, + /// Output address space for the UAT on this SoC. + pub(crate) uat_oas: usize, + /// Maximum number of clusters on this SoC. + pub(crate) max_num_clusters: u32, + /// Maximum number of cores per cluster for this GPU. + pub(crate) max_num_cores: u32, + /// Maximum number of frags per cluster for this GPU. 
+ pub(crate) max_num_frags: u32, + /// Maximum number of GPs per cluster for this GPU. + pub(crate) max_num_gps: u32, + + /// Required size of the first preemption buffer. + pub(crate) preempt1_size: usize, + /// Required size of the second preemption buffer. + pub(crate) preempt2_size: usize, + /// Required size of the third preemption buffer. + pub(crate) preempt3_size: usize, + + /// Rendering-relevant configuration. + pub(crate) render: HwRenderConfig, + + /// Misc HWDataA field values. + pub(crate) da: HwConfigA, + /// Misc HWDataB field values. + pub(crate) db: HwConfigB, + /// HwDataShared1.table. + pub(crate) shared1_tab: &'static [i32], + /// HwDataShared1.unk_a4. + pub(crate) shared1_a4: u32, + /// HwDataShared2.table. + pub(crate) shared2_tab: &'static [i32], + /// HwDataShared2.unk_508. + pub(crate) shared2_unk_508: u32, + /// Constant related to SRAM voltages. + pub(crate) sram_k: F32, + /// Unknown per-cluster coefficients 1. + pub(crate) unk_coef_a: &'static [&'static [F32]], + /// Unknown per-cluster coefficients 2. + pub(crate) unk_coef_b: &'static [&'static [F32]], + /// Unknown table in Global struct. + pub(crate) global_tab: Option<&'static [u8]>, + + /// Temperature sensor list (8 bits per sensor). + pub(crate) fast_die0_sensor_mask: u64, + /// Temperature sensor list (alternate). + pub(crate) fast_die0_sensor_mask_alt: u64, + /// Temperature sensor present bitmask. + pub(crate) fast_die0_sensor_present: u32, + /// Required MMIO mappings for this GPU/firmware. + pub(crate) io_mappings: &'static [Option<IOMapping>], +} + +/// Dynamic (fetched from hardware/DT) configuration. +#[derive(Debug)] +pub(crate) struct DynConfig { + /// Base physical address of the UAT TTB (from DT reserved memory region). + pub(crate) uat_ttb_base: u64, + /// GPU ID configuration read from hardware. + pub(crate) id: GpuIdConfig, + /// Power calibration configuration for this specific chip/device. + pub(crate) pwr: PwrConfig, +} + +/// Specific GPU ID configuration fetched from SGX MMIO registers. +#[derive(Debug)] +pub(crate) struct GpuIdConfig { + /// GPU generation (should match static config). + pub(crate) gpu_gen: GpuGen, + /// GPU variant type (should match static config). + pub(crate) gpu_variant: GpuVariant, + /// GPU silicon revision. + pub(crate) gpu_rev: GpuRevision, + /// GPU silicon revision ID (firmware enum). + pub(crate) gpu_rev_id: GpuRevisionID, + /// Maximum number of dies supported. + pub(crate) max_dies: u32, + /// Total number of GPU clusters. + pub(crate) num_clusters: u32, + /// Maximum number of GPU cores per cluster. + pub(crate) num_cores: u32, + /// Number of frags per cluster. + pub(crate) num_frags: u32, + /// Number of GPs per cluster. + pub(crate) num_gps: u32, + /// Total number of active cores for the whole GPU. + pub(crate) total_active_cores: u32, + /// Mask of active cores per cluster. + pub(crate) core_masks: Vec<u32>, + /// Packed mask of all active cores. + pub(crate) core_masks_packed: Vec<u32>, +} + +/// Configurable GPU power settings from the device tree. +#[derive(Debug)] +pub(crate) struct PwrConfig { + /// GPU performance state list. + pub(crate) perf_states: Vec<PState>, + /// GPU power zone list. + pub(crate) power_zones: Vec<PowerZone>, + + /// Core leakage coefficient per cluster. + pub(crate) core_leak_coef: Vec<F32>, + /// SRAM leakage coefficient per cluster. + pub(crate) sram_leak_coef: Vec<F32>, + + /// Maximum total power of the GPU in milliwatts. + pub(crate) max_power_mw: u32, + /// Maximum frequency of the GPU in megahertz. 
+ pub(crate) max_freq_mhz: u32, + + /// Minimum performance state to start at. + pub(crate) perf_base_pstate: u32, + /// Maximum enabled performance state. + pub(crate) perf_max_pstate: u32, + + /// Minimum voltage for the SRAM power domain in microvolts. + pub(crate) min_sram_microvolt: u32, + + // Most of these fields are just named after Apple ADT property names and we don't fully + // understand them. They configure various power-related PID loops and filters. + /// Average power filter time constant in milliseconds. + pub(crate) avg_power_filter_tc_ms: u32, + /// Average power filter PID integral gain? + pub(crate) avg_power_ki_only: F32, + /// Average power filter PID proportional gain? + pub(crate) avg_power_kp: F32, + pub(crate) avg_power_min_duty_cycle: u32, + /// Average power target filter time constant in periods. + pub(crate) avg_power_target_filter_tc: u32, + /// "Fast die0" (temperature?) PID integral gain. + pub(crate) fast_die0_integral_gain: F32, + /// "Fast die0" (temperature?) PID proportional gain. + pub(crate) fast_die0_proportional_gain: F32, + pub(crate) fast_die0_prop_tgt_delta: u32, + pub(crate) fast_die0_release_temp: u32, + /// Delay from the fender (?) becoming idle to powerdown + pub(crate) fender_idle_off_delay_ms: u32, + /// Timeout from firmware early wake to sleep if no work was submitted (?) + pub(crate) fw_early_wake_timeout_ms: u32, + /// Delay from the GPU becoming idle to powerdown + pub(crate) idle_off_delay_ms: u32, + /// Percent? + pub(crate) perf_boost_ce_step: u32, + /// Minimum utilization before performance state is increased in %. + pub(crate) perf_boost_min_util: u32, + pub(crate) perf_filter_drop_threshold: u32, + /// Performance PID filter time constant? (periods?) + pub(crate) perf_filter_time_constant: u32, + /// Performance PID filter time constant 2? (periods?) + pub(crate) perf_filter_time_constant2: u32, + /// Performance PID integral gain. + pub(crate) perf_integral_gain: F32, + /// Performance PID integral gain 2 (?). + pub(crate) perf_integral_gain2: F32, + pub(crate) perf_integral_min_clamp: u32, + /// Performance PID proportional gain. + pub(crate) perf_proportional_gain: F32, + /// Performance PID proportional gain 2 (?). + pub(crate) perf_proportional_gain2: F32, + pub(crate) perf_reset_iters: u32, + /// Target GPU utilization for the performance controller in %. + pub(crate) perf_tgt_utilization: u32, + /// Power sampling period in milliseconds. + pub(crate) power_sample_period: u32, + /// PPM (?) filter time constant in milliseconds. + pub(crate) ppm_filter_time_constant_ms: u32, + /// PPM (?) filter PID integral gain. + pub(crate) ppm_ki: F32, + /// PPM (?) filter PID proportional gain. + pub(crate) ppm_kp: F32, + /// Power consumption filter time constant (periods?) + pub(crate) pwr_filter_time_constant: u32, + /// Power consumption filter PID integral gain. + pub(crate) pwr_integral_gain: F32, + pub(crate) pwr_integral_min_clamp: u32, + pub(crate) pwr_min_duty_cycle: u32, + pub(crate) pwr_proportional_gain: F32, +} + +impl PwrConfig { + /// Load the GPU power configuration from the device tree. + pub(crate) fn load(dev: &AsahiDevice, cfg: &HwConfig) -> Result<PwrConfig> { + let mut perf_states = Vec::new(); + + let node = dev.of_node().ok_or(EIO)?; + let opps = node + .parse_phandle(c_str!("operating-points-v2"), 0) + .ok_or(EIO)?; + + let mut max_power_mw: u32 = 0; + let mut max_freq_mhz: u32 = 0; + + macro_rules! 
prop { + ($prop:expr, $default:expr) => {{ + node.get_opt_property(c_str!($prop)) + .map_err(|e| { + dev_err!(dev, "Error reading property {}: {:?}\n", $prop, e); + e + })? + .unwrap_or($default) + }}; + ($prop:expr) => {{ + node.get_property(c_str!($prop)).map_err(|e| { + dev_err!(dev, "Error reading property {}: {:?}\n", $prop, e); + e + })? + }}; + } + + for opp in opps.children() { + let freq_hz: u64 = opp.get_property(c_str!("opp-hz"))?; + let mut volt_uv: Vec<u32> = opp.get_property(c_str!("opp-microvolt"))?; + let pwr_uw: u32 = opp.get_property(c_str!("opp-microwatt"))?; + + if volt_uv.len() != cfg.max_num_clusters as usize { + dev_err!( + dev, + "Invalid opp-microvolt length (expected {}, got {})\n", + cfg.max_num_clusters, + volt_uv.len() + ); + return Err(EINVAL); + } + + volt_uv.iter_mut().for_each(|a| *a /= 1000); + let volt_mv = volt_uv; + + let pwr_mw = pwr_uw / 1000; + max_power_mw = max_power_mw.max(pwr_mw); + + let freq_mhz: u32 = (freq_hz / 1_000_000).try_into()?; + max_freq_mhz = max_freq_mhz.max(freq_mhz); + + perf_states.try_push(PState { + freq_hz: freq_hz.try_into()?, + volt_mv, + pwr_mw, + })?; + } + + let pz_data = prop!("apple,power-zones", Vec::new()); + + if pz_data.len() > 3 * MAX_POWERZONES || pz_data.len() % 3 != 0 { + dev_err!(dev, "Invalid apple,power-zones value\n"); + return Err(EINVAL); + } + + let pz_count = pz_data.len() / 3; + let mut power_zones = Vec::new(); + for i in (0..pz_count).step_by(3) { + power_zones.try_push(PowerZone { + target: pz_data[i], + target_offset: pz_data[i + 1], + filter_tc: pz_data[i + 2], + })?; + } + + let core_leak_coef: Vec<F32> = prop!("apple,core-leak-coef"); + let sram_leak_coef: Vec<F32> = prop!("apple,sram-leak-coef"); + + if core_leak_coef.len() != cfg.max_num_clusters as usize { + dev_err!(dev, "Invalid apple,core-leak-coef\n"); + return Err(EINVAL); + } + if sram_leak_coef.len() != cfg.max_num_clusters as usize { + dev_err!(dev, "Invalid apple,sram_leak_coef\n"); + return Err(EINVAL); + } + + Ok(PwrConfig { + core_leak_coef, + sram_leak_coef, + + max_power_mw, + max_freq_mhz, + + perf_base_pstate: prop!("apple,perf-base-pstate", 1), + perf_max_pstate: perf_states.len() as u32 - 1, + min_sram_microvolt: prop!("apple,min-sram-microvolt"), + + avg_power_filter_tc_ms: prop!("apple,avg-power-filter-tc-ms"), + avg_power_ki_only: prop!("apple,avg-power-ki-only"), + avg_power_kp: prop!("apple,avg-power-kp"), + avg_power_min_duty_cycle: prop!("apple,avg-power-min-duty-cycle"), + avg_power_target_filter_tc: prop!("apple,avg-power-target-filter-tc"), + fast_die0_integral_gain: prop!("apple,fast-die0-integral-gain"), + fast_die0_proportional_gain: prop!("apple,fast-die0-proportional-gain"), + fast_die0_prop_tgt_delta: prop!("apple,fast-die0-prop-tgt-delta", 0), + fast_die0_release_temp: prop!("apple,fast-die0-release-temp", 80), + fender_idle_off_delay_ms: prop!("apple,fender-idle-off-delay-ms", 40), + fw_early_wake_timeout_ms: prop!("apple,fw-early-wake-timeout-ms", 5), + idle_off_delay_ms: prop!("apple,idle-off-delay-ms", 2), + perf_boost_ce_step: prop!("apple,perf-boost-ce-step", 25), + perf_boost_min_util: prop!("apple,perf-boost-min-util", 100), + perf_filter_drop_threshold: prop!("apple,perf-filter-drop-threshold"), + perf_filter_time_constant2: prop!("apple,perf-filter-time-constant2"), + perf_filter_time_constant: prop!("apple,perf-filter-time-constant"), + perf_integral_gain2: prop!("apple,perf-integral-gain2"), + perf_integral_gain: prop!("apple,perf-integral-gain", f32!(7.8956833)), + perf_integral_min_clamp: 
prop!("apple,perf-integral-min-clamp"), + perf_proportional_gain2: prop!("apple,perf-proportional-gain2"), + perf_proportional_gain: prop!("apple,perf-proportional-gain", f32!(14.707963)), + perf_reset_iters: prop!("apple,perf-reset-iters", 6), + perf_tgt_utilization: prop!("apple,perf-tgt-utilization"), + power_sample_period: prop!("apple,power-sample-period"), + ppm_filter_time_constant_ms: prop!("apple,ppm-filter-time-constant-ms"), + ppm_ki: prop!("apple,ppm-ki"), + ppm_kp: prop!("apple,ppm-kp"), + pwr_filter_time_constant: prop!("apple,pwr-filter-time-constant", 313), + pwr_integral_gain: prop!("apple,pwr-integral-gain", f32!(0.0202129)), + pwr_integral_min_clamp: prop!("apple,pwr-integral-min-clamp", 0), + pwr_min_duty_cycle: prop!("apple,pwr-min-duty-cycle"), + pwr_proportional_gain: prop!("apple,pwr-proportional-gain", f32!(5.2831855)), + + perf_states, + power_zones, + }) + } + + pub(crate) fn min_frequency_khz(&self) -> u32 { + self.perf_states[self.perf_base_pstate as usize].freq_hz / 1000 + } + + pub(crate) fn max_frequency_khz(&self) -> u32 { + self.perf_states[self.perf_max_pstate as usize].freq_hz / 1000 + } +} diff --git a/drivers/gpu/drm/asahi/hw/t600x.rs b/drivers/gpu/drm/asahi/hw/t600x.rs new file mode 100644 index 000000000000..8a8267a7e18a --- /dev/null +++ b/drivers/gpu/drm/asahi/hw/t600x.rs @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT + +//! Hardware configuration for t600x (M1 Pro/Max/Ultra) platforms. + +use crate::f32; + +use super::*; + +const fn iomaps(mcc_count: usize, has_die1: bool) -> [Option<IOMapping>; 20] { + [ + Some(IOMapping::new(0x404d00000, 0x1c000, 0x1c000, true)), // Fender + Some(IOMapping::new(0x20e100000, 0x4000, 0x4000, false)), // AICTimer + Some(IOMapping::new(0x28e104000, 0x4000, 0x4000, true)), // AICSWInt + Some(IOMapping::new(0x404000000, 0x20000, 0x20000, true)), // RGX + None, // UVD + None, // unused + None, // DisplayUnderrunWA + Some(IOMapping::new(0x28e494000, 0x1000, 0x1000, false)), // AnalogTempSensorControllerRegs + None, // PMPDoorbell + Some(IOMapping::new(0x404d80000, 0x8000, 0x8000, true)), // MetrologySensorRegs + Some(IOMapping::new(0x204d61000, 0x1000, 0x1000, true)), // GMGIFAFRegs + Some(IOMapping::new( + 0x200000000, + mcc_count * 0xd8000, + 0xd6400, + true, + )), // MCache registers + None, // AICBankedRegisters + None, // PMGRScratch + Some(IOMapping::new(0x2643c4000, 0x1000, 0x1000, true)), // NIA Special agent idle register die 0 + if has_die1 { + // NIA Special agent idle register die 1 + Some(IOMapping::new(0x22643c4000, 0x1000, 0x1000, true)) + } else { + None + }, + None, // CRE registers + None, // Streaming codec registers + Some(IOMapping::new(0x28e3d0000, 0x1000, 0x1000, true)), // ? + Some(IOMapping::new(0x28e3c0000, 0x1000, 0x1000, false)), // ? 
+ ] +} + +pub(crate) const HWCONFIG_T6002: super::HwConfig = HwConfig { + chip_id: 0x6002, + gpu_gen: GpuGen::G13, + gpu_variant: GpuVariant::D, + gpu_core: GpuCore::G13C, + gpu_feat_compat: 0, + gpu_feat_incompat: feat::incompat::MANDATORY_ZS_COMPRESSION, + + base_clock_hz: 24_000_000, + uat_oas: 42, + max_num_clusters: 8, + max_num_cores: 8, + max_num_frags: 8, + max_num_gps: 4, + + preempt1_size: 0x540, + preempt2_size: 0x280, + preempt3_size: 0x20, + + render: HwRenderConfig { + tiling_control: 0xa540, + }, + + da: HwConfigA { + unk_87c: 900, + unk_8cc: 11000, + unk_e24: 125, + }, + db: HwConfigB { + unk_4e0: 4, + unk_534: 1, + unk_ab8: 0x2084, + unk_abc: 0x80, + unk_b30: 0, + }, + shared1_tab: &[ + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + ], + shared1_a4: 0xffff, + shared2_tab: &[-1, -1, -1, -1, 0x2aa, 0xaaa, -1, -1, 0, 0], + shared2_unk_508: 0xcc00001, + sram_k: f32!(1.02), + unk_coef_a: &[ + &f32!([9.838]), + &f32!([9.819]), + &f32!([9.826]), + &f32!([9.799]), + &f32!([9.799]), + &f32!([9.826]), + &f32!([9.819]), + &f32!([9.838]), + ], + unk_coef_b: &[ + &f32!([13.0]), + &f32!([13.0]), + &f32!([13.0]), + &f32!([13.0]), + &f32!([13.0]), + &f32!([13.0]), + &f32!([13.0]), + &f32!([13.0]), + ], + global_tab: Some(&[ + 0, 1, 2, 1, 1, 90, 75, 1, 1, 1, 2, 90, 75, 1, 1, 1, 1, 90, 75, 1, 1, + ]), + fast_die0_sensor_mask: 0x8080808080808080, + fast_die0_sensor_mask_alt: 0x9090909090909090, + fast_die0_sensor_present: 0xff, + io_mappings: &iomaps(16, true), +}; + +pub(crate) const HWCONFIG_T6001: super::HwConfig = HwConfig { + chip_id: 0x6001, + gpu_variant: GpuVariant::C, + gpu_core: GpuCore::G13C, + + max_num_clusters: 4, + fast_die0_sensor_mask: 0x80808080, + fast_die0_sensor_mask_alt: 0x90909090, + fast_die0_sensor_present: 0x0f, + io_mappings: &iomaps(8, false), + ..HWCONFIG_T6002 +}; + +pub(crate) const HWCONFIG_T6000: super::HwConfig = HwConfig { + chip_id: 0x6000, + gpu_variant: GpuVariant::S, + gpu_core: GpuCore::G13S, + + max_num_clusters: 2, + fast_die0_sensor_mask: 0x8080, + fast_die0_sensor_mask_alt: 0x9090, + fast_die0_sensor_present: 0x03, + io_mappings: &iomaps(4, false), + ..HWCONFIG_T6001 +}; diff --git a/drivers/gpu/drm/asahi/hw/t8103.rs b/drivers/gpu/drm/asahi/hw/t8103.rs new file mode 100644 index 000000000000..3d38b088a0f5 --- /dev/null +++ b/drivers/gpu/drm/asahi/hw/t8103.rs @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT + +//! Hardware configuration for t8103 platforms (M1). 
+ +use crate::f32; + +use super::*; + +pub(crate) const HWCONFIG: super::HwConfig = HwConfig { + chip_id: 0x8103, + gpu_gen: GpuGen::G13, + gpu_variant: GpuVariant::G, + gpu_core: GpuCore::G13G, + gpu_feat_compat: 0, + gpu_feat_incompat: 0, + + base_clock_hz: 24_000_000, + uat_oas: 40, + max_num_clusters: 1, + max_num_cores: 8, + max_num_frags: 8, + max_num_gps: 4, + + preempt1_size: 0x540, + preempt2_size: 0x280, + preempt3_size: 0x20, + + render: HwRenderConfig { + // bit 0: disable clustering (always) + tiling_control: 0xa041, + }, + + da: HwConfigA { + unk_87c: -220, + unk_8cc: 9880, + unk_e24: 112, + }, + db: HwConfigB { + unk_4e0: 0, + unk_534: 0, + unk_ab8: 0x48, + unk_abc: 0x8, + unk_b30: 0, + }, + shared1_tab: &[ + -1, 0x7282, 0x50ea, 0x370a, 0x25be, 0x1c1f, 0x16fb, -1, -1, -1, -1, -1, -1, -1, -1, -1, + ], + shared1_a4: 0xffff, + shared2_tab: &[0x800, 0x1555, -1, -1, -1, -1, -1, -1, 0, 0], + shared2_unk_508: 0xc00007, + sram_k: f32!(1.02), + unk_coef_a: &[], + unk_coef_b: &[], + global_tab: None, + fast_die0_sensor_mask: 0x12, + fast_die0_sensor_mask_alt: 0x12, + fast_die0_sensor_present: 0x01, + io_mappings: &[ + Some(IOMapping::new(0x204d00000, 0x1c000, 0x1c000, true)), // Fender + Some(IOMapping::new(0x20e100000, 0x4000, 0x4000, false)), // AICTimer + Some(IOMapping::new(0x23b104000, 0x4000, 0x4000, true)), // AICSWInt + Some(IOMapping::new(0x204000000, 0x20000, 0x20000, true)), // RGX + None, // UVD + None, // unused + None, // DisplayUnderrunWA + Some(IOMapping::new(0x23b2e8000, 0x1000, 0x1000, false)), // AnalogTempSensorControllerRegs + Some(IOMapping::new(0x23bc00000, 0x1000, 0x1000, true)), // PMPDoorbell + Some(IOMapping::new(0x204d80000, 0x5000, 0x5000, true)), // MetrologySensorRegs + Some(IOMapping::new(0x204d61000, 0x1000, 0x1000, true)), // GMGIFAFRegs + Some(IOMapping::new(0x200000000, 0xd6400, 0xd6400, true)), // MCache registers + None, // AICBankedRegisters + Some(IOMapping::new(0x23b738000, 0x1000, 0x1000, true)), // PMGRScratch + None, // NIA Special agent idle register die 0 + None, // NIA Special agent idle register die 1 + None, // CRE registers + None, // Streaming codec registers + None, // + None, // + ], +}; diff --git a/drivers/gpu/drm/asahi/hw/t8112.rs b/drivers/gpu/drm/asahi/hw/t8112.rs new file mode 100644 index 000000000000..5624dca130be --- /dev/null +++ b/drivers/gpu/drm/asahi/hw/t8112.rs @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT + +//! Hardware configuration for t8112 platforms (M2). 
+ +use crate::f32; + +use super::*; + +pub(crate) const HWCONFIG: super::HwConfig = HwConfig { + chip_id: 0x8112, + gpu_gen: GpuGen::G14, + gpu_variant: GpuVariant::G, + gpu_core: GpuCore::G14G, + gpu_feat_compat: 0, + gpu_feat_incompat: 0, + + base_clock_hz: 24_000_000, + uat_oas: 40, + max_num_clusters: 1, + max_num_cores: 10, + max_num_frags: 10, + max_num_gps: 4, + + preempt1_size: 0x540, + preempt2_size: 0x280, + preempt3_size: 0x20, + + render: HwRenderConfig { + // TODO: this is unused here, may be present in newer FW + tiling_control: 0xa041, + }, + + da: HwConfigA { + unk_87c: 900, + unk_8cc: 11000, + unk_e24: 125, + }, + db: HwConfigB { + unk_4e0: 4, + unk_534: 0, + unk_ab8: 0x2048, + unk_abc: 0x4000, + unk_b30: 1, + }, + shared1_tab: &[ + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, + ], + shared1_a4: 0, + shared2_tab: &[-1, -1, -1, -1, -1, -1, -1, -1, 0xaa5aa, 0], + shared2_unk_508: 0xc00000, + sram_k: f32!(1.02), + // 13.2: last coef changed from 6.6 to 5.3, assuming that was a fix we can backport + unk_coef_a: &[&f32!([0.0, 0.0, 0.0, 0.0, 5.3, 0.0, 5.3, /*6.6*/ 5.3])], + unk_coef_b: &[&f32!([0.0, 0.0, 0.0, 0.0, 5.3, 0.0, 5.3, /*6.6*/ 5.3])], + global_tab: None, + fast_die0_sensor_mask: 0x6800, + fast_die0_sensor_mask_alt: 0x6800, + fast_die0_sensor_present: 0x02, + io_mappings: &[ + Some(IOMapping::new(0x204d00000, 0x14000, 0x14000, true)), // Fender + Some(IOMapping::new(0x20e100000, 0x4000, 0x4000, false)), // AICTimer + Some(IOMapping::new(0x23b0c4000, 0x4000, 0x4000, true)), // AICSWInt + Some(IOMapping::new(0x204000000, 0x20000, 0x20000, true)), // RGX + None, // UVD + None, // unused + None, // DisplayUnderrunWA + Some(IOMapping::new(0x23b2c0000, 0x1000, 0x1000, false)), // AnalogTempSensorControllerRegs + None, // PMPDoorbell + Some(IOMapping::new(0x204d80000, 0x8000, 0x8000, true)), // MetrologySensorRegs + Some(IOMapping::new(0x204d61000, 0x1000, 0x1000, true)), // GMGIFAFRegs + Some(IOMapping::new(0x200000000, 0xd6400, 0xd6400, true)), // MCache registers + None, // AICBankedRegisters + None, // PMGRScratch + None, // NIA Special agent idle register die 0 + None, // NIA Special agent idle register die 1 + Some(IOMapping::new(0x204e00000, 0x10000, 0x10000, true)), // CRE registers + Some(IOMapping::new(0x27d050000, 0x4000, 0x4000, true)), // Streaming codec registers + Some(IOMapping::new(0x23b3d0000, 0x1000, 0x1000, true)), // + Some(IOMapping::new(0x23b3c0000, 0x1000, 0x1000, true)), // + ], +}; diff --git a/drivers/gpu/drm/asahi/initdata.rs b/drivers/gpu/drm/asahi/initdata.rs new file mode 100644 index 000000000000..472c42169130 --- /dev/null +++ b/drivers/gpu/drm/asahi/initdata.rs @@ -0,0 +1,777 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +#![allow(clippy::unusual_byte_groupings)] + +//! GPU initialization data builder. +//! +//! The root of all interaction between the GPU firmware and the host driver is a complex set of +//! nested structures that we call InitData. This includes both GPU hardware/firmware configuration +//! and the pointers to the ring buffers and global data fields that are used for communication at +//! runtime. +//! +//! Many of these structures are poorly understood, so there are lots of hardcoded unknown values +//! derived from observing the InitData structures that macOS generates. 
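The builder below repeatedly converts device-tree time constants into first-order low-pass filter gains (for example ppm_filter_a, perf_filter_a and avg_power_filter_a in hwdata_a(), each stored together with its 1 - a complement). A minimal standalone sketch of that arithmetic, using plain f32 instead of the driver's F32 soft-float wrapper and purely illustrative numbers:

// Illustration only, not driver code: derive a filter gain and its complement from a time
// constant in milliseconds and the power sample period, mirroring the pattern in hwdata_a().
fn filter_gain(tc_ms: u32, sample_period_ms: u32) -> (f32, f32) {
    let periods = tc_ms / sample_period_ms; // e.g. ppm_filter_time_constant_ms / power_sample_period
    let a = 1.0 / periods as f32;           // stored as *_filter_a
    (a, 1.0 - a)                            // (filter_a, filter_a_neg)
}

fn main() {
    // With a made-up 32 ms sample period and 256 ms time constant: 8 periods -> a = 0.125.
    assert_eq!(filter_gain(256, 32), (0.125, 0.875));
}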
+ +use crate::fw::initdata::*; +use crate::fw::types::*; +use crate::{box_in_place, f32, place}; +use crate::{gpu, hw, mmu}; +use kernel::error::Result; +use kernel::macros::versions; + +/// Builder helper for the global GPU InitData. +#[versions(AGX)] +pub(crate) struct InitDataBuilder<'a> { + alloc: &'a mut gpu::KernelAllocators, + cfg: &'a hw::HwConfig, + dyncfg: &'a hw::DynConfig, +} + +#[versions(AGX)] +impl<'a> InitDataBuilder::ver<'a> { + /// Create a new InitData builder + pub(crate) fn new( + alloc: &'a mut gpu::KernelAllocators, + cfg: &'a hw::HwConfig, + dyncfg: &'a hw::DynConfig, + ) -> InitDataBuilder::ver<'a> { + InitDataBuilder::ver { alloc, cfg, dyncfg } + } + + /// Create the HwDataShared1 structure, which is used in two places in InitData. + #[inline(never)] + fn hw_shared1(cfg: &hw::HwConfig) -> raw::HwDataShared1 { + let mut ret = raw::HwDataShared1 { + unk_a4: cfg.shared1_a4, + ..Default::default() + }; + for (i, val) in cfg.shared1_tab.iter().enumerate() { + ret.table[i] = *val; + } + ret + } + + fn init_curve( + curve: &mut raw::HwDataShared2Curve, + unk_0: u32, + unk_4: u32, + t1: &[i16], + t2: &[i16], + t3: &[&[i32]], + ) { + curve.unk_0 = unk_0; + curve.unk_4 = unk_4; + (*curve.t1)[..t1.len()].copy_from_slice(t1); + (*curve.t1)[t1.len()..].fill(t1[0]); + (*curve.t2)[..t2.len()].copy_from_slice(t2); + (*curve.t2)[t2.len()..].fill(t2[0]); + for (i, a) in curve.t3.iter_mut().enumerate() { + a.fill(0x3ffffff); + if i < t3.len() { + let b = t3[i]; + (**a)[..b.len()].copy_from_slice(b); + } + } + } + + /// Create the HwDataShared2 structure, which is used in two places in InitData. + #[inline(never)] + fn hw_shared2(cfg: &hw::HwConfig) -> Result<Box<raw::HwDataShared2>> { + let mut ret = box_in_place!(raw::HwDataShared2 { + unk_28: Array::new([0xff; 16]), + t8112: Default::default(), + unk_508: cfg.shared2_unk_508, + ..Default::default() + })?; + + for (i, val) in cfg.shared2_tab.iter().enumerate() { + ret.table[i] = *val; + } + + if cfg.chip_id == 0x8112 { + ret.t8112.unk_14 = 0x6000000; + Self::init_curve(&mut ret.t8112.curve1, 0, 0x20000000, &[-1], &[0x0f07], &[]); + Self::init_curve( + &mut ret.t8112.curve2, + 7, + 0x80000000, + &[-1, 25740, 17429, 12550, 9597, 7910, 6657, 5881, 5421], + &[ + 0x0f07, 0x04c0, 0x06c0, 0x08c0, 0x0ac0, 0x0c40, 0x0dc0, 0x0ec0, 0x0f80, + ], + &[ + &[0x3ffffff, 107, 101, 94, 87, 82, 77, 73, 71], + &[ + 0x3ffffff, 38240, 36251, 33562, 31368, 29379, 27693, 26211, 25370, + ], + &[ + 0x3ffffff, 123933, 117485, 108771, 101661, 95217, 89751, 84948, 82222, + ], + ], + ); + } + + Ok(ret) + } + + /// Create the HwDataShared3 structure, which is used in two places in InitData. + #[inline(never)] + fn hw_shared3(cfg: &hw::HwConfig) -> Result<Box<raw::HwDataShared3>> { + let mut ret = box_in_place!(raw::HwDataShared3 { + ..Default::default() + })?; + + if cfg.chip_id == 0x8112 { + ret.unk_0 = 1; + ret.unk_4 = 500; + ret.unk_8 = 5; + ret.table.copy_from_slice(&[ + 10700, 10700, 10700, 10700, 10700, 6000, 1000, 1000, 1000, 10700, 10700, 10700, + 10700, 10700, 10700, 10700, + ]); + ret.unk_4c = 1; + } + + Ok(ret) + } + + /// Create an unknown T81xx-specific data structure. + fn t81xx_data(dyncfg: &'a hw::DynConfig) -> raw::T81xxData { + raw::T81xxData { + unk_d8c: 0x80000000, + unk_d90: 4, + unk_d9c: f32!(0.6), + unk_da4: f32!(0.4), + unk_dac: f32!(0.38552), + unk_db8: f32!(65536.0), + unk_dbc: f32!(13.56), + max_pstate_scaled: 100 * dyncfg.pwr.perf_max_pstate, + ..Default::default() + } + } + + /// Create the HwDataA structure. 
This mostly contains power-related configuration. + #[inline(never)] + fn hwdata_a(&mut self) -> Result<GpuObject<HwDataA::ver>> { + self.alloc + .private + .new_inplace(Default::default(), |_inner, ptr| { + let pwr = &self.dyncfg.pwr; + let period_ms = pwr.power_sample_period; + let period_s = F32::from(period_ms) / f32!(1000.0); + let ppm_filter_tc_periods = pwr.ppm_filter_time_constant_ms / period_ms; + #[ver(V >= V13_0B4)] + let ppm_filter_tc_ms_rounded = ppm_filter_tc_periods * period_ms; + let ppm_filter_a = f32!(1.0) / ppm_filter_tc_periods.into(); + let perf_filter_a = f32!(1.0) / pwr.perf_filter_time_constant.into(); + let perf_filter_a2 = f32!(1.0) / pwr.perf_filter_time_constant2.into(); + let avg_power_target_filter_a = f32!(1.0) / pwr.avg_power_target_filter_tc.into(); + let avg_power_filter_tc_periods = pwr.avg_power_filter_tc_ms / period_ms; + #[ver(V >= V13_0B4)] + let avg_power_filter_tc_ms_rounded = avg_power_filter_tc_periods * period_ms; + let avg_power_filter_a = f32!(1.0) / avg_power_filter_tc_periods.into(); + let pwr_filter_a = f32!(1.0) / pwr.pwr_filter_time_constant.into(); + + let base_ps = pwr.perf_base_pstate; + let base_ps_scaled = 100 * base_ps; + let max_ps = pwr.perf_max_pstate; + let max_ps_scaled = 100 * max_ps; + let boost_ps_count = max_ps - base_ps; + + let base_clock_khz = self.cfg.base_clock_hz / 1000; + let clocks_per_period = base_clock_khz * period_ms; + + let raw = place!( + ptr, + raw::HwDataA::ver { + clocks_per_period: clocks_per_period, + #[ver(V >= V13_0B4)] + clocks_per_period_2: clocks_per_period, + pwr_status: AtomicU32::new(4), + unk_10: f32!(1.0), + actual_pstate: 1, + tgt_pstate: 1, + base_pstate_scaled: base_ps_scaled, + unk_40: 1, + max_pstate_scaled: max_ps_scaled, + min_pstate_scaled: 100, + unk_64c: 625, + pwr_filter_a_neg: f32!(1.0) - pwr_filter_a, + pwr_filter_a: pwr_filter_a, + pwr_integral_gain: pwr.pwr_integral_gain, + pwr_integral_min_clamp: pwr.pwr_integral_min_clamp.into(), + max_power_1: pwr.max_power_mw.into(), + pwr_proportional_gain: pwr.pwr_proportional_gain, + pwr_pstate_related_k: -F32::from(max_ps_scaled) / pwr.max_power_mw.into(), + pwr_pstate_max_dc_offset: pwr.pwr_min_duty_cycle as i32 + - max_ps_scaled as i32, + max_pstate_scaled_2: max_ps_scaled, + max_power_2: pwr.max_power_mw, + max_pstate_scaled_3: max_ps_scaled, + ppm_filter_tc_periods_x4: ppm_filter_tc_periods * 4, + ppm_filter_a_neg: f32!(1.0) - ppm_filter_a, + ppm_filter_a: ppm_filter_a, + ppm_ki_dt: pwr.ppm_ki * period_s, + unk_6fc: f32!(65536.0), + ppm_kp: pwr.ppm_kp, + pwr_min_duty_cycle: pwr.pwr_min_duty_cycle, + max_pstate_scaled_4: max_ps_scaled, + unk_71c: f32!(0.0), + max_power_3: pwr.max_power_mw, + cur_power_mw_2: 0x0, + ppm_filter_tc_ms: pwr.ppm_filter_time_constant_ms, + #[ver(V >= V13_0B4)] + ppm_filter_tc_clks: ppm_filter_tc_ms_rounded * base_clock_khz, + perf_tgt_utilization: pwr.perf_tgt_utilization, + perf_boost_min_util: pwr.perf_boost_min_util, + perf_boost_ce_step: pwr.perf_boost_ce_step, + perf_reset_iters: pwr.perf_reset_iters, + unk_774: 6, + unk_778: 1, + perf_filter_drop_threshold: pwr.perf_filter_drop_threshold, + perf_filter_a_neg: f32!(1.0) - perf_filter_a, + perf_filter_a2_neg: f32!(1.0) - perf_filter_a2, + perf_filter_a: perf_filter_a, + perf_filter_a2: perf_filter_a2, + perf_ki: pwr.perf_integral_gain, + perf_ki2: pwr.perf_integral_gain2, + perf_integral_min_clamp: pwr.perf_integral_min_clamp.into(), + unk_79c: f32!(95.0), + perf_kp: pwr.perf_proportional_gain, + perf_kp2: pwr.perf_proportional_gain2, + boost_state_unk_k: 
F32::from(boost_ps_count) / f32!(0.95), + base_pstate_scaled_2: base_ps_scaled, + max_pstate_scaled_5: max_ps_scaled, + base_pstate_scaled_3: base_ps_scaled, + perf_tgt_utilization_2: pwr.perf_tgt_utilization, + base_pstate_scaled_4: base_ps_scaled, + unk_7fc: f32!(65536.0), + pwr_min_duty_cycle_2: pwr.pwr_min_duty_cycle.into(), + max_pstate_scaled_6: max_ps_scaled.into(), + max_freq_mhz: pwr.max_freq_mhz, + pwr_min_duty_cycle_3: pwr.pwr_min_duty_cycle, + min_pstate_scaled_4: f32!(100.0), + max_pstate_scaled_7: max_ps_scaled, + unk_alpha_neg: f32!(0.8), + unk_alpha: f32!(0.2), + fast_die0_sensor_mask: U64(self.cfg.fast_die0_sensor_mask), + fast_die0_release_temp_cc: 100 * pwr.fast_die0_release_temp, + unk_87c: self.cfg.da.unk_87c, + unk_880: 0x4, + unk_894: f32!(1.0), + + fast_die0_ki_dt: pwr.fast_die0_integral_gain * period_s, + unk_8a8: f32!(65536.0), + fast_die0_kp: pwr.fast_die0_proportional_gain, + pwr_min_duty_cycle_4: pwr.pwr_min_duty_cycle, + max_pstate_scaled_8: max_ps_scaled, + max_pstate_scaled_9: max_ps_scaled, + fast_die0_prop_tgt_delta: 100 * pwr.fast_die0_prop_tgt_delta, + unk_8cc: self.cfg.da.unk_8cc, + max_pstate_scaled_10: max_ps_scaled, + max_pstate_scaled_11: max_ps_scaled, + unk_c2c: 1, + power_zone_count: pwr.power_zones.len() as u32, + max_power_4: pwr.max_power_mw, + max_power_5: pwr.max_power_mw, + max_power_6: pwr.max_power_mw, + avg_power_target_filter_a_neg: f32!(1.0) - avg_power_target_filter_a, + avg_power_target_filter_a: avg_power_target_filter_a, + avg_power_target_filter_tc_x4: 4 * pwr.avg_power_target_filter_tc, + avg_power_target_filter_tc_xperiod: period_ms + * pwr.avg_power_target_filter_tc, + #[ver(V >= V13_0B4)] + avg_power_target_filter_tc_clks: period_ms + * pwr.avg_power_target_filter_tc + * base_clock_khz, + avg_power_filter_tc_periods_x4: 4 * avg_power_filter_tc_periods, + avg_power_filter_a_neg: f32!(1.0) - avg_power_filter_a, + avg_power_filter_a: avg_power_filter_a, + avg_power_ki_dt: pwr.avg_power_ki_only * period_s, + unk_d20: f32!(65536.0), + avg_power_kp: pwr.avg_power_kp, + avg_power_min_duty_cycle: pwr.avg_power_min_duty_cycle, + max_pstate_scaled_12: max_ps_scaled, + max_pstate_scaled_13: max_ps_scaled, + max_power_7: pwr.max_power_mw.into(), + max_power_8: pwr.max_power_mw, + avg_power_filter_tc_ms: pwr.avg_power_filter_tc_ms, + #[ver(V >= V13_0B4)] + avg_power_filter_tc_clks: avg_power_filter_tc_ms_rounded * base_clock_khz, + max_pstate_scaled_14: max_ps_scaled, + t81xx_data: match self.cfg.chip_id { + 0x8103 | 0x8112 => Self::t81xx_data(self.dyncfg), + _ => Default::default(), + }, + #[ver(V >= V13_0B4)] + unk_e10_0: raw::HwDataA130Extra { + unk_38: 4, + unk_3c: 8000, + unk_40: 2500, + unk_48: 0xffffffff, + unk_4c: 50, + unk_54: 50, + unk_58: 0x1, + unk_60: f32!(0.8888889), + unk_64: f32!(0.6666667), + unk_68: f32!(0.11111111), + unk_6c: f32!(0.33333333), + unk_70: f32!(-0.4), + unk_74: f32!(-0.8), + unk_7c: f32!(65536.0), + unk_80: f32!(-5.0), + unk_84: f32!(-10.0), + unk_8c: 40, + max_pstate_scaled_1: max_ps_scaled, + unk_9c: f32!(8000.0), + unk_a0: 1400, + unk_a8: 72, + unk_ac: 24, + unk_b0: 1728000, + unk_b8: 576000, + unk_c4: f32!(65536.0), + unk_114: f32!(65536.0), + unk_124: 40, + max_pstate_scaled_2: max_ps_scaled, + ..Default::default() + }, + fast_die0_sensor_mask_2: U64(self.cfg.fast_die0_sensor_mask), + unk_e24: self.cfg.da.unk_e24, + unk_e28: 1, + fast_die0_sensor_mask_alt: U64(self.cfg.fast_die0_sensor_mask_alt), + #[ver(V < V13_0B4)] + fast_die0_sensor_present: U64(self.cfg.fast_die0_sensor_present as u64), + 
unk_163c: 1, + unk_3644: 0, + hws1: Self::hw_shared1(self.cfg), + hws2: *Self::hw_shared2(self.cfg)?, + hws3: *Self::hw_shared3(self.cfg)?, + unk_3ce8: 1, + ..Default::default() + } + ); + + for i in 0..self.dyncfg.pwr.perf_states.len() { + raw.sram_k[i] = self.cfg.sram_k; + } + + for (i, coef) in pwr.core_leak_coef.iter().enumerate() { + raw.core_leak_coef[i] = *coef; + } + + for (i, coef) in pwr.sram_leak_coef.iter().enumerate() { + raw.sram_leak_coef[i] = *coef; + } + + for i in 0..self.dyncfg.id.num_clusters as usize { + if let Some(coef_a) = self.cfg.unk_coef_a.get(i) { + (*raw.unk_coef_a1[i])[..coef_a.len()].copy_from_slice(coef_a); + (*raw.unk_coef_a2[i])[..coef_a.len()].copy_from_slice(coef_a); + } + if let Some(coef_b) = self.cfg.unk_coef_b.get(i) { + (*raw.unk_coef_b1[i])[..coef_b.len()].copy_from_slice(coef_b); + (*raw.unk_coef_b2[i])[..coef_b.len()].copy_from_slice(coef_b); + } + } + + for (i, pz) in pwr.power_zones.iter().enumerate() { + raw.power_zones[i].target = pz.target; + raw.power_zones[i].target_off = pz.target - pz.target_offset; + raw.power_zones[i].filter_tc_x4 = 4 * pz.filter_tc; + raw.power_zones[i].filter_tc_xperiod = period_ms * pz.filter_tc; + let filter_a = f32!(1.0) / pz.filter_tc.into(); + raw.power_zones[i].filter_a = filter_a; + raw.power_zones[i].filter_a_neg = f32!(1.0) - filter_a; + #[ver(V >= V13_0B4)] + raw.power_zones[i].unk_10 = 1320000000; + } + + Ok(raw) + }) + } + + /// Create the HwDataB structure. This mostly contains GPU-related configuration. + #[inline(never)] + fn hwdata_b(&mut self) -> Result<GpuObject<HwDataB::ver>> { + self.alloc + .private + .new_inplace(Default::default(), |_inner, ptr| { + let raw = place!( + ptr, + raw::HwDataB::ver { + // Userspace VA map related + #[ver(V < V13_0B4)] + unk_0: U64(0x13_00000000), + unk_8: U64(0x14_00000000), + #[ver(V < V13_0B4)] + unk_10: U64(0x1_00000000), + unk_18: U64(0xffc00000), + unk_20: U64(0x11_00000000), + unk_28: U64(0x11_00000000), + // userspace address? + unk_30: U64(0x6f_ffff8000), + // unmapped? 
+ unkptr_38: U64(0xffffffa0_11800000), + // TODO: yuv matrices + chip_id: self.cfg.chip_id, + unk_454: 0x1, + unk_458: 0x1, + unk_460: 0x1, + unk_464: 0x1, + unk_468: 0x1, + unk_47c: 0x1, + unk_484: 0x1, + unk_48c: 0x1, + base_clock_khz: self.cfg.base_clock_hz / 1000, + power_sample_period: self.dyncfg.pwr.power_sample_period, + unk_49c: 0x1, + unk_4a0: 0x1, + unk_4a4: 0x1, + unk_4c0: 0x1f, + unk_4e0: U64(self.cfg.db.unk_4e0), + unk_4f0: 0x1, + unk_4f4: 0x1, + unk_504: 0x31, + unk_524: 0x1, // use_secure_cache_flush + unk_534: self.cfg.db.unk_534, + num_frags: self.dyncfg.id.num_frags * self.dyncfg.id.num_clusters, + unk_554: 0x1, + uat_ttb_base: U64(self.dyncfg.uat_ttb_base), + gpu_core_id: self.cfg.gpu_core as u32, + gpu_rev_id: self.dyncfg.id.gpu_rev_id as u32, + num_cores: self.dyncfg.id.num_cores * self.dyncfg.id.num_clusters, + max_pstate: self.dyncfg.pwr.perf_states.len() as u32 - 1, + #[ver(V < V13_0B4)] + num_pstates: self.dyncfg.pwr.perf_states.len() as u32, + #[ver(V < V13_0B4)] + min_sram_volt: self.dyncfg.pwr.min_sram_microvolt / 1000, + #[ver(V < V13_0B4)] + unk_ab8: self.cfg.db.unk_ab8, + #[ver(V < V13_0B4)] + unk_abc: self.cfg.db.unk_abc, + #[ver(V < V13_0B4)] + unk_ac0: 0x1020, + + #[ver(V >= V13_0B4)] + unk_ae4: Array::new([0x0, 0x3, 0x7, 0x7]), + #[ver(V < V13_0B4)] + unk_ae4: Array::new([0x0, 0xf, 0x3f, 0x3f]), + unk_b10: 0x1, + unk_b24: 0x1, + unk_b28: 0x1, + unk_b2c: 0x1, + unk_b30: self.cfg.db.unk_b30, + #[ver(V >= V13_0B4)] + unk_b38_0: 1, + #[ver(V >= V13_0B4)] + unk_b38_4: 1, + unk_b38: Array::new([0xffffffff; 12]), + #[ver(V >= V13_0B4)] + unk_c3c: 0x19, + ..Default::default() + } + ); + + let base_ps = self.dyncfg.pwr.perf_base_pstate as usize; + let max_ps = self.dyncfg.pwr.perf_max_pstate as usize; + let base_freq = self.dyncfg.pwr.perf_states[base_ps].freq_hz; + let max_freq = self.dyncfg.pwr.perf_states[max_ps].freq_hz; + + for (i, ps) in self.dyncfg.pwr.perf_states.iter().enumerate() { + raw.frequencies[i] = ps.freq_hz / 1000000; + for (j, mv) in ps.volt_mv.iter().enumerate() { + let sram_mv = (*mv).max(self.dyncfg.pwr.min_sram_microvolt / 1000); + raw.voltages[i][j] = *mv; + raw.voltages_sram[i][j] = sram_mv; + } + raw.sram_k[i] = self.cfg.sram_k; + raw.rel_max_powers[i] = ps.pwr_mw * 100 / self.dyncfg.pwr.max_power_mw; + raw.rel_boost_freqs[i] = if i > base_ps { + (ps.freq_hz - base_freq) / ((max_freq - base_freq) / 100) + } else { + 0 + }; + } + + Ok(raw) + }) + } + + /// Create the Globals structure, which contains global firmware config including more power + /// configuration data and globals used to exchange state between the firmware and driver. 
+ #[inline(never)] + fn globals(&mut self) -> Result<GpuObject<Globals::ver>> { + self.alloc + .shared + .new_inplace(Default::default(), |_inner, ptr| { + let pwr = &self.dyncfg.pwr; + let period_ms = pwr.power_sample_period; + let period_s = F32::from(period_ms) / f32!(1000.0); + let avg_power_filter_tc_periods = pwr.avg_power_filter_tc_ms / period_ms; + + let max_ps = pwr.perf_max_pstate; + let max_ps_scaled = 100 * max_ps; + + let raw = place!( + ptr, + raw::Globals::ver { + //ktrace_enable: 0xffffffff, + ktrace_enable: 0, + #[ver(V >= V13_2)] + unk_24_0: 3000, + unk_24: 0, + #[ver(V >= V13_0B4)] + unk_28_0: 0, // debug + unk_28: 1, + #[ver(V >= V13_0B4)] + unk_2c_0: 0, + unk_2c: 1, + unk_30: 0, + unk_34: 120, + sub: raw::GlobalsSub::ver { + unk_54: 0xffff, + unk_56: 40, + unk_58: 0xffff, + unk_5e: U32(1), + unk_66: U32(1), + ..Default::default() + }, + unk_8900: 1, + pending_submissions: AtomicU32::new(0), + max_power: pwr.max_power_mw, + max_pstate_scaled: max_ps_scaled, + max_pstate_scaled_2: max_ps_scaled, + max_pstate_scaled_3: max_ps_scaled, + power_zone_count: pwr.power_zones.len() as u32, + avg_power_filter_tc_periods: avg_power_filter_tc_periods, + avg_power_ki_dt: pwr.avg_power_ki_only * period_s, + avg_power_kp: pwr.avg_power_kp, + avg_power_min_duty_cycle: pwr.avg_power_min_duty_cycle, + avg_power_target_filter_tc: pwr.avg_power_target_filter_tc, + unk_89bc: self.cfg.da.unk_8cc, + fast_die0_release_temp: 100 * pwr.fast_die0_release_temp, + unk_89c4: self.cfg.da.unk_87c, + fast_die0_prop_tgt_delta: 100 * pwr.fast_die0_prop_tgt_delta, + fast_die0_kp: pwr.fast_die0_proportional_gain, + fast_die0_ki_dt: pwr.fast_die0_integral_gain * period_s, + unk_89e0: 1, + max_power_2: pwr.max_power_mw, + ppm_kp: pwr.ppm_kp, + ppm_ki_dt: pwr.ppm_ki * period_s, + #[ver(V >= V13_0B4)] + unk_89f4_8: 1, + unk_89f4: 0, + hws1: Self::hw_shared1(self.cfg), + hws2: *Self::hw_shared2(self.cfg)?, + hws3: *Self::hw_shared3(self.cfg)?, + unk_900c: 1, + #[ver(V >= V13_0B4)] + unk_9010_0: 1, + #[ver(V >= V13_0B4)] + unk_903c: 1, + #[ver(V < V13_0B4)] + unk_903c: 0, + fault_control: *crate::fault_control.read(), + do_init: 1, + unk_11020: 40, + unk_11024: 10, + unk_11028: 250, + #[ver(V >= V13_0B4)] + unk_1102c_0: 1, + #[ver(V >= V13_0B4)] + unk_1102c_4: 1, + #[ver(V >= V13_0B4)] + unk_1102c_8: 100, + #[ver(V >= V13_0B4)] + unk_1102c_c: 1, + idle_off_delay_ms: AtomicU32::new(pwr.idle_off_delay_ms), + fender_idle_off_delay_ms: pwr.fender_idle_off_delay_ms, + fw_early_wake_timeout_ms: pwr.fw_early_wake_timeout_ms, + unk_118e0: 40, + #[ver(V >= V13_0B4)] + unk_118e4_0: 50, + #[ver(V >= V13_0B4)] + unk_11edc: 0, + #[ver(V >= V13_0B4)] + unk_11efc: 0, + ..Default::default() + } + ); + + for (i, pz) in pwr.power_zones.iter().enumerate() { + raw.power_zones[i].target = pz.target; + raw.power_zones[i].target_off = pz.target - pz.target_offset; + raw.power_zones[i].filter_tc = pz.filter_tc; + } + + if let Some(tab) = self.cfg.global_tab.as_ref() { + for (i, x) in tab.iter().enumerate() { + raw.unk_118ec[i] = *x; + } + raw.unk_118e8 = 1; + } + + Ok(raw) + }) + } + + /// Create the RuntimePointers structure, which contains pointers to most of the other + /// structures including the ring buffer channels, statistics structures, and HwDataA/HwDataB. 
+ #[inline(never)] + fn runtime_pointers(&mut self) -> Result<GpuObject<RuntimePointers::ver>> { + let hwa = self.hwdata_a()?; + let hwb = self.hwdata_b()?; + + let pointers: Box<RuntimePointers::ver> = box_in_place!(RuntimePointers::ver { + stats: Stats::ver { + vtx: self.alloc.private.new_default::<GpuGlobalStatsVtx::ver>()?, + frag: self.alloc.private.new_inplace( + Default::default(), + |_inner, ptr: &mut MaybeUninit<raw::GpuGlobalStatsFrag::ver>| { + Ok(place!( + ptr, + raw::GpuGlobalStatsFrag::ver { + stats: raw::GpuStatsFrag::ver { + cur_stamp_id: -1, + unk_118: -1, + ..Default::default() + }, + ..Default::default() + } + )) + }, + )?, + comp: self.alloc.private.new_default::<GpuStatsComp>()?, + }, + + hwdata_a: hwa, + unkptr_190: self.alloc.private.array_empty(0x80)?, + unkptr_198: self.alloc.private.array_empty(0xc0)?, + hwdata_b: hwb, + + unkptr_1b8: self.alloc.private.array_empty(0x1000)?, + unkptr_1c0: self.alloc.private.array_empty(0x300)?, + unkptr_1c8: self.alloc.private.array_empty(0x1000)?, + + buffer_mgr_ctl: self.alloc.gpu.array_empty(127)?, + })?; + + self.alloc.private.new_boxed(pointers, |inner, ptr| { + Ok(place!( + ptr, + raw::RuntimePointers::ver { + pipes: Default::default(), + device_control: Default::default(), + event: Default::default(), + fw_log: Default::default(), + ktrace: Default::default(), + stats: Default::default(), + + stats_vtx: inner.stats.vtx.gpu_pointer(), + stats_frag: inner.stats.frag.gpu_pointer(), + stats_comp: inner.stats.comp.gpu_pointer(), + + hwdata_a: inner.hwdata_a.gpu_pointer(), + unkptr_190: inner.unkptr_190.gpu_pointer(), + unkptr_198: inner.unkptr_198.gpu_pointer(), + hwdata_b: inner.hwdata_b.gpu_pointer(), + hwdata_b_2: inner.hwdata_b.gpu_pointer(), + + fwlog_buf: None, + + unkptr_1b8: inner.unkptr_1b8.gpu_pointer(), + unkptr_1c0: inner.unkptr_1c0.gpu_pointer(), + unkptr_1c8: inner.unkptr_1c8.gpu_pointer(), + + buffer_mgr_ctl: inner.buffer_mgr_ctl.gpu_pointer(), + buffer_mgr_ctl_2: inner.buffer_mgr_ctl.gpu_pointer(), + + __pad0: Default::default(), + unk_160: U64(0), + unk_168: U64(0), + unk_1d0: 0, + unk_1d4: 0, + unk_1d8: Default::default(), + + __pad1: Default::default(), + gpu_scratch: raw::RuntimeScratch { + unk_6b38: 0xff, + ..Default::default() + }, + } + )) + }) + } + + /// Create the FwStatus structure, which is used to coordinate the firmware halt state between + /// the firmware and the driver. + #[inline(never)] + fn fw_status(&mut self) -> Result<GpuObject<FwStatus>> { + self.alloc + .shared + .new_object(Default::default(), |_inner| Default::default()) + } + + /// Create one UatLevelInfo structure, which describes one level of translation for the UAT MMU. + #[inline(never)] + fn uat_level_info( + cfg: &hw::HwConfig, + index_shift: usize, + num_entries: usize, + ) -> raw::UatLevelInfo { + raw::UatLevelInfo { + index_shift: index_shift as _, + unk_1: 14, + unk_2: 14, + unk_3: 8, + unk_4: 0x4000, + num_entries: num_entries as _, + unk_8: U64(1), + unk_10: U64(((1u64 << cfg.uat_oas) - 1) & !(mmu::UAT_PGMSK as u64)), + index_mask: U64(((num_entries - 1) << index_shift) as u64), + } + } + + /// Build the top-level InitData object. 
+ #[inline(never)] + pub(crate) fn build(&mut self) -> Result<Box<GpuObject<InitData::ver>>> { + let inner: Box<InitData::ver> = box_in_place!(InitData::ver { + unk_buf: self.alloc.shared_ro.array_empty(0x4000)?, + runtime_pointers: self.runtime_pointers()?, + globals: self.globals()?, + fw_status: self.fw_status()?, + })?; + + Ok(Box::try_new(self.alloc.shared_ro.new_boxed( + inner, + |inner, ptr| { + Ok(place!( + ptr, + raw::InitData::ver { + #[ver(V >= V13_0B4)] + ver_info: Array::new([1, 1, 16, 1]), + unk_buf: inner.unk_buf.gpu_pointer(), + unk_8: 0, + unk_c: 0, + runtime_pointers: inner.runtime_pointers.gpu_pointer(), + globals: inner.globals.gpu_pointer(), + fw_status: inner.fw_status.gpu_pointer(), + uat_page_size: 0x4000, + uat_page_bits: 14, + uat_num_levels: 3, + uat_level_info: Array::new([ + Self::uat_level_info(self.cfg, 36, 8), + Self::uat_level_info(self.cfg, 25, 2048), + Self::uat_level_info(self.cfg, 14, 2048), + ]), + __pad0: Default::default(), + host_mapped_fw_allocations: 1, + unk_ac: 0, + unk_b0: 0, + unk_b4: 0, + unk_b8: 0, + } + )) + }, + )?)?) + } +} diff --git a/drivers/gpu/drm/asahi/mem.rs b/drivers/gpu/drm/asahi/mem.rs new file mode 100644 index 000000000000..491d4f8a4016 --- /dev/null +++ b/drivers/gpu/drm/asahi/mem.rs @@ -0,0 +1,133 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT + +//! ARM64 low level memory operations. +//! +//! This GPU uses CPU-side `tlbi` outer-shareable instructions to manage its TLBs. +//! Yes, really. Even though the VA address spaces are unrelated. +//! +//! Right now we pick our own ASIDs and don't coordinate with the CPU. This might result +//! in needless TLB shootdowns on the CPU side... TODO: fix this. + +use core::arch::asm; +use core::cmp::min; + +use crate::debug::*; +use crate::mmu; + +type Asid = u8; + +/// Invalidate the entire GPU TLB. +#[inline(always)] +pub(crate) fn tlbi_all() { + unsafe { + asm!(".arch armv8.4-a", "tlbi vmalle1os",); + } +} + +/// Invalidate all TLB entries for a given ASID. +#[inline(always)] +pub(crate) fn tlbi_asid(asid: Asid) { + if debug_enabled(DebugFlags::ConservativeTlbi) { + tlbi_all(); + sync(); + return; + } + + unsafe { + asm!( + ".arch armv8.4-a", + "tlbi aside1os, {x}", + x = in(reg) ((asid as u64) << 48) + ); + } +} + +/// Invalidate a single page for a given ASID. +#[inline(always)] +pub(crate) fn tlbi_page(asid: Asid, va: usize) { + if debug_enabled(DebugFlags::ConservativeTlbi) { + tlbi_all(); + sync(); + return; + } + + let val: u64 = ((asid as u64) << 48) | ((va as u64 >> 12) & 0xffffffffffc); + unsafe { + asm!( + ".arch armv8.4-a", + "tlbi vae1os, {x}", + x = in(reg) val + ); + } +} + +/// Invalidate a range of pages for a given ASID. 
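The range invalidate below has the most subtle encoding in this file: the requested page count is rounded to pairs of pages and packed into a 5-bit base and a 2-bit exponent, so the operand ends up describing `base << (5 * exp + 1)` pages once the hardware field stores `base - 1`. A standalone sketch of just that count encoding, following the same rounding rules as `tlbi_range()`:

    // Valid for 2 or more pages; a single page goes through tlbi_page() instead.
    fn encode_range_count(pages: u64) -> (u32, u64) {
        let num = (pages + 1) >> 1; // the hardware counts pairs of pages
        let exp = ((64 - num.leading_zeros()) / 5).min(3); // 2-bit scale field
        let bits = 5 * exp;
        let base = (num + (1u64 << bits) - 1) >> bits; // round the count up
        // tlbi_range() writes base - 1 into the register (looping in chunks of
        // 32 if base is larger), covering base << (5 * exp + 1) pages in total.
        (exp, base)
    }

    // encode_range_count(64)  == (1, 1) -> covers 1 << 6 = 64 pages
    // encode_range_count(500) == (1, 8) -> covers 8 << 6 = 512 pages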
+#[inline(always)]
+pub(crate) fn tlbi_range(asid: Asid, va: usize, len: usize) {
+    if debug_enabled(DebugFlags::ConservativeTlbi) {
+        tlbi_all();
+        sync();
+        return;
+    }
+
+    if len == 0 {
+        return;
+    }
+
+    let start_pg = va >> mmu::UAT_PGBIT;
+    let end_pg = (va + len + mmu::UAT_PGMSK) >> mmu::UAT_PGBIT;
+
+    let mut val: u64 = ((asid as u64) << 48) | (2 << 46) | (start_pg as u64 & 0x1fffffffff);
+    let pages = end_pg - start_pg;
+
+    if pages == 1 {
+        tlbi_page(asid, va);
+        return;
+    }
+
+    // Page count is always in units of 2
+    let num = ((pages + 1) >> 1) as u64;
+    // base: 5 bits
+    // exp: 2 bits
+    // pages = (base + 1) << (5 * exp + 1)
+    // 0:00000 ->                     2 pages = 2 << 0
+    // 0:11111 ->                32 * 2 pages = 2 << 5
+    // 1:00000 ->            1 * 32 * 2 pages = 2 << 5
+    // 1:11111 ->           32 * 32 * 2 pages = 2 << 10
+    // 2:00000 ->       1 * 32 * 32 * 2 pages = 2 << 10
+    // 2:11111 ->      32 * 32 * 32 * 2 pages = 2 << 15
+    // 3:00000 ->  1 * 32 * 32 * 32 * 2 pages = 2 << 15
+    // 3:11111 -> 32 * 32 * 32 * 32 * 2 pages = 2 << 20
+    let exp = min(3, (64 - num.leading_zeros()) / 5);
+    let bits = 5 * exp;
+    let mut base = (num + (1 << bits) - 1) >> bits;
+
+    val |= (exp as u64) << 44;
+
+    while base > 32 {
+        unsafe {
+            asm!(
+                ".arch armv8.4-a",
+                "tlbi rvae1os, {x}",
+                x = in(reg) val | (31 << 39)
+            );
+        }
+        base -= 32;
+    }
+
+    unsafe {
+        asm!(
+            ".arch armv8.4-a",
+            "tlbi rvae1os, {x}",
+            x = in(reg) val | ((base - 1) << 39)
+        );
+    }
+}
+
+/// Issue a memory barrier (`dsb sy`).
+#[inline(always)]
+pub(crate) fn sync() {
+    unsafe {
+        asm!("dsb sy");
+    }
+}
diff --git a/drivers/gpu/drm/asahi/microseq.rs b/drivers/gpu/drm/asahi/microseq.rs
new file mode 100644
index 000000000000..dca94ebc53a1
--- /dev/null
+++ b/drivers/gpu/drm/asahi/microseq.rs
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+//! GPU Micro operation sequence builder
+//!
+//! As part of a single job submission to the GPU, the GPU firmware interprets a sequence of
+//! commands that we call a "microsequence". These are responsible for setting up the job execution,
+//! timestamping the process, waiting for completion, tearing down any resources, and signaling
+//! completion to the driver via the event stamp mechanism.
+//!
+//! Although the microsequences used by the macOS driver are usually quite uniform and simple, the
+//! firmware actually implements enough operations to make this interpreter Turing-complete (!).
+//! Most of those aren't implemented yet, since we don't need them, but they could come in handy in
+//! the future to do strange things or work around firmware bugs...
+//!
+//! This module simply implements a collection of microsequence operations that can be appended to
+//! and later concatenated into one buffer, ready for firmware execution.
+
+use crate::fw::microseq;
+pub(crate) use crate::fw::microseq::*;
+use crate::fw::types::*;
+use kernel::prelude::*;
+
+/// MicroSequence object type, which is just an opaque byte array.
+pub(crate) type MicroSequence = GpuArray<u8>;
+
+/// MicroSequence builder.
+pub(crate) struct Builder {
+    ops: Vec<u8>,
+}
+
+impl Builder {
+    /// Create a new Builder object
+    pub(crate) fn new() -> Builder {
+        Builder { ops: Vec::new() }
+    }
+
+    /// Get the relative offset from the current pointer to a given target offset.
+    ///
+    /// Used for relative jumps.
+    pub(crate) fn offset_to(&self, target: i32) -> i32 {
+        target - self.ops.len() as i32
+    }
+
+    /// Add an operation to the end of the sequence.
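The `add()`/`build()` methods that follow treat each operation as a plain `#[repr(C)]` struct and append its bytes verbatim, while `offset_to()` above converts an operation's byte offset into the relative jump targets the firmware expects. A hypothetical usage sketch (the operation names are placeholders; the real ones live in `fw::microseq` and are not part of this hunk):

    fn build_example(alloc: &mut Allocator) -> Result<MicroSequence> {
        let mut b = Builder::new();
        // `StartOp` and `WaitOp` stand in for real fw::microseq operations.
        let start = b.add(StartOp::default())?; // byte offset of this op
        b.add(WaitOp::default())?;
        // A backwards jump op would be given b.offset_to(start) as its target.
        b.build(alloc) // copies the accumulated bytes into a GpuArray<u8>
    }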
+ pub(crate) fn add<T: microseq::Operation>(&mut self, op: T) -> Result<i32> { + let off = self.ops.len(); + let p: *const T = &op; + let p: *const u8 = p as *const u8; + let s: &[u8] = unsafe { core::slice::from_raw_parts(p, core::mem::size_of::<T>()) }; + self.ops.try_extend_from_slice(s)?; + Ok(off as i32) + } + + /// Collect all submitted operations into a finalized GPU object. + pub(crate) fn build(self, alloc: &mut Allocator) -> Result<MicroSequence> { + let mut array = alloc.array_empty::<u8>(self.ops.len())?; + + array.as_mut_slice().clone_from_slice(self.ops.as_slice()); + Ok(array) + } +} diff --git a/drivers/gpu/drm/asahi/mmu.rs b/drivers/gpu/drm/asahi/mmu.rs new file mode 100644 index 000000000000..226ca0b7c1d7 --- /dev/null +++ b/drivers/gpu/drm/asahi/mmu.rs @@ -0,0 +1,1249 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT + +//! GPU UAT (MMU) management +//! +//! AGX GPUs use an MMU called the UAT, which is largely compatible with the ARM64 page table +//! format. This module manages the global MMU structures, including a shared handoff structure +//! that is used to coordinate VM management operations with the firmware, the TTBAT which points +//! to currently active GPU VM contexts, as well as the individual `Vm` operations to map and +//! unmap buffer objects into a single user or kernel address space. +//! +//! The actual page table management is delegated to the common kernel `io_pgtable` code. + +use core::fmt::Debug; +use core::mem::size_of; +use core::ptr::{addr_of_mut, NonNull}; +use core::sync::atomic::{fence, AtomicU32, AtomicU64, AtomicU8, Ordering}; +use core::time::Duration; + +use kernel::{ + bindings, c_str, delay, device, + drm::mm, + error::{to_result, Result}, + io_pgtable, + io_pgtable::{prot, AppleUAT, IoPageTable}, + prelude::*, + sync::{smutex::Mutex, Guard}, + sync::{Arc, LockClassKey, UniqueArc}, + time, + types::ForeignOwnable, +}; + +use crate::debug::*; +use crate::no_debug; +use crate::{driver, fw, gem, hw, mem, slotalloc}; + +const DEBUG_CLASS: DebugFlags = DebugFlags::Mmu; + +/// PPL magic number for the handoff region +const PPL_MAGIC: u64 = 0x4b1d000000000002; + +/// Number of supported context entries in the TTBAT +const UAT_NUM_CTX: usize = 64; +/// First context available for users +const UAT_USER_CTX_START: usize = 1; +/// Number of available user contexts +const UAT_USER_CTX: usize = UAT_NUM_CTX - UAT_USER_CTX_START; + +/// Number of bits in a page offset. +pub(crate) const UAT_PGBIT: usize = 14; +/// UAT page size. +pub(crate) const UAT_PGSZ: usize = 1 << UAT_PGBIT; +/// UAT page offset mask. +pub(crate) const UAT_PGMSK: usize = UAT_PGSZ - 1; + +type Pte = AtomicU64; + +/// Number of PTEs per page. 
+const UAT_NPTE: usize = UAT_PGSZ / size_of::<Pte>(); + +/// UAT input address space (user) +pub(crate) const UAT_IAS: usize = 39; +/// "Fake" kernel UAT input address space (one page level lower) +pub(crate) const UAT_IAS_KERN: usize = 36; + +/// Lower/user base VA +const IOVA_USER_BASE: usize = UAT_PGSZ; +/// Lower/user top VA +const IOVA_USER_TOP: usize = (1 << UAT_IAS) - 1; +/// Upper/kernel base VA +// const IOVA_TTBR1_BASE: usize = 0xffffff8000000000; +/// Driver-managed kernel base VA +const IOVA_KERN_BASE: usize = 0xffffffa000000000; +/// Driver-managed kernel top VA +const IOVA_KERN_TOP: usize = 0xffffffafffffffff; + +const TTBR_VALID: u64 = 0x1; // BIT(0) +const TTBR_ASID_SHIFT: usize = 48; + +const PTE_TABLE: u64 = 0x3; // BIT(0) | BIT(1) + +// Mapping protection types + +// Note: prot::CACHE means "cache coherency", which for UAT means *uncached*, +// since uncached mappings from the GFX ASC side are cache coherent with the AP cache. +// Not having that flag means *cached noncoherent*. + +/// Firmware MMIO R/W +pub(crate) const PROT_FW_MMIO_RW: u32 = + prot::PRIV | prot::READ | prot::WRITE | prot::CACHE | prot::MMIO; +/// Firmware MMIO R/O +pub(crate) const PROT_FW_MMIO_RO: u32 = prot::PRIV | prot::READ | prot::CACHE | prot::MMIO; +/// Firmware shared (uncached) RW +pub(crate) const PROT_FW_SHARED_RW: u32 = prot::PRIV | prot::READ | prot::WRITE | prot::CACHE; +/// Firmware shared (uncached) RO +pub(crate) const PROT_FW_SHARED_RO: u32 = prot::PRIV | prot::READ | prot::CACHE; +/// Firmware private (cached) RW +pub(crate) const PROT_FW_PRIV_RW: u32 = prot::PRIV | prot::READ | prot::WRITE; +/* +/// Firmware private (cached) RO +pub(crate) const PROT_FW_PRIV_RO: u32 = prot::PRIV | prot::READ; +*/ +/// Firmware/GPU shared (uncached) RW +pub(crate) const PROT_GPU_FW_SHARED_RW: u32 = prot::READ | prot::WRITE | prot::CACHE; +/// Firmware/GPU shared (private) RW +pub(crate) const PROT_GPU_FW_PRIV_RW: u32 = prot::READ | prot::WRITE; +/// GPU shared/coherent RW +pub(crate) const PROT_GPU_SHARED_RW: u32 = prot::READ | prot::WRITE | prot::CACHE | prot::NOEXEC; +/// GPU shared/coherent RO +pub(crate) const PROT_GPU_SHARED_RO: u32 = prot::READ | prot::CACHE | prot::NOEXEC; +/// GPU shared/coherent WO +pub(crate) const PROT_GPU_SHARED_WO: u32 = prot::WRITE | prot::CACHE | prot::NOEXEC; +/* +/// GPU private/noncoherent RW +pub(crate) const PROT_GPU_PRIV_RW: u32 = prot::READ | prot::WRITE | prot::NOEXEC; +/// GPU private/noncoherent RO +pub(crate) const PROT_GPU_PRIV_RO: u32 = prot::READ | prot::NOEXEC; +*/ + +type PhysAddr = bindings::phys_addr_t; + +/// A pre-allocated memory region for UAT management +struct UatRegion { + base: PhysAddr, + map: NonNull<core::ffi::c_void>, +} + +/// It's safe to share UAT region records across threads. 
+unsafe impl Send for UatRegion {} +unsafe impl Sync for UatRegion {} + +/// Handoff region flush info structure +#[repr(C)] +struct FlushInfo { + state: AtomicU64, + addr: AtomicU64, + size: AtomicU64, +} + +/// UAT Handoff region layout +#[repr(C)] +struct Handoff { + magic_ap: AtomicU64, + magic_fw: AtomicU64, + + lock_ap: AtomicU8, + lock_fw: AtomicU8, + // Implicit padding: 2 bytes + turn: AtomicU32, + cur_slot: AtomicU32, + // Implicit padding: 4 bytes + flush: [FlushInfo; UAT_NUM_CTX + 1], + + unk2: AtomicU8, + // Implicit padding: 7 bytes + unk3: AtomicU64, +} + +const HANDOFF_SIZE: usize = size_of::<Handoff>(); + +/// One VM slot in the TTBAT +#[repr(C)] +struct SlotTTBS { + ttb0: AtomicU64, + ttb1: AtomicU64, +} + +const SLOTS_SIZE: usize = UAT_NUM_CTX * size_of::<SlotTTBS>(); + +// We need at least page 0 (ttb0) +const PAGETABLES_SIZE: usize = UAT_PGSZ; + +/// Inner data for a Vm instance. This is reference-counted by the outer Vm object. +struct VmInner { + dev: driver::AsahiDevice, + is_kernel: bool, + min_va: usize, + max_va: usize, + page_table: AppleUAT<Uat>, + mm: mm::Allocator<(), MappingInner>, + uat_inner: Arc<UatInner>, + active_users: usize, + binding: Option<slotalloc::Guard<SlotInner>>, + bind_token: Option<slotalloc::SlotToken>, + id: u64, +} + +impl VmInner { + /// Returns the slot index, if this VM is bound. + fn slot(&self) -> Option<u32> { + if self.is_kernel { + // The GFX ASC does not care about the ASID. Pick an arbitrary one. + // TODO: This needs to be a persistently reserved ASID once we integrate + // with the ARM64 kernel ASID machinery to avoid overlap. + Some(0) + } else { + // We don't check whether we lost the slot, which could cause unnecessary + // invalidations against another Vm. However, this situation should be very + // rare (e.g. a Vm lost its slot, which means 63 other Vms bound in the + // interim, and then it gets killed / drops its mappings without doing any + // final rendering). Anything doing active maps/unmaps is probably also + // rendering and therefore likely bound. + self.bind_token + .as_ref() + .map(|token| (token.last_slot() + UAT_USER_CTX_START as u32)) + } + } + + /// Returns the translation table base for this Vm + fn ttb(&self) -> u64 { + self.page_table.cfg().ttbr + } + + /// Map an IOVA to the shifted address the underlying io_pgtable uses. + fn map_iova(&self, iova: usize, size: usize) -> Result<usize> { + if iova < self.min_va || (iova + size - 1) > self.max_va { + Err(EINVAL) + } else if self.is_kernel { + Ok(iova - self.min_va) + } else { + Ok(iova) + } + } + + /// Map a contiguous range of virtual->physical pages. + fn map_pages( + &mut self, + mut iova: usize, + mut paddr: usize, + pgsize: usize, + pgcount: usize, + prot: u32, + ) -> Result<usize> { + let mut left = pgcount; + while left > 0 { + let mapped_iova = self.map_iova(iova, pgsize * left)?; + let mapped = self + .page_table + .map_pages(mapped_iova, paddr, pgsize, left, prot)?; + assert!(mapped <= left * pgsize); + + left -= mapped / pgsize; + paddr += mapped; + iova += mapped; + } + Ok(pgcount * pgsize) + } + + /// Unmap a contiguous range of pages. 
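`map_iova()` above hides a quirk of the kernel half: the upper (driver-managed) VM is backed by a page table with the reduced 36-bit `UAT_IAS_KERN` input space, so kernel IOVAs are rebased to zero before they reach `io_pgtable`, while user IOVAs pass through unchanged. A worked example using the constants defined earlier in this file (the address itself is chosen only for illustration):

    let iova = 0xffff_ffa0_0001_4000usize; // within IOVA_KERN_BASE..=IOVA_KERN_TOP
    let pgtable_iova = iova - IOVA_KERN_BASE; // 0x14000, fits in 36 bits
    // User-half addresses are already below 1 << UAT_IAS and are used as-is.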
+ fn unmap_pages(&mut self, mut iova: usize, pgsize: usize, pgcount: usize) -> Result<usize> { + let mut left = pgcount; + while left > 0 { + let mapped_iova = self.map_iova(iova, pgsize * left)?; + let unmapped = self.page_table.unmap_pages(mapped_iova, pgsize, left); + assert!(unmapped <= left * pgsize); + + left -= unmapped / pgsize; + iova += unmapped; + } + + Ok(pgcount * pgsize) + } + + /// Map an `mm::Node` representing an mapping in VA space. + fn map_node(&mut self, node: &mm::Node<(), MappingInner>, prot: u32) -> Result { + let mut iova = node.start() as usize; + let sgt = node.sgt.as_ref().ok_or(EINVAL)?; + + for range in sgt.iter() { + let addr = range.dma_address(); + let len = range.dma_len(); + + if (addr | len | iova) & UAT_PGMSK != 0 { + dev_err!( + self.dev, + "MMU: Mapping {:#x}:{:#x} -> {:#x} is not page-aligned\n", + addr, + len, + iova + ); + return Err(EINVAL); + } + + mod_dev_dbg!( + self.dev, + "MMU: map: {:#x}:{:#x} -> {:#x}\n", + addr, + len, + iova + ); + + self.map_pages(iova, addr, UAT_PGSZ, len >> UAT_PGBIT, prot)?; + + iova += len; + } + Ok(()) + } +} + +/// Shared reference to a virtual memory address space ([`Vm`]). +#[derive(Clone)] +pub(crate) struct Vm { + id: u64, + file_id: u64, + inner: Arc<Mutex<VmInner>>, +} +no_debug!(Vm); + +/// Slot data for a [`Vm`] slot (nothing, we only care about the indices). +pub(crate) struct SlotInner(); + +impl slotalloc::SlotItem for SlotInner { + type Data = (); +} + +/// Represents a single user of a binding of a [`Vm`] to a slot. +/// +/// The number of users is counted, and the slot will be freed when it drops to 0. +#[derive(Debug)] +pub(crate) struct VmBind(Vm, u32); + +impl VmBind { + /// Returns the slot that this `Vm` is bound to. + pub(crate) fn slot(&self) -> u32 { + self.1 + } +} + +impl Drop for VmBind { + fn drop(&mut self) { + let mut inner = self.0.inner.lock(); + + assert_ne!(inner.active_users, 0); + inner.active_users -= 1; + mod_pr_debug!("MMU: slot {} active users {}\n", self.1, inner.active_users); + if inner.active_users == 0 { + inner.binding = None; + } + } +} + +impl Clone for VmBind { + fn clone(&self) -> VmBind { + let mut inner = self.0.inner.lock(); + + inner.active_users += 1; + mod_pr_debug!("MMU: slot {} active users {}\n", self.1, inner.active_users); + VmBind(self.0.clone(), self.1) + } +} + +/// Inner data required for an object mapping into a [`Vm`]. +pub(crate) struct MappingInner { + owner: Arc<Mutex<VmInner>>, + uat_inner: Arc<UatInner>, + prot: u32, + mapped_size: usize, + sgt: Option<gem::SGTable>, +} + +/// An object mapping into a [`Vm`], which reserves the address range from use by other mappings. +pub(crate) struct Mapping(mm::Node<(), MappingInner>); + +impl Mapping { + /// Returns the IOVA base of this mapping + pub(crate) fn iova(&self) -> usize { + self.0.start() as usize + } + + /// Returns the size of this mapping in bytes + pub(crate) fn size(&self) -> usize { + self.0.mapped_size + } + + /// Remap a cached mapping as uncached, then synchronously flush that range of VAs from the + /// coprocessor cache. This is required to safely unmap cached/private mappings. + fn remap_uncached_and_flush(&mut self) { + let mut owner = self.0.owner.lock(); + mod_dev_dbg!( + owner.dev, + "MMU: remap as uncached {:#x}:{:#x}\n", + self.iova(), + self.size() + ); + + // The IOMMU API does not allow us to remap things in-place... + // just do an unmap and map again for now. 
+ // Do not try to unmap guard page (-1) + if owner + .unmap_pages(self.iova(), UAT_PGSZ, self.size() >> UAT_PGBIT) + .is_err() + { + dev_err!( + owner.dev, + "MMU: unmap for remap {:#x}:{:#x} failed\n", + self.iova(), + self.size() + ); + } + + let prot = self.0.prot | prot::CACHE; + if owner.map_node(&self.0, prot).is_err() { + dev_err!( + owner.dev, + "MMU: remap {:#x}:{:#x} failed\n", + self.iova(), + self.size() + ); + } + + // If we don't have (and have never had) a VM slot, just return + let slot = match owner.slot() { + None => return, + Some(slot) => slot, + }; + + let flush_slot = if owner.is_kernel { + // If this is a kernel mapping, always flush on index 64 + UAT_NUM_CTX as u32 + } else { + // Otherwise, check if this slot is the active one, otherwise return + // Also check that we actually own this slot + let ttb = owner.ttb() | TTBR_VALID | (slot as u64) << TTBR_ASID_SHIFT; + + let uat_inner = self.0.uat_inner.lock(); + uat_inner.handoff().lock(); + let cur_slot = uat_inner.handoff().current_slot(); + let ttb_cur = uat_inner.ttbs()[slot as usize].ttb0.load(Ordering::Relaxed); + uat_inner.handoff().unlock(); + if cur_slot == Some(slot) && ttb_cur == ttb { + slot + } else { + return; + } + }; + + // FIXME: There is a race here, though it'll probably never happen in practice. + // In theory, it's possible for the ASC to finish using our slot, whatever command + // it was processing to complete, the slot to be lost to another context, and the ASC + // to begin using it again with a different page table, thus faulting when it gets a + // flush request here. In practice, the chance of this happening is probably vanishingly + // small, as all 62 other slots would have to be recycled or in use before that slot can + // be reused, and the ASC using user contexts at all is very rare. + + // Still, the locking around UAT/Handoff/TTBs should probably be redesigned to better + // model the interactions with the firmware and avoid these races. + // Possibly TTB changes should be tied to slot locks: + + // Flush: + // - Can early check handoff here (no need to lock). + // If user slot and it doesn't match the active ASC slot, + // we can elide the flush as the ASC guarantees it flushes + // TLBs/caches when it switches context. We just need a + // barrier to ensure ordering. + // - Lock TTB slot + // - If user ctx: + // - Lock handoff AP-side + // - Lock handoff dekker + // - Check TTB & handoff cur ctx + // - Perform flush if necessary + // - This implies taking the fwring lock + // + // TTB change: + // - lock TTB slot + // - lock handoff AP-side + // - lock handoff dekker + // change TTB + + // Lock this flush slot, and write the range to it + let flush = self.0.uat_inner.lock_flush(flush_slot); + let pages = self.size() >> UAT_PGBIT; + flush.begin_flush(self.iova() as u64, self.size() as u64); + if pages >= 0x10000 { + dev_err!(owner.dev, "MMU: Flush too big ({:#x} pages))\n", pages); + } + + let cmd = fw::channels::FwCtlMsg { + addr: fw::types::U64(self.iova() as u64), + unk_8: 0, + slot: flush_slot, + page_count: pages as u16, + unk_12: 2, // ? + }; + + // Tell the firmware to do a cache flush + if let Err(e) = owner.dev.data().gpu.fwctl(cmd) { + dev_err!( + owner.dev, + "MMU: ASC cache flush {:#x}:{:#x} failed (err: {:?})\n", + self.iova(), + self.size(), + e + ); + } + + // Finish the flush + flush.end_flush(); + + // Slot is unlocked here + } +} + +impl Drop for Mapping { + fn drop(&mut self) { + // This is the main unmap function for UAT mappings. 
+ // The sequence of operations here is finicky, due to the interaction + // between cached GFX ASC mappings and the page tables. These mappings + // always have to be flushed from the cache before being unmapped. + + // For uncached mappings, just unmapping and flushing the TLB is sufficient. + + // For cached mappings, this is the required sequence: + // 1. Remap it as uncached + // 2. Flush the TLB range + // 3. If kernel VA mapping OR user VA mapping and handoff.current_slot() == slot: + // a. Take a lock for this slot + // b. Write the flush range to the right context slot in handoff area + // c. Issue a cache invalidation request via FwCtl queue + // d. Poll for completion via queue + // e. Check for completion flag in the handoff area + // f. Drop the lock + // 4. Unmap + // 5. Flush the TLB range again + + // prot::CACHE means "cache coherent" which means *uncached* here. + if self.0.prot & prot::CACHE == 0 { + self.remap_uncached_and_flush(); + } + + let mut owner = self.0.owner.lock(); + mod_dev_dbg!( + owner.dev, + "MMU: unmap {:#x}:{:#x}\n", + self.iova(), + self.size() + ); + + if owner + .unmap_pages(self.iova(), UAT_PGSZ, self.size() >> UAT_PGBIT) + .is_err() + { + dev_err!( + owner.dev, + "MMU: unmap {:#x}:{:#x} failed\n", + self.iova(), + self.size() + ); + } + + if let Some(asid) = owner.slot() { + mem::tlbi_range(asid as u8, self.iova(), self.size()); + mod_dev_dbg!( + owner.dev, + "MMU: flush range: asid={:#x} start={:#x} len={:#x}\n", + asid, + self.iova(), + self.size() + ); + mem::sync(); + } + } +} + +/// Shared UAT global data structures +struct UatShared { + handoff_rgn: UatRegion, + ttbs_rgn: UatRegion, +} + +impl UatShared { + /// Returns the handoff region area + fn handoff(&self) -> &Handoff { + // SAFETY: pointer is non-null per the type invariant + unsafe { (self.handoff_rgn.map.as_ptr() as *mut Handoff).as_ref() }.unwrap() + } + + /// Returns the TTBAT area + fn ttbs(&self) -> &[SlotTTBS; UAT_NUM_CTX] { + // SAFETY: pointer is non-null per the type invariant + unsafe { (self.ttbs_rgn.map.as_ptr() as *mut [SlotTTBS; UAT_NUM_CTX]).as_ref() }.unwrap() + } +} + +// SAFETY: Nothing here is unsafe to send across threads. +unsafe impl Send for UatShared {} + +/// Inner data for the top-level UAT instance. +struct UatInner { + shared: Mutex<UatShared>, + handoff_flush: [Mutex<HandoffFlush>; UAT_NUM_CTX + 1], +} + +impl UatInner { + /// Take the lock on the shared data and return the guard. + fn lock(&self) -> Guard<'_, Mutex<UatShared>> { + self.shared.lock() + } + + /// Take a lock on a handoff flush slot and return the guard. 
+ fn lock_flush(&self, slot: u32) -> Guard<'_, Mutex<HandoffFlush>> { + self.handoff_flush[slot as usize].lock() + } +} + +/// Top-level UAT manager object +pub(crate) struct Uat { + dev: driver::AsahiDevice, + cfg: &'static hw::HwConfig, + pagetables_rgn: UatRegion, + + inner: Arc<UatInner>, + slots: slotalloc::SlotAllocator<SlotInner>, + + kernel_vm: Vm, + _kernel_lower_vm: Vm, +} + +impl Drop for UatRegion { + fn drop(&mut self) { + // SAFETY: the pointer is valid by the type invariant + unsafe { bindings::memunmap(self.map.as_ptr()) }; + } +} + +impl Handoff { + /// Lock the handoff region from firmware access + fn lock(&self) { + self.lock_ap.store(1, Ordering::Relaxed); + fence(Ordering::SeqCst); + + while self.lock_fw.load(Ordering::Relaxed) != 0 { + if self.turn.load(Ordering::Relaxed) != 0 { + self.lock_ap.store(0, Ordering::Relaxed); + while self.turn.load(Ordering::Relaxed) != 0 {} + self.lock_ap.store(1, Ordering::Relaxed); + fence(Ordering::SeqCst); + } + } + fence(Ordering::Acquire); + } + + /// Unlock the handoff region, allowing firmware access + fn unlock(&self) { + self.turn.store(1, Ordering::Relaxed); + self.lock_ap.store(0, Ordering::Release); + } + + /// Returns the current Vm slot mapped by the firmware for lower/unprivileged access, if any. + fn current_slot(&self) -> Option<u32> { + let slot = self.cur_slot.load(Ordering::Relaxed); + if slot == 0 || slot == u32::MAX { + None + } else { + Some(slot) + } + } + + /// Initialize the handoff region + fn init(&self) -> Result { + self.magic_ap.store(PPL_MAGIC, Ordering::Relaxed); + self.cur_slot.store(0, Ordering::Relaxed); + self.unk3.store(0, Ordering::Relaxed); + fence(Ordering::SeqCst); + + let timeout = time::ktime_get() + Duration::from_millis(1000); + + self.lock(); + while time::ktime_get() < timeout { + if self.magic_fw.load(Ordering::Relaxed) == PPL_MAGIC { + break; + } else { + self.unlock(); + delay::coarse_sleep(Duration::from_millis(10)); + self.lock(); + } + } + + if self.magic_fw.load(Ordering::Relaxed) != PPL_MAGIC { + self.unlock(); + pr_err!("Handoff: Failed to initialize (firmware not running?)\n"); + return Err(EIO); + } + + self.unlock(); + + for i in 0..=UAT_NUM_CTX { + self.flush[i].state.store(0, Ordering::Relaxed); + self.flush[i].addr.store(0, Ordering::Relaxed); + self.flush[i].size.store(0, Ordering::Relaxed); + } + fence(Ordering::SeqCst); + Ok(()) + } +} + +/// Represents a single flush info slot in the handoff region. +/// +/// # Invariants +/// The pointer is valid and there is no aliasing HandoffFlush instance. +struct HandoffFlush(*const FlushInfo); + +// SAFETY: These pointers are safe to send across threads. 
+unsafe impl Send for HandoffFlush {} + +impl HandoffFlush { + /// Set up a flush operation for the coprocessor + fn begin_flush(&self, start: u64, size: u64) { + let flush = unsafe { self.0.as_ref().unwrap() }; + + let state = flush.state.load(Ordering::Relaxed); + if state != 0 { + pr_err!("Handoff: expected flush state 0, got {}\n", state); + } + flush.addr.store(start, Ordering::Relaxed); + flush.size.store(size, Ordering::Relaxed); + flush.state.store(1, Ordering::Relaxed); + } + + /// Complete a flush operation for the coprocessor + fn end_flush(&self) { + let flush = unsafe { self.0.as_ref().unwrap() }; + let state = flush.state.load(Ordering::Relaxed); + if state != 2 { + pr_err!("Handoff: expected flush state 2, got {}\n", state); + } + flush.state.store(0, Ordering::Relaxed); + } +} + +// We do not implement FlushOps, since we flush manually in this module after +// page table operations. Just provide dummy implementations. +impl io_pgtable::FlushOps for Uat { + type Data = (); + + fn tlb_flush_all(_data: <Self::Data as ForeignOwnable>::Borrowed<'_>) {} + fn tlb_flush_walk( + _data: <Self::Data as ForeignOwnable>::Borrowed<'_>, + _iova: usize, + _size: usize, + _granule: usize, + ) { + } + fn tlb_add_page( + _data: <Self::Data as ForeignOwnable>::Borrowed<'_>, + _iova: usize, + _granule: usize, + ) { + } +} + +static LOCK_KEY: LockClassKey = LockClassKey::new(); + +impl Vm { + /// Create a new virtual memory address space + fn new( + dev: driver::AsahiDevice, + uat_inner: Arc<UatInner>, + cfg: &'static hw::HwConfig, + is_kernel: bool, + id: u64, + file_id: u64, + ) -> Result<Vm> { + let page_table = AppleUAT::new( + &dev, + io_pgtable::Config { + pgsize_bitmap: UAT_PGSZ, + ias: if is_kernel { UAT_IAS_KERN } else { UAT_IAS }, + oas: cfg.uat_oas, + coherent_walk: true, + quirks: 0, + }, + (), + )?; + let min_va = if is_kernel { + IOVA_KERN_BASE + } else { + IOVA_USER_BASE + }; + let max_va = if is_kernel { + IOVA_KERN_TOP + } else { + IOVA_USER_TOP + }; + + let mm = mm::Allocator::new( + min_va as u64, + (max_va - min_va + 1) as u64, + (), + c_str!("asahi Vm"), + &LOCK_KEY, + )?; + + Ok(Vm { + id, + file_id, + inner: Arc::try_new(Mutex::new(VmInner { + dev, + min_va, + max_va, + is_kernel, + page_table, + mm, + uat_inner, + binding: None, + bind_token: None, + active_users: 0, + id, + }))?, + }) + } + + /// Get the translation table base for this Vm + fn ttb(&self) -> u64 { + self.inner.lock().ttb() + } + + /// Map a GEM object (using its `SGTable`) into this Vm at a free address. + pub(crate) fn map(&self, size: usize, sgt: gem::SGTable) -> Result<Mapping> { + let mut inner = self.inner.lock(); + + let uat_inner = inner.uat_inner.clone(); + let node = inner.mm.insert_node( + MappingInner { + owner: self.inner.clone(), + uat_inner, + prot: PROT_FW_SHARED_RW, + sgt: Some(sgt), + mapped_size: size, + }, + (size + UAT_PGSZ) as u64, // Add guard page + )?; + + inner.map_node(&node, PROT_FW_SHARED_RW)?; + Ok(Mapping(node)) + } + + /// Map a GEM object (using its `SGTable`) into this Vm at a free address in a given range. 
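The `Handoff::lock()`/`unlock()` pair above implements the AP side of a Dekker-style handshake with the firmware (`lock_fw` and `turn` are the firmware's half), and every read of a handoff field the firmware can also update is bracketed by it. A minimal sketch of the pattern as the flush and bind paths later in this file use it (`uat_shared` stands for an already-held guard on the shared `UatShared` data):

    uat_shared.handoff().lock();
    let cur_slot = uat_shared.handoff().current_slot();
    uat_shared.handoff().unlock();
    // cur_slot is only meaningful while the lock was held; callers that act on
    // it re-check the TTB/slot state under the same lock.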
+ #[allow(clippy::too_many_arguments)] + pub(crate) fn map_in_range( + &self, + size: usize, + sgt: gem::SGTable, + alignment: u64, + start: u64, + end: u64, + prot: u32, + guard: bool, + ) -> Result<Mapping> { + let mut inner = self.inner.lock(); + + let uat_inner = inner.uat_inner.clone(); + let node = inner.mm.insert_node_in_range( + MappingInner { + owner: self.inner.clone(), + uat_inner, + prot, + sgt: Some(sgt), + mapped_size: size, + }, + (size + if guard { UAT_PGSZ } else { 0 }) as u64, // Add guard page + alignment, + 0, + start, + end, + mm::InsertMode::Best, + )?; + + inner.map_node(&node, prot)?; + Ok(Mapping(node)) + } + + /// Map a GEM object (using its `SGTable`) into this Vm at a specific address. + #[allow(clippy::too_many_arguments)] + pub(crate) fn map_at( + &self, + addr: u64, + size: usize, + sgt: gem::SGTable, + prot: u32, + guard: bool, + ) -> Result<Mapping> { + let mut inner = self.inner.lock(); + + let uat_inner = inner.uat_inner.clone(); + let node = inner.mm.reserve_node( + MappingInner { + owner: self.inner.clone(), + uat_inner, + prot, + sgt: Some(sgt), + mapped_size: size, + }, + addr, + (size + if guard { UAT_PGSZ } else { 0 }) as u64, // Add guard page + 0, + )?; + + inner.map_node(&node, prot)?; + Ok(Mapping(node)) + } + + /// Add a direct MMIO mapping to this Vm at a free address. + pub(crate) fn map_io(&self, phys: usize, size: usize, rw: bool) -> Result<Mapping> { + let prot = if rw { PROT_FW_MMIO_RW } else { PROT_FW_MMIO_RO }; + let mut inner = self.inner.lock(); + + let uat_inner = inner.uat_inner.clone(); + let node = inner.mm.insert_node( + MappingInner { + owner: self.inner.clone(), + uat_inner, + prot, + sgt: None, + mapped_size: size, + }, + (size + UAT_PGSZ) as u64, // Add guard page + )?; + + let iova = node.start() as usize; + + if (phys | size | iova) & UAT_PGMSK != 0 { + dev_err!( + inner.dev, + "MMU: Mapping {:#x}:{:#x} -> {:#x} is not page-aligned\n", + phys, + size, + iova + ); + return Err(EINVAL); + } + + dev_info!( + inner.dev, + "MMU: IO map: {:#x}:{:#x} -> {:#x}\n", + phys, + size, + iova + ); + + inner.map_pages(iova, phys, UAT_PGSZ, size >> UAT_PGBIT, prot)?; + + Ok(Mapping(node)) + } + + /// Returns the unique ID of this Vm + pub(crate) fn id(&self) -> u64 { + self.id + } + + /// Returns the unique File ID of the owner of this Vm + pub(crate) fn file_id(&self) -> u64 { + self.file_id + } +} + +impl Drop for VmInner { + fn drop(&mut self) { + assert_eq!(self.active_users, 0); + + mod_pr_debug!( + "VmInner::Drop [{}]: bind_token={:?}\n", + self.id, + self.bind_token + ); + + // Make sure this VM is not mapped to a TTB if it was + if let Some(token) = self.bind_token.take() { + let idx = (token.last_slot() as usize) + UAT_USER_CTX_START; + let ttb = self.ttb() | TTBR_VALID | (idx as u64) << TTBR_ASID_SHIFT; + + let uat_inner = self.uat_inner.lock(); + uat_inner.handoff().lock(); + let handoff_cur = uat_inner.handoff().current_slot(); + let ttb_cur = uat_inner.ttbs()[idx].ttb0.load(Ordering::SeqCst); + let inval = ttb_cur == ttb; + if inval { + if handoff_cur == Some(idx as u32) { + pr_err!( + "VmInner::drop owning slot {}, but it is currently in use by the ASC?\n", + idx + ); + } + uat_inner.ttbs()[idx].ttb0.store(0, Ordering::SeqCst); + } + uat_inner.handoff().unlock(); + core::mem::drop(uat_inner); + + // In principle we dropped all the Mappings already, but we might as + // well play it safe and invalidate the whole ASID. 
+ if inval { + mod_pr_debug!( + "VmInner::Drop [{}]: need inval for ASID {:#x}\n", + self.id, + idx + ); + mem::tlbi_asid(idx as u8); + mem::sync(); + } + } + } +} + +impl Uat { + /// Map a bootloader-preallocated memory region + fn map_region( + dev: &dyn device::RawDevice, + name: &CStr, + size: usize, + cached: bool, + ) -> Result<UatRegion> { + let rdev = dev.raw_device(); + + let mut res = core::mem::MaybeUninit::<bindings::resource>::uninit(); + + let res = unsafe { + let idx = bindings::of_property_match_string( + (*rdev).of_node, + c_str!("memory-region-names").as_char_ptr(), + name.as_char_ptr(), + ); + to_result(idx)?; + + let np = bindings::of_parse_phandle( + (*rdev).of_node, + c_str!("memory-region").as_char_ptr(), + idx, + ); + if np.is_null() { + dev_err!(dev, "Missing {} region\n", name); + return Err(EINVAL); + } + let ret = bindings::of_address_to_resource(np, 0, res.as_mut_ptr()); + bindings::of_node_put(np); + + if ret < 0 { + dev_err!(dev, "Failed to get {} region\n", name); + to_result(ret)? + } + + res.assume_init() + }; + + let rgn_size: usize = unsafe { bindings::resource_size(&res) } as usize; + + if size > rgn_size { + dev_err!( + dev, + "Region {} is too small (expected {}, got {})\n", + name, + size, + rgn_size + ); + return Err(ENOMEM); + } + + let flags = if cached { + bindings::MEMREMAP_WB + } else { + bindings::MEMREMAP_WC + }; + let map = unsafe { bindings::memremap(res.start, rgn_size, flags.into()) }; + let map = NonNull::new(map); + + match map { + None => { + dev_err!(dev, "Failed to remap {} region\n", name); + Err(ENOMEM) + } + Some(map) => Ok(UatRegion { + base: res.start, + map, + }), + } + } + + /// Returns a view into the root kernel (upper half) page table + fn kpt0(&self) -> &[Pte; UAT_NPTE] { + // SAFETY: pointer is non-null per the type invariant + unsafe { (self.pagetables_rgn.map.as_ptr() as *mut [Pte; UAT_NPTE]).as_ref() }.unwrap() + } + + /// Returns a reference to the global kernel (upper half) `Vm` + pub(crate) fn kernel_vm(&self) -> &Vm { + &self.kernel_vm + } + + /// Returns the base physical address of the TTBAT region. + pub(crate) fn ttb_base(&self) -> u64 { + let inner = self.inner.lock(); + + inner.ttbs_rgn.base + } + + /// Binds a `Vm` to a slot, preferring the last used one. 
+ pub(crate) fn bind(&self, vm: &Vm) -> Result<VmBind> { + let mut inner = vm.inner.lock(); + + if inner.binding.is_none() { + assert_eq!(inner.active_users, 0); + + let slot = self.slots.get(inner.bind_token)?; + if slot.changed() { + mod_pr_debug!("Vm Bind [{}]: bind_token={:?}\n", vm.id, slot.token(),); + let idx = (slot.slot() as usize) + UAT_USER_CTX_START; + let ttb = inner.ttb() | TTBR_VALID | (idx as u64) << TTBR_ASID_SHIFT; + + let uat_inner = self.inner.lock(); + let ttbs = uat_inner.ttbs(); + uat_inner.handoff().lock(); + if uat_inner.handoff().current_slot() == Some(idx as u32) { + pr_err!( + "Vm::bind to slot {}, but it is currently in use by the ASC?\n", + idx + ); + } + ttbs[idx].ttb0.store(ttb, Ordering::Relaxed); + ttbs[idx].ttb1.store(0, Ordering::Relaxed); + uat_inner.handoff().unlock(); + core::mem::drop(uat_inner); + + // Make sure all TLB entries from the previous owner of this ASID are gone + mem::tlbi_asid(idx as u8); + mem::sync(); + } + + inner.bind_token = Some(slot.token()); + inner.binding = Some(slot); + } + + inner.active_users += 1; + + let slot = inner.binding.as_ref().unwrap().slot() + UAT_USER_CTX_START as u32; + mod_pr_debug!("MMU: slot {} active users {}\n", slot, inner.active_users); + Ok(VmBind(vm.clone(), slot)) + } + + /// Creates a new `Vm` linked to this UAT. + pub(crate) fn new_vm(&self, id: u64, file_id: u64) -> Result<Vm> { + Vm::new( + self.dev.clone(), + self.inner.clone(), + self.cfg, + false, + id, + file_id, + ) + } + + /// Creates the reference-counted inner data for a new `Uat` instance. + #[inline(never)] + fn make_inner(dev: &driver::AsahiDevice) -> Result<Arc<UatInner>> { + let handoff_rgn = Self::map_region(dev, c_str!("handoff"), HANDOFF_SIZE, false)?; + let ttbs_rgn = Self::map_region(dev, c_str!("ttbs"), SLOTS_SIZE, false)?; + + dev_info!(dev, "MMU: Initializing kernel page table\n"); + + let mut inner = UniqueArc::<UatInner>::try_new_uninit()?; + let ptr = inner.as_mut_ptr(); + + Ok(unsafe { + let handoff = &(handoff_rgn.map.as_ptr() as *mut Handoff).as_ref().unwrap(); + + for i in 0..UAT_NUM_CTX + 1 { + addr_of_mut!((*ptr).handoff_flush[i]) + .write(Mutex::new(HandoffFlush(&handoff.flush[i]))); + } + + addr_of_mut!((*ptr).shared).write(Mutex::new(UatShared { + handoff_rgn, + ttbs_rgn, + })); + + inner.assume_init() + } + .into()) + } + + /// Creates a new `Uat` instance given the relevant hardware config. 
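`bind()` above publishes a VM to the firmware by writing a TTBAT entry composed of the io_pgtable translation table base, the valid bit, and the slot index reused as the ASID in the top bits. A small worked example of that composition (the TTBR value is made up for illustration):

    let ttbr: u64 = 0x0000_0008_0123_4000; // hypothetical page table root
    let idx: u64 = 5; // TTBAT slot index, doubling as the ASID
    let entry = ttbr | TTBR_VALID | (idx << TTBR_ASID_SHIFT);
    // entry == 0x0005_0008_0123_4001: ASID 5 in bits 63:48, valid bit 0 set.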
+ #[inline(never)] + pub(crate) fn new(dev: &driver::AsahiDevice, cfg: &'static hw::HwConfig) -> Result<Self> { + dev_info!(dev, "MMU: Initializing...\n"); + + let inner = Self::make_inner(dev)?; + + let pagetables_rgn = Self::map_region(dev, c_str!("pagetables"), PAGETABLES_SIZE, true)?; + + dev_info!(dev, "MMU: Creating kernel page tables\n"); + let kernel_lower_vm = Vm::new(dev.clone(), inner.clone(), cfg, false, 1, 0)?; + let kernel_vm = Vm::new(dev.clone(), inner.clone(), cfg, true, 0, 0)?; + + dev_info!(dev, "MMU: Kernel page tables created\n"); + + let ttb0 = kernel_lower_vm.ttb(); + let ttb1 = kernel_vm.ttb(); + + let uat = Self { + dev: dev.clone(), + cfg, + pagetables_rgn, + kernel_vm, + _kernel_lower_vm: kernel_lower_vm, + inner, + slots: slotalloc::SlotAllocator::new(UAT_USER_CTX as u32, (), |_inner, _slot| { + SlotInner() + })?, + }; + + let inner = uat.inner.lock(); + + inner.handoff().init()?; + + dev_info!(dev, "MMU: Initializing TTBs\n"); + + inner.handoff().lock(); + + let ttbs = inner.ttbs(); + + ttbs[0].ttb0.store(ttb0 | TTBR_VALID, Ordering::Relaxed); + ttbs[0] + .ttb1 + .store(uat.pagetables_rgn.base | TTBR_VALID, Ordering::Relaxed); + + for ctx in &ttbs[1..] { + ctx.ttb0.store(0, Ordering::Relaxed); + ctx.ttb1.store(0, Ordering::Relaxed); + } + + inner.handoff().unlock(); + + core::mem::drop(inner); + + uat.kpt0()[2].store(ttb1 | PTE_TABLE, Ordering::Relaxed); + + dev_info!(dev, "MMU: initialized\n"); + + Ok(uat) + } +} + +impl Drop for Uat { + fn drop(&mut self) { + // Unmap what we mapped + self.kpt0()[2].store(0, Ordering::Relaxed); + + // Make sure we flush the TLBs + fence(Ordering::SeqCst); + mem::tlbi_all(); + mem::sync(); + } +} diff --git a/drivers/gpu/drm/asahi/object.rs b/drivers/gpu/drm/asahi/object.rs new file mode 100644 index 000000000000..449899b88181 --- /dev/null +++ b/drivers/gpu/drm/asahi/object.rs @@ -0,0 +1,704 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT + +//! Asahi GPU object model +//! +//! The AGX GPU includes a coprocessor that uses a large number of shared memory structures to +//! communicate with the driver. These structures contain GPU VA pointers to each other, which are +//! directly dereferenced by the firmware and are expected to always be valid for the usage +//! lifetime of the containing struct (which is an implicit contract, not explicitly managed). +//! Any faults cause an unrecoverable firmware crash, requiring a full system reboot. +//! +//! In order to manage this complexity safely, we implement a GPU object model using Rust's type +//! system to enforce GPU object lifetime relationships. GPU objects represent an allocated piece +//! of memory of a given type, mapped to the GPU (and usually also the CPU). On the CPU side, +//! these objects are associated with a pure Rust structure that contains the objects it depends +//! on (or references to them). This allows us to map Rust lifetimes into the GPU object model +//! system. Then, GPU VA pointers also inherit those lifetimes, which means the Rust borrow checker +//! can ensure that all pointers are assigned an address that is guaranteed to outlive the GPU +//! object it points to. +//! +//! Since the firmware object model does have self-referencing pointers (and there is of course no +//! underlying revocability mechanism to make it safe), we must have an escape hatch. GPU pointers +//! can be weak pointers, which do not enforce lifetimes. In those cases, it is the user's +//! responsibility to ensure that lifetime requirements are met. +//! +//! 
In other words, the model is necessarily leaky and there is no way to fully map Rust safety to +//! GPU firmware object safety. The goal of the model is to make it easy to model the lifetimes of +//! GPU objects and have the compiler help in avoiding mistakes, rather than to guarantee safety +//! 100% of the time as would be the case for CPU-side Rust code. + +// TODO: There is a fundamental soundness issue with sharing memory with the GPU (that even affects +// C code too). Since the GPU is free to mutate that memory at any time, normal reference invariants +// cannot be enforced on the CPU side. For example, the compiler could perform an optimization that +// assumes that a given memory location does not change between two reads, and causes UB otherwise, +// and then the GPU could mutate that memory out from under the CPU. +// +// For cases where we *expect* this to happen, we use atomic types, which avoid this issue. However, +// doing so for every single field of every type is a non-starter. Right now, there seems to be no +// good solution for this that does not come with significant performance or ergonomics downsides. +// +// In *practice* we are almost always only writing GPU memory, and only reading from atomics, so the +// chances of this actually triggering UB (e.g. a security issue that can be triggered from the GPU +// side) due to a compiler optimization are very slim. +// +// Further discussion: https://github.com/rust-lang/unsafe-code-guidelines/issues/152 + +use kernel::{error::code::*, prelude::*}; + +use alloc::boxed::Box; +use core::fmt; +use core::fmt::Debug; +use core::fmt::Formatter; +use core::marker::PhantomData; +use core::mem::MaybeUninit; +use core::num::NonZeroU64; +use core::ops::{Deref, DerefMut, Index, IndexMut}; +use core::{mem, ptr, slice}; + +use crate::alloc::Allocation; +use crate::debug::*; +use crate::fw::types::Zeroed; + +const DEBUG_CLASS: DebugFlags = DebugFlags::Object; + +/// A GPU-side strong pointer, which is a 64-bit non-zero VA with an associated lifetime. +/// +/// In rare cases these pointers are not aligned, so this is `packed(1)`. +#[repr(C, packed(1))] +pub(crate) struct GpuPointer<'a, T: ?Sized>(NonZeroU64, PhantomData<&'a T>); + +impl<'a, T: ?Sized> GpuPointer<'a, T> { + /// Logical OR the pointer with an arbitrary `u64`. This is used when GPU struct fields contain + /// misc flag fields in the upper bits. The lifetime is retained. This is GPU-unsafe in + /// principle, but we assert that only non-implemented address bits are touched, which is safe + /// for pointers used by the GPU (not by firmware). + pub(crate) fn or(&self, other: u64) -> GpuPointer<'a, T> { + // This will fail for kernel-half pointers, which should not be ORed. + assert_eq!(self.0.get() & other, 0); + // Assert that we only touch the high bits. + assert_eq!(other & 0xffffffffff, 0); + GpuPointer(self.0 | other, PhantomData) + } + + /// Add an arbitrary offset to the pointer. This is not safe (from the GPU perspective), and + /// should only be used via the `inner_ptr` macro to get pointers to inner fields, hence we mark + /// it `unsafe` to discourage direct use. + // NOTE: The third argument is a type inference hack. 
+ pub(crate) unsafe fn offset<U>(&self, off: usize, _: *const U) -> GpuPointer<'a, U> { + GpuPointer::<'a, U>( + NonZeroU64::new(self.0.get() + (off as u64)).unwrap(), + PhantomData, + ) + } +} + +impl<'a, T: ?Sized> Debug for GpuPointer<'a, T> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + let val = self.0; + f.write_fmt(format_args!("{:#x} ({})", val, core::any::type_name::<T>())) + } +} + +/// Take a pointer to a sub-field within a structure pointed to by a GpuPointer, keeping the +/// lifetime. +#[macro_export] +macro_rules! inner_ptr { + ($gpuva:expr, $($f:tt)*) => ({ + // This mirrors kernel::offset_of(), except we use type inference to avoid having to know + // the type of the pointer explicitly. + fn uninit_from<'a, T: GpuStruct>(_: GpuPointer<'a, T>) -> core::mem::MaybeUninit<T::Raw<'static>> { + core::mem::MaybeUninit::uninit() + } + let tmp = uninit_from($gpuva); + let outer = tmp.as_ptr(); + // SAFETY: The pointer is valid and aligned, just not initialised; `addr_of` ensures that + // we don't actually read from `outer` (which would be UB) nor create an intermediate + // reference. + let p: *const _ = unsafe { core::ptr::addr_of!((*outer).$($f)*) }; + let inner = p as *const u8; + // SAFETY: The two pointers are within the same allocation block. + let off = unsafe { inner.offset_from(outer as *const u8) }; + // SAFETY: The resulting pointer is guaranteed to point to valid memory within the outer + // object. + unsafe { $gpuva.offset(off.try_into().unwrap(), p) } + }) +} + +/// A GPU-side weak pointer, which is a 64-bit non-zero VA with no lifetime. +/// +/// In rare cases these pointers are not aligned, so this is `packed(1)`. +#[repr(C, packed(1))] +pub(crate) struct GpuWeakPointer<T: ?Sized>(NonZeroU64, PhantomData<*const T>); + +/// SAFETY: GPU weak pointers are always safe to share between threads. +unsafe impl<T: ?Sized> Send for GpuWeakPointer<T> {} +unsafe impl<T: ?Sized> Sync for GpuWeakPointer<T> {} + +// Weak pointers can be copied/cloned regardless of their target type. +impl<T: ?Sized> Copy for GpuWeakPointer<T> {} + +impl<T: ?Sized> Clone for GpuWeakPointer<T> { + fn clone(&self) -> Self { + *self + } +} + +impl<T: ?Sized> GpuWeakPointer<T> { + /// Add an arbitrary offset to the pointer. This is not safe (from the GPU perspective), and + /// should only be used via the `inner_ptr` macro to get pointers to inner fields, hence we mark + /// it `unsafe` to discourage direct use. + // NOTE: The third argument is a type inference hack. + pub(crate) unsafe fn offset<U>(&self, off: usize, _: *const U) -> GpuWeakPointer<U> { + GpuWeakPointer::<U>( + NonZeroU64::new(self.0.get() + (off as u64)).unwrap(), + PhantomData, + ) + } + + /// Upgrade a weak pointer into a strong pointer. This is not considered safe from the GPU + /// perspective. + pub(crate) unsafe fn upgrade<'a>(&self) -> GpuPointer<'a, T> { + GpuPointer(self.0, PhantomData) + } +} + +impl<T: ?Sized> Debug for GpuWeakPointer<T> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + let val = self.0; + f.write_fmt(format_args!("{:#x} ({})", val, core::any::type_name::<T>())) + } +} + +/// Take a pointer to a sub-field within a structure pointed to by a GpuWeakPointer. +#[macro_export] +macro_rules! 
inner_weak_ptr { + ($gpuva:expr, $($f:tt)*) => ({ + // See inner_ptr() + fn uninit_from<T: GpuStruct>(_: GpuWeakPointer<T>) -> core::mem::MaybeUninit<T::Raw<'static>> { + core::mem::MaybeUninit::uninit() + } + let tmp = uninit_from($gpuva); + let outer = tmp.as_ptr(); + // SAFETY: The pointer is valid and aligned, just not initialised; `addr_of` ensures that + // we don't actually read from `outer` (which would be UB) nor create an intermediate + // reference. + let p: *const _ = unsafe { core::ptr::addr_of!((*outer).$($f)*) }; + let inner = p as *const u8; + // SAFETY: The two pointers are within the same allocation block. + let off = unsafe { inner.offset_from(outer as *const u8) }; + // SAFETY: The resulting pointer is guaranteed to point to valid memory within the outer + // object. + unsafe { $gpuva.offset(off.try_into().unwrap(), p) } + }) +} + +/// Types that implement this trait represent a GPU structure from the CPU side. +/// +/// The `Raw` type represents the actual raw structure definition on the GPU side. +/// +/// Types implementing [`GpuStruct`] must have fields owning any objects (or strong references +/// to them) that GPU pointers in the `Raw` structure point to. This mechanism is used to enforce +/// lifetimes. +pub(crate) trait GpuStruct: 'static { + /// The type of the GPU-side structure definition representing the firmware struct layout. + type Raw<'a>; +} + +/// An instance of a GPU object in memory. +/// +/// # Invariants +/// `raw` must point to a valid mapping of the `T::Raw` type associated with the `alloc` allocation. +/// `gpu_ptr` must be the GPU address of the same object. +pub(crate) struct GpuObject<T: GpuStruct, U: Allocation<T>> { + raw: *mut T::Raw<'static>, + alloc: U, + gpu_ptr: GpuWeakPointer<T>, + inner: Box<T>, +} + +impl<T: GpuStruct, U: Allocation<T>> GpuObject<T, U> { + /// Create a new GpuObject given an allocator and the inner data (a type implementing + /// GpuStruct). + /// + /// The caller passes a closure that constructs the `T::Raw` type given a reference to the + /// `GpuStruct`. This is the mechanism used to enforce lifetimes. + pub(crate) fn new( + alloc: U, + inner: T, + callback: impl for<'a> FnOnce(&'a T) -> T::Raw<'a>, + ) -> Result<Self> { + let size = mem::size_of::<T::Raw<'static>>(); + if size > 0x1000 { + dev_crit!( + alloc.device(), + "Allocating {} of size {:#x}, with new, please use new_boxed!\n", + core::any::type_name::<T>(), + size + ); + } + if alloc.size() < size { + return Err(ENOMEM); + } + let gpu_ptr = + GpuWeakPointer::<T>(NonZeroU64::new(alloc.gpu_ptr()).ok_or(EINVAL)?, PhantomData); + mod_dev_dbg!( + alloc.device(), + "Allocating {} @ {:#x}\n", + core::any::type_name::<T>(), + alloc.gpu_ptr() + ); + let p = alloc.ptr().ok_or(EINVAL)?.as_ptr() as *mut T::Raw<'static>; + let mut raw = callback(&inner); + // SAFETY: `p` is guaranteed to be valid per the Allocation invariant, and the type is + // identical to the type of `raw` other than the lifetime. + unsafe { p.copy_from(&mut raw as *mut _ as *mut u8 as *mut _, 1) }; + mem::forget(raw); + Ok(Self { + raw: p, + gpu_ptr, + alloc, + inner: Box::try_new(inner)?, + }) + } + + /// Create a new GpuObject given an allocator and the boxed inner data (a type implementing + /// GpuStruct). + /// + /// The caller passes a closure that initializes the `T::Raw` type given a reference to the + /// `GpuStruct` and a `MaybeUninit<T::Raw>`. This is intended to be used with the place!() + /// macro to avoid constructing the whole `T::Raw` object on the stack. 
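The boxed and in-place constructors that follow exist to keep large firmware structs off the kernel stack: the callback is handed the GPU-side slot as a `MaybeUninit` and fills it field by field, typically via the `place!()` macro used throughout the InitData builder earlier in this series. A hypothetical sketch of the calling pattern (`raw::Example` and the `child` field are placeholders):

    let obj = GpuObject::new_boxed(alloc, inner, |inner, ptr| {
        Ok(place!(
            ptr,
            raw::Example {
                // GPU pointers borrow from `inner`, so the borrow checker ties
                // the pointee's lifetime to this object.
                child: inner.child.gpu_pointer(),
                ..Default::default()
            }
        ))
    })?;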
+ pub(crate) fn new_boxed( + alloc: U, + inner: Box<T>, + callback: impl for<'a> FnOnce( + &'a T, + &'a mut MaybeUninit<T::Raw<'a>>, + ) -> Result<&'a mut T::Raw<'a>>, + ) -> Result<Self> { + if alloc.size() < mem::size_of::<T::Raw<'static>>() { + return Err(ENOMEM); + } + let gpu_ptr = + GpuWeakPointer::<T>(NonZeroU64::new(alloc.gpu_ptr()).ok_or(EINVAL)?, PhantomData); + mod_dev_dbg!( + alloc.device(), + "Allocating {} @ {:#x}\n", + core::any::type_name::<T>(), + alloc.gpu_ptr() + ); + let p = alloc.ptr().ok_or(EINVAL)?.as_ptr() as *mut MaybeUninit<T::Raw<'_>>; + // SAFETY: `p` is guaranteed to be valid per the Allocation invariant. + let raw = callback(&inner, unsafe { &mut *p })?; + if p as *mut T::Raw<'_> != raw as *mut _ { + dev_err!( + alloc.device(), + "Allocation callback returned a mismatched reference ({})\n", + core::any::type_name::<T>(), + ); + return Err(EINVAL); + } + Ok(Self { + raw: p as *mut u8 as *mut T::Raw<'static>, + gpu_ptr, + alloc, + inner, + }) + } + + /// Create a new GpuObject given an allocator and the inner data (a type implementing + /// GpuStruct). + /// + /// The caller passes a closure that initializes the `T::Raw` type given a reference to the + /// `GpuStruct` and a `MaybeUninit<T::Raw>`. This is intended to be used with the place!() + /// macro to avoid constructing the whole `T::Raw` object on the stack. + pub(crate) fn new_inplace( + alloc: U, + inner: T, + callback: impl for<'a> FnOnce( + &'a T, + &'a mut MaybeUninit<T::Raw<'a>>, + ) -> Result<&'a mut T::Raw<'a>>, + ) -> Result<Self> { + GpuObject::<T, U>::new_boxed(alloc, Box::try_new(inner)?, callback) + } + + /// Create a new GpuObject given an allocator, with callback-based initialization. + /// + /// This is used when the construction of the `T` type requires knowing the GPU VA address of + /// the structure that is being constructed ahead of time. The first callback constructs a + /// `Box<T>` given the pointer to the about-to-be-initialized GPU structure, and the second + /// callback initializes that structure as in `new_boxed`. + pub(crate) fn new_prealloc( + alloc: U, + inner_cb: impl FnOnce(GpuWeakPointer<T>) -> Result<Box<T>>, + raw_cb: impl for<'a> FnOnce( + &'a T, + &'a mut MaybeUninit<T::Raw<'a>>, + ) -> Result<&'a mut T::Raw<'a>>, + ) -> Result<Self> { + if alloc.size() < mem::size_of::<T::Raw<'static>>() { + return Err(ENOMEM); + } + let gpu_ptr = + GpuWeakPointer::<T>(NonZeroU64::new(alloc.gpu_ptr()).ok_or(EINVAL)?, PhantomData); + mod_dev_dbg!( + alloc.device(), + "Allocating {} @ {:#x}\n", + core::any::type_name::<T>(), + alloc.gpu_ptr() + ); + let inner = inner_cb(gpu_ptr)?; + let p = alloc.ptr().ok_or(EINVAL)?.as_ptr() as *mut MaybeUninit<T::Raw<'_>>; + // SAFETY: `p` is guaranteed to be valid per the Allocation invariant. + let raw = raw_cb(&*inner, unsafe { &mut *p })?; + if p as *mut T::Raw<'_> != raw as *mut _ { + dev_err!( + alloc.device(), + "Allocation callback returned a mismatched reference ({})\n", + core::any::type_name::<T>(), + ); + return Err(EINVAL); + } + Ok(Self { + raw: p as *mut u8 as *mut T::Raw<'static>, + gpu_ptr, + alloc, + inner, + }) + } + + /// Returns the GPU VA of this object (as a raw [`NonZeroU64`]) + pub(crate) fn gpu_va(&self) -> NonZeroU64 { + self.gpu_ptr.0 + } + + /// Returns a strong GPU pointer to this object, with a lifetime. + pub(crate) fn gpu_pointer(&self) -> GpuPointer<'_, T> { + GpuPointer(self.gpu_ptr.0, PhantomData) + } + + /// Returns a weak GPU pointer to this object, with no lifetime. 
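Call sites frequently need the GPU address of a single field inside a mapped struct rather than of the whole object; that is what the `inner_ptr!`/`inner_weak_ptr!` macros defined earlier in this file are for, starting from the pointers returned by `gpu_pointer()` above and `weak_pointer()` below. A hypothetical sketch (the field names are placeholders):

    // The strong variant keeps `obj` borrowed for the pointer's lifetime.
    let counter_ptr = inner_ptr!(obj.gpu_pointer(), sub_struct.counter);
    // The weak variant drops the lifetime (and is Copy), for self-referential
    // layouts where the borrow checker cannot help.
    let counter_weak = inner_weak_ptr!(obj.weak_pointer(), sub_struct.counter);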
+ pub(crate) fn weak_pointer(&self) -> GpuWeakPointer<T> { + GpuWeakPointer(self.gpu_ptr.0, PhantomData) + } + + /// Perform a mutation to the inner `Raw` data given a user-supplied callback. + /// + /// The callback gets a mutable reference to the `GpuStruct` type. + pub(crate) fn with_mut<RetVal>( + &mut self, + callback: impl for<'a> FnOnce(&'a mut <T as GpuStruct>::Raw<'a>, &'a mut T) -> RetVal, + ) -> RetVal { + // SAFETY: `self.raw` is valid per the type invariant, and the second half is just + // converting lifetimes. + unsafe { callback(&mut *self.raw, &mut *(&mut *self.inner as *mut _)) } + } + + /// Access the inner `Raw` data given a user-supplied callback. + /// + /// The callback gets a reference to the `GpuStruct` type. + pub(crate) fn with<RetVal>( + &self, + callback: impl for<'a> FnOnce(&'a <T as GpuStruct>::Raw<'a>, &'a T) -> RetVal, + ) -> RetVal { + // SAFETY: `self.raw` is valid per the type invariant, and the second half is just + // converting lifetimes. + unsafe { callback(&*self.raw, &*(&*self.inner as *const _)) } + } +} + +impl<T: GpuStruct, U: Allocation<T>> Deref for GpuObject<T, U> { + type Target = T; + + fn deref(&self) -> &Self::Target { + &self.inner + } +} + +impl<T: GpuStruct, U: Allocation<T>> DerefMut for GpuObject<T, U> { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.inner + } +} + +impl<T: GpuStruct + Debug, U: Allocation<T>> Debug for GpuObject<T, U> +where + <T as GpuStruct>::Raw<'static>: Debug, +{ + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + f.debug_struct(core::any::type_name::<T>()) + // SAFETY: `self.raw` is valid per the type invariant. + .field("raw", &format_args!("{:#X?}", unsafe { &*self.raw })) + .field("inner", &format_args!("{:#X?}", &self.inner)) + .field("alloc", &format_args!("{:?}", &self.alloc)) + .finish() + } +} + +impl<T: GpuStruct + Default, U: Allocation<T>> GpuObject<T, U> +where + for<'a> <T as GpuStruct>::Raw<'a>: Default + Zeroed, +{ + /// Create a new GpuObject with default data. `T` must implement `Default` and `T::Raw` must + /// implement `Zeroed`, since the GPU-side memory is initialized by zeroing. + pub(crate) fn new_default(alloc: U) -> Result<Self> { + GpuObject::<T, U>::new_inplace(alloc, Default::default(), |_inner, raw| { + // SAFETY: `raw` is valid here, and `T::Raw` implements `Zeroed`. + Ok(unsafe { + ptr::write_bytes(raw, 0, 1); + (*raw).assume_init_mut() + }) + }) + } +} + +impl<T: GpuStruct, U: Allocation<T>> Drop for GpuObject<T, U> { + fn drop(&mut self) { + mod_dev_dbg!( + self.alloc.device(), + "Dropping {} @ {:?}\n", + core::any::type_name::<T>(), + self.gpu_pointer() + ); + } +} + +// SAFETY: GpuObjects are Send as long as the GpuStruct itself is Send +unsafe impl<T: GpuStruct + Send, U: Allocation<T>> Send for GpuObject<T, U> {} +// SAFETY: GpuObjects are Send as long as the GpuStruct itself is Send +unsafe impl<T: GpuStruct + Sync, U: Allocation<T>> Sync for GpuObject<T, U> {} + +/// Trait used to erase the type of a GpuObject, used when we need to keep a list of heterogenous +/// objects around. +pub(crate) trait OpaqueGpuObject: Send + Sync { + fn gpu_va(&self) -> NonZeroU64; +} + +impl<T: GpuStruct + Sync + Send, U: Allocation<T>> OpaqueGpuObject for GpuObject<T, U> { + fn gpu_va(&self) -> NonZeroU64 { + Self::gpu_va(self) + } +} + +/// An array of raw GPU objects that is only accessible to the GPU (no CPU-side mapping required). 
+/// +/// This must necessarily be uninitialized as far as the GPU is concerned, so it cannot be used +/// when initialization is required. +/// +/// # Invariants +/// +/// `alloc` is valid and at least as large as `len` times the size of one `T`. +/// `gpu_ptr` is valid and points to the allocation start. +pub(crate) struct GpuOnlyArray<T, U: Allocation<T>> { + len: usize, + alloc: U, + gpu_ptr: NonZeroU64, + _p: PhantomData<T>, +} + +impl<T, U: Allocation<T>> GpuOnlyArray<T, U> { + /// Allocate a new GPU-only array with the given length. + pub(crate) fn new(alloc: U, count: usize) -> Result<GpuOnlyArray<T, U>> { + let bytes = count * mem::size_of::<T>(); + let gpu_ptr = NonZeroU64::new(alloc.gpu_ptr()).ok_or(EINVAL)?; + if alloc.size() < bytes { + return Err(ENOMEM); + } + Ok(Self { + len: count, + alloc, + gpu_ptr, + _p: PhantomData, + }) + } + + /// Returns the GPU VA of this arraw (as a raw [`NonZeroU64`]) + pub(crate) fn gpu_va(&self) -> NonZeroU64 { + self.gpu_ptr + } + + /// Returns a strong GPU pointer to this array, with a lifetime. + pub(crate) fn gpu_pointer(&self) -> GpuPointer<'_, &'_ [T]> { + GpuPointer(self.gpu_ptr, PhantomData) + } + + /// Returns a weak GPU pointer to this array, with no lifetime. + pub(crate) fn weak_pointer(&self) -> GpuWeakPointer<[T]> { + GpuWeakPointer(self.gpu_ptr, PhantomData) + } + + /// Returns a pointer to an offset within the array (as a subslice). + pub(crate) fn gpu_offset_pointer(&self, offset: usize) -> GpuPointer<'_, &'_ [T]> { + if offset > self.len { + panic!("Index {} out of bounds (len: {})", offset, self.len); + } + GpuPointer( + NonZeroU64::new(self.gpu_ptr.get() + (offset * mem::size_of::<T>()) as u64).unwrap(), + PhantomData, + ) + } + + /* Not used yet + /// Returns a weak pointer to an offset within the array (as a subslice). + pub(crate) fn weak_offset_pointer(&self, offset: usize) -> GpuWeakPointer<[T]> { + if offset > self.len { + panic!("Index {} out of bounds (len: {})", offset, self.len); + } + GpuWeakPointer( + NonZeroU64::new(self.gpu_ptr.get() + (offset * mem::size_of::<T>()) as u64).unwrap(), + PhantomData, + ) + } + + /// Returns a pointer to an element within the array. + pub(crate) fn gpu_item_pointer(&self, index: usize) -> GpuPointer<'_, &'_ T> { + if index >= self.len { + panic!("Index {} out of bounds (len: {})", index, self.len); + } + GpuPointer( + NonZeroU64::new(self.gpu_ptr.get() + (index * mem::size_of::<T>()) as u64).unwrap(), + PhantomData, + ) + } + */ + + /// Returns a weak pointer to an element within the array. + pub(crate) fn weak_item_pointer(&self, index: usize) -> GpuWeakPointer<T> { + if index >= self.len { + panic!("Index {} out of bounds (len: {})", index, self.len); + } + GpuWeakPointer( + NonZeroU64::new(self.gpu_ptr.get() + (index * mem::size_of::<T>()) as u64).unwrap(), + PhantomData, + ) + } + + /// Returns the length of the array. + pub(crate) fn len(&self) -> usize { + self.len + } +} + +impl<T: Debug, U: Allocation<T>> Debug for GpuOnlyArray<T, U> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + f.debug_struct(core::any::type_name::<T>()) + .field("len", &format_args!("{:#X?}", self.len())) + .finish() + } +} + +impl<T, U: Allocation<T>> Drop for GpuOnlyArray<T, U> { + fn drop(&mut self) { + mod_dev_dbg!( + self.alloc.device(), + "Dropping {} @ {:?}\n", + core::any::type_name::<T>(), + self.gpu_pointer() + ); + } +} + +/// An array of raw GPU objects that is also CPU-accessible. 
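+///
+/// The CPU-side view supports slice access and indexing, while the GPU-side pointers remain
+/// available through [`GpuOnlyArray`] via `Deref`. A minimal usage sketch (the `some_alloc`
+/// allocation is hypothetical, for illustration only):
+///
+/// ```ignore
+/// let mut arr = GpuArray::<u64, _>::empty(some_alloc, 16)?;
+/// arr[0] = 0x1234;              // CPU-side write through `IndexMut`
+/// let ptr = arr.gpu_pointer();  // GPU VA usable in firmware structures
+/// assert_eq!(arr.as_slice().len(), 16);
+/// ```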
+/// +/// # Invariants +/// +/// `raw` is valid and points to the CPU-side view of the array (which must have one). +pub(crate) struct GpuArray<T, U: Allocation<T>> { + raw: *mut T, + array: GpuOnlyArray<T, U>, +} + +/* Not used yet +impl<T: Copy, U: Allocation<T>> GpuArray<T, U> { + /// Allocate a new GPU array, copying the contents from a slice. + pub(crate) fn new(alloc: U, data: &[T]) -> Result<GpuArray<T, U>> { + let p = alloc.ptr().ok_or(EINVAL)?.as_ptr(); + let inner = GpuOnlyArray::new(alloc, data.len())?; + // SAFETY: `p` is valid per the Allocation type invariant, and GpuOnlyArray guarantees + // that its size is at least as large as `data.len()`. + unsafe { ptr::copy(data.as_ptr(), p, data.len()) }; + Ok(Self { + raw: p, + array: inner, + }) + } +} +*/ + +impl<T: Default, U: Allocation<T>> GpuArray<T, U> { + /// Allocate a new GPU array, initializing each element to its default. + pub(crate) fn empty(alloc: U, count: usize) -> Result<GpuArray<T, U>> { + let p = alloc.ptr().ok_or(EINVAL)?.as_ptr() as *mut T; + let inner = GpuOnlyArray::new(alloc, count)?; + let mut pi = p; + for _i in 0..count { + // SAFETY: `pi` is valid per the Allocation type invariant, and GpuOnlyArray guarantees + // that it can never iterate beyond the buffer length. + unsafe { + pi.write(Default::default()); + pi = pi.add(1); + } + } + Ok(Self { + raw: p, + array: inner, + }) + } +} + +impl<T, U: Allocation<T>> GpuArray<T, U> { + /// Get a slice view of the array contents. + pub(crate) fn as_slice(&self) -> &[T] { + // SAFETY: self.raw / self.len are valid per the type invariant + unsafe { slice::from_raw_parts(self.raw, self.len) } + } + + /// Get a mutable slice view of the array contents. + pub(crate) fn as_mut_slice(&mut self) -> &mut [T] { + // SAFETY: self.raw / self.len are valid per the type invariant + unsafe { slice::from_raw_parts_mut(self.raw, self.len) } + } +} + +impl<T, U: Allocation<T>> Deref for GpuArray<T, U> { + type Target = GpuOnlyArray<T, U>; + + fn deref(&self) -> &GpuOnlyArray<T, U> { + &self.array + } +} + +impl<T, U: Allocation<T>> Index<usize> for GpuArray<T, U> { + type Output = T; + + fn index(&self, index: usize) -> &T { + if index >= self.len { + panic!("Index {} out of bounds (len: {})", index, self.len); + } + // SAFETY: This is bounds checked above + unsafe { &*(self.raw.add(index)) } + } +} + +impl<T, U: Allocation<T>> IndexMut<usize> for GpuArray<T, U> { + fn index_mut(&mut self, index: usize) -> &mut T { + if index >= self.len { + panic!("Index {} out of bounds (len: {})", index, self.len); + } + // SAFETY: This is bounds checked above + unsafe { &mut *(self.raw.add(index)) } + } +} + +// SAFETY: GpuArray are Send as long as the contained type itself is Send +unsafe impl<T: Send, U: Allocation<T>> Send for GpuArray<T, U> {} +// SAFETY: GpuArray are Sync as long as the contained type itself is Sync +unsafe impl<T: Sync, U: Allocation<T>> Sync for GpuArray<T, U> {} + +impl<T: Debug, U: Allocation<T>> Debug for GpuArray<T, U> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + f.debug_struct(core::any::type_name::<T>()) + .field("array", &format_args!("{:#X?}", self.as_slice())) + .finish() + } +} diff --git a/drivers/gpu/drm/asahi/place.rs b/drivers/gpu/drm/asahi/place.rs new file mode 100644 index 000000000000..40c51f4fab8d --- /dev/null +++ b/drivers/gpu/drm/asahi/place.rs @@ -0,0 +1,343 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! "Placement new" macro +//! +//! 
This cursed abomination of a declarative macro is used to emulate a "placement new" feature, +//! which allows initializing objects directly in a user-provided memory region without first +//! going through the stack. +//! +//! This driver needs to manage several large GPU objects of a fixed layout. Linux kernel stacks are +//! very small, so it is impossible to create these objects on the stack. While the compiler can +//! sometimes optimize away the stack copy and directly instantiate in target memory, this is not +//! guaranteed and not reliable. Therefore, we need some mechanism to ergonomically initialize +//! complex structures directly in a pre-allocated piece of memory. +//! +//! This issue also affects some driver-internal structs which are large/complex enough to overflow +//! the stack. While this can be solved by breaking them up into pieces and using `Box` more +//! liberally, this has performance implications and still isn't very nice. This macro can also be +//! used to solve this issue. +//! +//! # Further reading +//! https://github.com/rust-lang/rust/issues/27779#issuecomment-378416911 +//! https://internals.rust-lang.org/t/removal-of-all-unstable-placement-features/7223 + +/// Initialize a `MaybeUninit` in-place, without constructing the value on the stack first. +/// +/// This macro is analogous to `MaybeUninit::write()`. In other words, +/// `place!(foo, bar)` is equivalent to `MaybeUninit::write(foo, bar)`, except that `bar` is not +/// constructed first, but rather its fields (if it is a structure constructor) are copied one by +/// one into the correct location in the `MaybeUninit`. +/// +/// The macro supports most Rust initialization syntax including type paths, generic arguments, +/// and nested structures. Nested structures are themselves initialized in-place field by field. +/// `..Default::default()` is supported, but this macro converts it to `..Zeroed::zeroed()`, as it +/// initializes those structs by zero-initializing the underlying memory. Usage of +/// `..Default::default()` with a type not implementing `Zeroed` will result in a compile error. +/// +/// Usage: +/// ``` +/// let mut buf = MaybeUninit::uninit(); +/// let mut_ref = place!(&mut buf, MyStruct { +/// b: true, +/// s: String::from("works"), +/// i: str::parse::<i32>("123").unwrap(), +/// v: vec![String::from("works")], +/// x: foo::MyOtherCoolStruct { +/// a: false, +/// b: String::from("Hello, world!"), +/// }, +/// y: foo::MyOtherCoolStruct { +/// a: false, +/// b: String::from("Hello, world!"), +/// }, +/// z: foo::MyCoolGenericStruct::<bool, String> { +/// a: false, +/// b: String::from("Hello, world!"), +/// }, +/// }; +/// // `mut_ref` is now a mutable reference to the `buf`, which is now safely initialized. +/// ``` +/// +/// Based on https://crates.io/crates/place by DianaNites, with contributions by Joshua Barretto. +#[macro_export] +macro_rules! 
place { + // Top-level struct + (@STRUCT $ptr:ident, _TOP, $typ:path, {$($typ_init:tt)*} { $($fields:tt)* }) => {{ + place!(@STRUCT_ZERO $ptr, {$($typ_init)*} { $($fields)* }); + place!(@STRUCT_CHECK $ptr, {$($typ_init)*} { $($fields)* } { + place!(@FIELDS $ptr, $($fields)*); + }); + }}; + // Nested structure + (@STRUCT $ptr:ident, $f_struct:ident, $typ:path, {$($typ_init:tt)*} { $($fields:tt)* }) => {{ + use core::ptr::addr_of_mut; + let buf = unsafe { addr_of_mut!((*$ptr).$f_struct) }; + place!(@STRUCT_ZERO buf, {$($typ_init)*} { $($fields)* }); + place!(@STRUCT_CHECK $ptr, {$($typ_init)*} { $($fields)* } { + place!(@FIELDS buf, $($fields)*); + }); + }}; + + // Zero-initialize structure if the initializer ends in ..default::Default() + (@STRUCT_ZERO $ptr:ident, {$($typ_init:tt)*} { $($f:ident $(: $v:expr)?),* $(,)? }) => {}; + (@STRUCT_ZERO $ptr:ident, {$($typ_init:tt)*} { $($($f:ident $(: $v:expr)?),*,)? ..Default::default() }) => {{ + // Check that the structure actually implements Zeroed + const _: () = { + fn _check_default() { + let _ = $($typ_init)* { + ..Zeroed::zeroed() + }; + } + }; + use core::ptr; + unsafe { ptr::write_bytes($ptr, 0, 1) }; + + }}; + + // Check that all fields are specified + (@STRUCT_CHECK $ptr:ident, {$($typ_init:tt)*} { $($($f:ident $(: $v:expr)?),*,)? ..Default::default() } {$($body:tt)*}) => { + if false { + #[allow(clippy::redundant_field_names)] + let _x = $($typ_init)* { + $($( + $f $(: $v)? + ),* + ,)? + ..Zeroed::zeroed() + }; + } else { + {$($body)*} + } + }; + (@STRUCT_CHECK $ptr:ident, {$($typ_init:tt)*} { $($f:ident $(: $v:expr)?),* $(,)? } {$($body:tt)*}) => { + if false { + #[allow(clippy::redundant_field_names)] + let _x = $($typ_init)* { + $( + $f $(: $v)? + ),* + }; + } else { + {$($body)*} + } + }; + // Top-level scalar + (@SCALAR $ptr:ident, _TOP, $val:expr) => { + let tmp = $val; + unsafe { $ptr.write(tmp); } + }; + // Regular field + (@SCALAR $ptr:ident, $f:ident, $val:expr) => {{ + use core::ptr::addr_of_mut; + let tmp = $val; + unsafe { addr_of_mut!((*$ptr).$f).write(tmp); } + }}; + // Type-like name followed by braces is a nested structure + (@PARTIAL $ptr:ident, $f:ident, {$($head:tt)*}, {{ $($fields:tt)* } $($tail:tt)*}) => { + place!(@STRUCT $ptr, $f, $($head)*, {$($head)*} { $($fields)* }); + place!(@FIELDS $ptr $($tail)*) + }; + // Type-like name followed by ::ident, append to head + (@PARTIAL $ptr:ident, $f:ident, {$($head:tt)*}, {::$id:ident $($tail:tt)*}) => { + place!(@PARTIAL $ptr, $f, {$($head)* :: $id}, {$($tail)*}); + }; + // Type-like name followed by ::<args>, append to head + (@PARTIAL $ptr:ident, $f:ident, {$($head:tt)*}, {::<$($gen:ty),*> $($tail:tt)*}) => { + place!(@PARTIAL $ptr, $f, {$($head)* :: <$($gen),*>}, {$($tail)*}); + }; + // Type-like name followed by ::<'lifetime>, append to head + (@PARTIAL $ptr:ident, $f:ident, {$($head:tt)*}, {::<$li:lifetime> $($tail:tt)*}) => { + place!(@PARTIAL $ptr, $f, {$($head)* :: <$li>}, {$($tail)*}); + }; + // Anything else, parse it as an expression + (@PARTIAL $ptr:ident, $f:ident, {$($head:tt)*}, {$($tail:tt)*}) => { + place!(@EXPR $ptr, $f, $($head)* $($tail)*) + }; + // Expression followed by more fields + (@EXPR $ptr:ident, $f:ident, $val:expr, $($tail:tt)*) => { + place!(@SCALAR $ptr, $f, $val); + place!(@FIELDS $ptr, $($tail)*) + }; + // Last field expression, without a trailing comma + (@EXPR $ptr:ident, $f:ident, $val:expr) => { + place!(@SCALAR $ptr, $f, $val); + }; + // Field with a value starting with an ident, start incremental type parsing + (@FIELDS $ptr:ident, 
$f:ident : $id:ident $($tail:tt)*) => { + place!(@PARTIAL $ptr, $f, {$id}, {$($tail)*}); + }; + // Same, but starting with ::ident + (@FIELDS $ptr:ident, $f:ident : ::$id:ident $($tail:tt)*) => { + place!(@PARTIAL $ptr, $f, {::$id}, {$($tail)*}); + }; + // Otherwise, parse it as an expression + (@FIELDS $ptr:ident, $f:ident : $($tail:tt)*) => { + place!(@EXPR $ptr, $f, $($tail)*) + }; + // Default terminating case + (@FIELDS $ptr:ident, ..Default::default() ) => {}; + // Terminating case + (@FIELDS $ptr:ident $(,)? ) => {}; + ( + $buf:expr, + $($val:tt)* + ) => {{ + use core::mem::MaybeUninit; + // Ensures types are correct + let obj: &mut MaybeUninit<_> = $buf; + let top_ptr = obj.as_mut_ptr(); + place!(@FIELDS top_ptr, _TOP: $($val)*); + // SAFETY: All fields have been initialized above + // The compiler ensures that all fields were used, all types were correct, + // and that size and alignment are correct. + unsafe { obj.assume_init_mut() } + }}; +} + +/// Helper macro to get the struct type part of a struct initialization expression. +#[macro_export] +#[doc(hidden)] +macro_rules! get_type { + ($t:ty { $($val:tt)* }) => { + $t + }; +} + +/// Like `Box::try_new(...)`, but with in-place initialization. +#[macro_export] +macro_rules! box_in_place { + ($($val:tt)*) => {{ + use $crate::place; + let b = Box::<$crate::get_type!($($val)*)>::try_new_uninit(); + match b { + Ok(mut p) => { + place!((&mut *p), $($val)*); + Ok(unsafe { p.assume_init() }) + } + Err(e) => Err(e) + } + }}; +} + +// TODO: figure out how to make this run +#[cfg(test)] +mod tests { + use super::*; + use core::mem::MaybeUninit; + + #[derive(Debug, PartialEq)] + struct MyCoolStruct { + b: bool, + s: String, + i: i32, + v: Vec<String>, + x: MyOtherCoolStruct, + y: MyOtherCoolStruct, + z: foo::MyCoolGenericStruct<bool, String>, + } + + #[derive(Debug, PartialEq)] + struct MyDefaultStruct { + b: bool, + i: i32, + j: i16, + } + default_zeroed!(MyDefaultStruct); + + mod foo { + #[derive(Debug, PartialEq)] + pub struct MyOtherCoolStruct { + pub a: bool, + pub b: String, + } + #[derive(Debug, PartialEq)] + pub struct MyCoolGenericStruct<T, U> { + pub a: T, + pub b: U, + } + } + + use foo::MyOtherCoolStruct; + + #[test] + fn test_initialized() { + let mut buf: MaybeUninit<MyCoolStruct> = MaybeUninit::uninit(); + + let x: &mut MyCoolStruct = place!( + &mut buf, + MyCoolStruct { + b: true, + s: String::from("works"), + i: str::parse::<i32>("123").unwrap(), + v: vec![String::from("works")], + x: MyOtherCoolStruct { + a: false, + b: String::from("Hello, world!"), + }, + y: foo::MyOtherCoolStruct { + a: false, + b: String::from("Hello, world!"), + }, + z: foo::MyCoolGenericStruct::<bool, String> { + a: false, + b: String::from("Hello, world!"), + } + } + ); + //dbg!(x); + + assert_eq!( + x, + &MyCoolStruct { + b: true, + s: String::from("works"), + i: str::parse::<i32>("123").unwrap(), + v: vec![String::from("works")], + x: foo::MyOtherCoolStruct { + a: false, + b: String::from("Hello, world!"), + }, + y: foo::MyOtherCoolStruct { + a: false, + b: String::from("Hello, world!"), + }, + z: foo::MyCoolGenericStruct::<bool, String> { + a: false, + b: String::from("Hello, world!"), + }, + }, + ); + } + + #[test] + fn test_default() { + let mut buf: MaybeUninit<MyDefaultStruct> = MaybeUninit::uninit(); + + let x: &mut MyDefaultStruct = place!( + &mut buf, + MyDefaultStruct { + b: true, + i: 1, + ..Default::default() + } + ); + + assert_eq!( + x, + &MyDefaultStruct { + b: true, + i: 1, + j: 0, + }, + ); + } + + #[test] + fn test_scalar() { + 
let mut buf: MaybeUninit<u32> = MaybeUninit::uninit(); + + let x: &mut u32 = place!(&mut buf, 1234); + + assert_eq!(x, &mut 1234u32); + } +} diff --git a/drivers/gpu/drm/asahi/queue/common.rs b/drivers/gpu/drm/asahi/queue/common.rs new file mode 100644 index 000000000000..127b4ccc6eca --- /dev/null +++ b/drivers/gpu/drm/asahi/queue/common.rs @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT + +//! Common queue functionality. +//! +//! Shared helpers used by the submission logic for multiple command types. + +use crate::fw::microseq; +use crate::fw::types::*; + +use kernel::bindings; +use kernel::io_buffer::IoBufferReader; +use kernel::prelude::*; +use kernel::user_ptr::UserSlicePtr; + +use core::mem::MaybeUninit; + +pub(super) fn build_attachments(pointer: u64, count: u32) -> Result<microseq::Attachments> { + if count as usize > microseq::MAX_ATTACHMENTS { + return Err(EINVAL); + } + + const STRIDE: usize = core::mem::size_of::<bindings::drm_asahi_attachment>(); + let size = STRIDE * count as usize; + + // SAFETY: We only read this once, so there are no TOCTOU issues. + let mut reader = unsafe { UserSlicePtr::new(pointer as usize as *mut _, size).reader() }; + + let mut attachments: microseq::Attachments = Default::default(); + + for i in 0..count { + let mut att: MaybeUninit<bindings::drm_asahi_attachment> = MaybeUninit::uninit(); + + // SAFETY: The size of `att` is STRIDE + unsafe { reader.read_raw(att.as_mut_ptr() as *mut u8, STRIDE)? }; + + // SAFETY: All bit patterns in the struct are valid + let att = unsafe { att.assume_init() }; + + let cache_lines = (att.size + 127) >> 7; + let order = 1; + attachments.list[i as usize] = microseq::Attachment { + address: U64(att.pointer), + size: cache_lines, + unk_c: 0x17, + unk_e: order, + }; + + attachments.count += 1; + } + + Ok(attachments) +} diff --git a/drivers/gpu/drm/asahi/queue/compute.rs b/drivers/gpu/drm/asahi/queue/compute.rs new file mode 100644 index 000000000000..6590382c75af --- /dev/null +++ b/drivers/gpu/drm/asahi/queue/compute.rs @@ -0,0 +1,371 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +#![allow(clippy::unusual_byte_groupings)] + +//! Compute work queue. +//! +//! A compute queue consists of one underlying WorkQueue. +//! This module is in charge of creating all of the firmware structures required to submit compute +//! work to the GPU, based on the userspace command buffer. + +use super::common; +use crate::alloc::Allocator; +use crate::debug::*; +use crate::fw::types::*; +use crate::gpu::GpuManager; +use crate::{box_in_place, inner_ptr, inner_weak_ptr, place}; +use crate::{fw, gpu, microseq}; +use core::mem::MaybeUninit; +use core::sync::atomic::Ordering; +use kernel::bindings; +use kernel::dma_fence::RawDmaFence; +use kernel::drm::sched::Job; +use kernel::io_buffer::IoBufferReader; +use kernel::prelude::*; +use kernel::sync::Arc; +use kernel::user_ptr::UserSlicePtr; + +const DEBUG_CLASS: DebugFlags = DebugFlags::Compute; + +#[versions(AGX)] +impl super::Queue::ver { + /// Submit work to a compute queue. 
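+    ///
+    /// At a high level, this reads the `drm_asahi_cmd_compute` command buffer from userspace,
+    /// allocates the preemption and sequence buffers, builds the firmware microsequence
+    /// (StartCompute, optional timestamps, WaitForIdle, FinalizeCompute, RetireStamp), places
+    /// the `RunCompute` structure directly in GPU memory, and registers a completion callback
+    /// that writes the userspace result buffer and signals the job fence.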
+ pub(super) fn submit_compute( + &self, + job: &mut Job<super::QueueJob::ver>, + cmd: &bindings::drm_asahi_command, + result_writer: Option<super::ResultWriter>, + id: u64, + flush_stamps: bool, + ) -> Result { + if cmd.cmd_type != bindings::drm_asahi_cmd_type_DRM_ASAHI_CMD_COMPUTE { + return Err(EINVAL); + } + + let dev = self.dev.data(); + let gpu = match dev.gpu.as_any().downcast_ref::<gpu::GpuManager::ver>() { + Some(gpu) => gpu, + None => { + dev_crit!(self.dev, "GpuManager mismatched with Queue!\n"); + return Err(EIO); + } + }; + + let mut alloc = gpu.alloc(); + let kalloc = &mut *alloc; + + mod_dev_dbg!(self.dev, "[Submission {}] Compute!\n", id); + + let mut cmdbuf_reader = unsafe { + UserSlicePtr::new( + cmd.cmd_buffer as usize as *mut _, + core::mem::size_of::<bindings::drm_asahi_cmd_compute>(), + ) + .reader() + }; + + let mut cmdbuf: MaybeUninit<bindings::drm_asahi_cmd_compute> = MaybeUninit::uninit(); + unsafe { + cmdbuf_reader.read_raw( + cmdbuf.as_mut_ptr() as *mut u8, + core::mem::size_of::<bindings::drm_asahi_cmd_compute>(), + )?; + } + let cmdbuf = unsafe { cmdbuf.assume_init() }; + + if cmdbuf.flags != 0 { + return Err(EINVAL); + } + + // This sequence number increases per new client/VM? assigned to some slot, + // but it's unclear *which* slot... + let slot_client_seq: u8 = (self.id & 0xff) as u8; + + let vm_bind = job.vm_bind.clone(); + + mod_dev_dbg!( + self.dev, + "[Submission {}] VM slot = {}\n", + id, + vm_bind.slot() + ); + + let notifier = self.notifier.clone(); + + let fence = job.fence.clone(); + let comp_job = job.get_comp()?; + let ev_comp = comp_job.event_info(); + + // TODO: Is this the same on all GPUs? Is this really for preemption? + let preempt_size = 0x7fa0; + let preempt2_off = 0x7f80; + let preempt3_off = 0x7f88; + let preempt4_off = 0x7f90; + let preempt5_off = 0x7f98; + + let preempt_buf = self.ualloc.lock().array_empty(preempt_size)?; + + let mut seq_buf = self.ualloc.lock().array_empty(0x800)?; + for i in 1..0x400 { + seq_buf[i] = (i + 1) as u64; + } + + mod_dev_dbg!( + self.dev, + "[Submission {}] Event #{} {:#x?} -> {:#x?}\n", + id, + ev_comp.slot, + ev_comp.value, + ev_comp.value.next(), + ); + + let timestamps = Arc::try_new(kalloc.shared.new_default::<fw::job::JobTimestamps>()?)?; + + let uuid = cmdbuf.cmd_id; + + let unk3 = debug_enabled(debug::DebugFlags::Debug3); + + mod_dev_dbg!(self.dev, "[Submission {}] UUID = {:#x?}\n", id, uuid); + + // TODO: check + #[ver(V >= V13_0B4)] + let count = self.counter.fetch_add(1, Ordering::Relaxed); + + let comp = GpuObject::new_prealloc( + kalloc.private.alloc_object()?, + |ptr: GpuWeakPointer<fw::compute::RunCompute::ver>| { + let mut builder = microseq::Builder::new(); + + let stats = gpu.initdata.runtime_pointers.stats.comp.weak_pointer(); + + let start_comp = builder.add(microseq::StartCompute::ver { + header: microseq::op::StartCompute::HEADER, + unk_pointer: inner_weak_ptr!(ptr, unk_pointee), + job_params1: inner_weak_ptr!(ptr, job_params1), + stats, + work_queue: ev_comp.info_ptr, + vm_slot: vm_bind.slot(), + unk_28: 0x1, + event_generation: self.id as u32, + cmd_seq: U64(ev_comp.cmd_seq), + unk_38: 0x0, + job_params2: inner_weak_ptr!(ptr, job_params2), + unk_44: 0x0, + uuid, + attachments: common::build_attachments( + cmdbuf.attachments, + cmdbuf.attachment_count, + )?, + padding: Default::default(), + #[ver(V >= V13_0B4)] + unk_flag: inner_weak_ptr!(ptr, unk_flag), + #[ver(V >= V13_0B4)] + counter: U64(count), + #[ver(V >= V13_0B4)] + notifier_buf: inner_weak_ptr!(notifier.weak_pointer(), 
state.unk_buf), + })?; + + if result_writer.is_some() { + builder.add(microseq::Timestamp::ver { + header: microseq::op::Timestamp::new(true), + cur_ts: inner_weak_ptr!(ptr, cur_ts), + start_ts: inner_weak_ptr!(ptr, start_ts), + update_ts: inner_weak_ptr!(ptr, start_ts), + work_queue: ev_comp.info_ptr, + unk_24: U64(0), + #[ver(V >= V13_0B4)] + unk_ts: inner_weak_ptr!(ptr, unk_ts), + uuid, + unk_30_padding: 0, + })?; + } + + builder.add(microseq::WaitForIdle { + header: microseq::op::WaitForIdle::new(microseq::Pipe::Compute), + })?; + + if result_writer.is_some() { + builder.add(microseq::Timestamp::ver { + header: microseq::op::Timestamp::new(false), + cur_ts: inner_weak_ptr!(ptr, cur_ts), + start_ts: inner_weak_ptr!(ptr, start_ts), + update_ts: inner_weak_ptr!(ptr, end_ts), + work_queue: ev_comp.info_ptr, + unk_24: U64(0), + #[ver(V >= V13_0B4)] + unk_ts: inner_weak_ptr!(ptr, unk_ts), + uuid, + unk_30_padding: 0, + })?; + } + + let off = builder.offset_to(start_comp); + builder.add(microseq::FinalizeCompute::ver { + header: microseq::op::FinalizeCompute::HEADER, + stats, + work_queue: ev_comp.info_ptr, + vm_slot: vm_bind.slot(), + #[ver(V < V13_0B4)] + unk_18: 0, + job_params2: inner_weak_ptr!(ptr, job_params2), + unk_24: 0, + uuid, + fw_stamp: ev_comp.fw_stamp_pointer, + stamp_value: ev_comp.value.next(), + unk_38: 0, + unk_3c: 0, + unk_40: 0, + unk_44: 0, + unk_48: 0, + unk_4c: 0, + unk_50: 0, + unk_54: 0, + unk_58: 0, + #[ver(G == G14 && V < V13_0B4)] + unk_5c_g14: U64(0), + restart_branch_offset: off, + unk_60: unk3.into(), + #[ver(V >= V13_0B4)] + unk_64: Default::default(), + #[ver(V >= V13_0B4)] + unk_flag: inner_weak_ptr!(ptr, unk_flag), + #[ver(V >= V13_0B4)] + unk_79: Default::default(), + })?; + + builder.add(microseq::RetireStamp { + header: microseq::op::RetireStamp::HEADER, + })?; + + Ok(box_in_place!(fw::compute::RunCompute::ver { + notifier: notifier.clone(), + preempt_buf: preempt_buf, + seq_buf: seq_buf, + micro_seq: builder.build(&mut kalloc.private)?, + vm_bind: vm_bind.clone(), + timestamps: timestamps.clone(), + })?) 
+ }, + |inner, ptr| { + Ok(place!( + ptr, + fw::compute::raw::RunCompute::ver { + tag: fw::workqueue::CommandType::RunCompute, + #[ver(V >= V13_0B4)] + counter: U64(count), + unk_4: 0, + vm_slot: vm_bind.slot(), + notifier: inner.notifier.gpu_pointer(), + unk_pointee: Default::default(), + job_params1: fw::compute::raw::JobParameters1 { + preempt_buf1: inner.preempt_buf.gpu_pointer(), + encoder: U64(cmdbuf.encoder_ptr), + // buf2-5 Only if internal program is used + preempt_buf2: inner.preempt_buf.gpu_offset_pointer(preempt2_off), + preempt_buf3: inner.preempt_buf.gpu_offset_pointer(preempt3_off), + preempt_buf4: inner.preempt_buf.gpu_offset_pointer(preempt4_off), + preempt_buf5: inner.preempt_buf.gpu_offset_pointer(preempt5_off), + pipeline_base: U64(0x11_00000000), + unk_38: U64(0x8c60), + unk_40: cmdbuf.ctx_switch_prog, // Internal program addr | 1 + unk_44: 0, + compute_layout_addr: U64(cmdbuf.buffer_descriptor), // Only if internal program used + unk_50: cmdbuf.buffer_descriptor_size, // 0x40 if internal program used + unk_54: 0, + unk_58: 1, + unk_5c: 0, + iogpu_unk_40: cmdbuf.iogpu_unk_40, // 0x1c if internal program used + }, + unk_b8: Default::default(), + microsequence: inner.micro_seq.gpu_pointer(), + microsequence_size: inner.micro_seq.len() as u32, + job_params2: fw::compute::raw::JobParameters2::ver { + #[ver(V >= V13_0B4)] + unk_0_0: 0, + unk_0: Default::default(), + preempt_buf1: inner.preempt_buf.gpu_pointer(), + encoder_end: U64(cmdbuf.encoder_end), + unk_34: Default::default(), + #[ver(V < V13_0B4)] + unk_5c: 0, + }, + encoder_params: fw::job::raw::EncoderParams { + unk_8: 0x0, // fixed + unk_c: 0x0, // fixed + unk_10: 0x0, // fixed + encoder_id: cmdbuf.encoder_id, + unk_18: 0x0, // fixed + iogpu_compute_unk44: cmdbuf.iogpu_unk_44, + seq_buffer: inner.seq_buf.gpu_pointer(), + unk_28: U64(0x0), // fixed + }, + meta: fw::job::raw::JobMeta { + unk_4: 0, + stamp: ev_comp.stamp_pointer, + fw_stamp: ev_comp.fw_stamp_pointer, + stamp_value: ev_comp.value.next(), + stamp_slot: ev_comp.slot, + evctl_index: 0, // fixed + flush_stamps: flush_stamps as u32, + uuid: uuid, + cmd_seq: ev_comp.cmd_seq as u32, + }, + cur_ts: U64(0), + start_ts: Some(inner_ptr!(inner.timestamps.gpu_pointer(), start)), + end_ts: Some(inner_ptr!(inner.timestamps.gpu_pointer(), end)), + unk_2c0: 0, + unk_2c4: 0, + unk_2c8: 0, + unk_2cc: 0, + client_sequence: slot_client_seq, + pad_2d1: Default::default(), + unk_2d4: 0, + unk_2d8: 0, + #[ver(V >= V13_0B4)] + unk_ts: U64(0), + #[ver(V >= V13_0B4)] + unk_2e1: Default::default(), + #[ver(V >= V13_0B4)] + unk_flag: U32(0), + #[ver(V >= V13_0B4)] + unk_pad: Default::default(), + } + )) + }, + )?; + + core::mem::drop(alloc); + + fence.add_command(); + comp_job.add_cb(comp, vm_bind.slot(), move |cmd, error| { + if let Some(err) = error { + fence.set_error(err.into()) + } + if let Some(mut rw) = result_writer { + let mut result: bindings::drm_asahi_result_compute = Default::default(); + + cmd.timestamps.with(|raw, _inner| { + result.ts_start = raw.start.load(Ordering::Relaxed); + result.ts_end = raw.end.load(Ordering::Relaxed); + }); + + if let Some(err) = error { + result.info = err.into(); + } else { + result.info.status = bindings::drm_asahi_status_DRM_ASAHI_STATUS_COMPLETE; + } + + rw.write(result); + } + + fence.command_complete(); + })?; + + notifier.threshold.with(|raw, _inner| { + raw.increment(); + }); + + comp_job.next_seq(); + + Ok(()) + } +} diff --git a/drivers/gpu/drm/asahi/queue/mod.rs b/drivers/gpu/drm/asahi/queue/mod.rs new file mode 100644 index 
000000000000..15988af33cf3 --- /dev/null +++ b/drivers/gpu/drm/asahi/queue/mod.rs @@ -0,0 +1,725 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT + +//! Submission queue management +//! +//! This module implements the userspace view of submission queues and the logic to map userspace +//! submissions to firmware queues. + +use kernel::dma_fence::*; +use kernel::prelude::*; +use kernel::{ + bindings, c_str, dma_fence, + drm::gem::shmem::VMap, + drm::sched, + macros::versions, + sync::{smutex::Mutex, Arc}, +}; + +use crate::alloc::Allocator; +use crate::debug::*; +use crate::driver::AsahiDevice; +use crate::fw::types::*; +use crate::gpu::GpuManager; +use crate::{alloc, buffer, channel, event, file, fw, gem, gpu, mmu, workqueue}; +use crate::{inner_weak_ptr, place}; + +use core::mem::MaybeUninit; +use core::sync::atomic::{AtomicU64, Ordering}; + +const DEBUG_CLASS: DebugFlags = DebugFlags::Queue; + +const WQ_SIZE: u32 = 0x500; + +mod common; +mod compute; +mod render; + +/// Trait implemented by all versioned queues. +pub(crate) trait Queue: Send + Sync { + fn submit( + &mut self, + id: u64, + in_syncs: Vec<file::SyncItem>, + out_syncs: Vec<file::SyncItem>, + result_buf: Option<gem::ObjectRef>, + commands: Vec<bindings::drm_asahi_command>, + ) -> Result; +} + +#[versions(AGX)] +struct SubQueue { + wq: Arc<workqueue::WorkQueue::ver>, +} + +#[versions(AGX)] +impl SubQueue::ver { + fn new_job(&mut self) -> SubQueueJob::ver { + SubQueueJob::ver { + wq: self.wq.clone(), + job: None, + } + } +} + +#[versions(AGX)] +struct SubQueueJob { + wq: Arc<workqueue::WorkQueue::ver>, + job: Option<workqueue::Job::ver>, +} + +#[versions(AGX)] +impl SubQueueJob::ver { + fn get(&mut self) -> Result<&mut workqueue::Job::ver> { + if self.job.is_none() { + mod_pr_debug!("SubQueueJob: Creating {:?} job\n", self.wq.pipe_type()); + self.job.replace(self.wq.new_job()?); + } + Ok(self.job.as_mut().expect("expected a Job")) + } + + fn commit(&mut self) -> Result { + match self.job.as_mut() { + Some(job) => job.commit(), + None => Ok(()), + } + } + + fn can_submit(&self) -> bool { + match self.job.as_ref() { + None => true, + Some(job) => job.can_submit(), + } + } +} + +#[versions(AGX)] +pub(crate) struct Queue { + dev: AsahiDevice, + _sched: sched::Scheduler<QueueJob::ver>, + entity: sched::Entity<QueueJob::ver>, + vm: mmu::Vm, + ualloc: Arc<Mutex<alloc::DefaultAllocator>>, + q_vtx: Option<SubQueue::ver>, + q_frag: Option<SubQueue::ver>, + q_comp: Option<SubQueue::ver>, + buffer: Option<Mutex<buffer::Buffer::ver>>, + gpu_context: Arc<workqueue::GpuContext>, + notifier_list: Arc<GpuObject<fw::event::NotifierList>>, + notifier: Arc<GpuObject<fw::event::Notifier::ver>>, + id: u64, + fence_ctx: FenceContexts, + #[ver(V >= V13_0B4)] + counter: AtomicU64, +} + +#[versions(AGX)] +#[derive(Default)] +pub(crate) struct JobFence { + id: u64, + pending: AtomicU64, +} + +#[versions(AGX)] +impl JobFence::ver { + fn add_command(self: &FenceObject<Self>) { + self.pending.fetch_add(1, Ordering::Relaxed); + } + + fn command_complete(self: &FenceObject<Self>) { + let remain = self.pending.fetch_sub(1, Ordering::Relaxed) - 1; + mod_pr_debug!( + "JobFence[{}]: Command complete (remain: {})\n", + self.id, + remain + ); + if remain == 0 { + mod_pr_debug!("JobFence[{}]: Signaling\n", self.id); + if self.signal().is_err() { + pr_err!("JobFence[{}]: Fence signal failed\n", self.id); + } + } + } +} + +#[versions(AGX)] +#[vtable] +impl dma_fence::FenceOps for JobFence::ver { + const USE_64BIT_SEQNO: bool = true; + + fn get_driver_name<'a>(self: 
&'a FenceObject<Self>) -> &'a CStr { + c_str!("asahi") + } + fn get_timeline_name<'a>(self: &'a FenceObject<Self>) -> &'a CStr { + c_str!("queue") + } +} + +#[versions(AGX)] +pub(crate) struct QueueJob { + dev: AsahiDevice, + vm_bind: mmu::VmBind, + op_guard: Option<gpu::OpGuard>, + sj_vtx: Option<SubQueueJob::ver>, + sj_frag: Option<SubQueueJob::ver>, + sj_comp: Option<SubQueueJob::ver>, + fence: UserFence<JobFence::ver>, + did_run: bool, + id: u64, +} + +#[versions(AGX)] +impl QueueJob::ver { + fn get_vtx(&mut self) -> Result<&mut workqueue::Job::ver> { + self.sj_vtx.as_mut().ok_or(EINVAL)?.get() + } + fn get_frag(&mut self) -> Result<&mut workqueue::Job::ver> { + self.sj_frag.as_mut().ok_or(EINVAL)?.get() + } + fn get_comp(&mut self) -> Result<&mut workqueue::Job::ver> { + self.sj_comp.as_mut().ok_or(EINVAL)?.get() + } + + fn commit(&mut self) -> Result { + mod_dev_dbg!(self.dev, "QueueJob: Committing\n"); + + self.sj_vtx.as_mut().map(|a| a.commit()).unwrap_or(Ok(()))?; + self.sj_frag + .as_mut() + .map(|a| a.commit()) + .unwrap_or(Ok(()))?; + self.sj_comp.as_mut().map(|a| a.commit()).unwrap_or(Ok(())) + } +} + +#[versions(AGX)] +impl sched::JobImpl for QueueJob::ver { + fn can_run(job: &mut sched::Job<Self>) -> bool { + mod_dev_dbg!(job.dev, "QueueJob {}: Checking runnability\n", job.id); + + if let Some(sj) = job.sj_vtx.as_ref() { + if !sj.can_submit() { + mod_dev_dbg!( + job.dev, + "QueueJob {}: Blocking due to vertex queue full\n", + job.id + ); + return false; + } + } + if let Some(sj) = job.sj_frag.as_ref() { + if !sj.can_submit() { + mod_dev_dbg!( + job.dev, + "QueueJob {}: Blocking due to fragment queue full\n", + job.id + ); + return false; + } + } + if let Some(sj) = job.sj_comp.as_ref() { + if !sj.can_submit() { + mod_dev_dbg!( + job.dev, + "QueueJob {}: Blocking due to compute queue full\n", + job.id + ); + return false; + } + } + true + } + + #[allow(unused_assignments)] + fn run(job: &mut sched::Job<Self>) -> Result<Option<dma_fence::Fence>> { + mod_dev_dbg!(job.dev, "QueueJob {}: Running Job\n", job.id); + + let dev = job.dev.data(); + let gpu = match dev + .gpu + .clone() + .arc_as_any() + .downcast::<gpu::GpuManager::ver>() + { + Ok(gpu) => gpu, + Err(_) => { + dev_crit!(job.dev, "GpuManager mismatched with QueueJob!\n"); + return Err(EIO); + } + }; + + if job.op_guard.is_none() { + job.op_guard = Some(gpu.start_op()?); + } + + // First submit all the commands for each queue. This can fail. 
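+        // The submission is two-phase: `submit()` on each subqueue job prepares the work and
+        // can still fail cleanly, while `run_job()` below is the commit point that actually
+        // kicks the firmware. Doing all the fallible submits first means a failure here never
+        // leaves a half-submitted job on the GPU.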
+ + let mut frag_job = None; + let mut frag_sub = None; + if let Some(sj) = job.sj_frag.as_mut() { + frag_job = sj.job.take(); + if let Some(wqjob) = frag_job.as_mut() { + mod_dev_dbg!(job.dev, "QueueJob {}: Submit fragment\n", job.id); + frag_sub = Some(wqjob.submit()?); + } + } + + let mut vtx_job = None; + let mut vtx_sub = None; + if let Some(sj) = job.sj_vtx.as_mut() { + vtx_job = sj.job.take(); + if let Some(wqjob) = vtx_job.as_mut() { + mod_dev_dbg!(job.dev, "QueueJob {}: Submit vertex\n", job.id); + vtx_sub = Some(wqjob.submit()?); + } + } + + let mut comp_job = None; + let mut comp_sub = None; + if let Some(sj) = job.sj_comp.as_mut() { + comp_job = sj.job.take(); + if let Some(wqjob) = comp_job.as_mut() { + mod_dev_dbg!(job.dev, "QueueJob {}: Submit compute\n", job.id); + comp_sub = Some(wqjob.submit()?); + } + } + + // Now we fully commit to running the job + mod_dev_dbg!(job.dev, "QueueJob {}: Run fragment\n", job.id); + frag_sub.map(|a| gpu.run_job(a)).transpose()?; + + mod_dev_dbg!(job.dev, "QueueJob {}: Run vertex\n", job.id); + vtx_sub.map(|a| gpu.run_job(a)).transpose()?; + + mod_dev_dbg!(job.dev, "QueueJob {}: Run compute\n", job.id); + comp_sub.map(|a| gpu.run_job(a)).transpose()?; + + mod_dev_dbg!(job.dev, "QueueJob {}: Drop compute job\n", job.id); + core::mem::drop(comp_job); + mod_dev_dbg!(job.dev, "QueueJob {}: Drop vertex job\n", job.id); + core::mem::drop(vtx_job); + mod_dev_dbg!(job.dev, "QueueJob {}: Drop fragment job\n", job.id); + core::mem::drop(frag_job); + + job.did_run = true; + + Ok(Some(Fence::from_fence(&job.fence))) + } + + fn timed_out(job: &mut sched::Job<Self>) -> sched::Status { + // FIXME: Handle timeouts properly + dev_err!( + job.dev, + "QueueJob {}: Job timed out on the DRM scheduler, things will probably break (ran: {})\n", + job.id, job.did_run + ); + sched::Status::NoDevice + } +} + +#[versions(AGX)] +impl Drop for QueueJob::ver { + fn drop(&mut self) { + mod_dev_dbg!(self.dev, "QueueJob {}: Dropping\n", self.id); + } +} + +struct ResultWriter { + vmap: VMap<gem::DriverObject>, + offset: usize, + len: usize, +} + +impl ResultWriter { + fn write<T>(&mut self, mut value: T) { + let p: *mut u8 = &mut value as *mut _ as *mut u8; + // SAFETY: We know `p` points to a type T of that size, and UAPI types must have + // no padding and all bit patterns valid. + let slice = unsafe { core::slice::from_raw_parts_mut(p, core::mem::size_of::<T>()) }; + let len = slice.len().min(self.len); + self.vmap.as_mut_slice()[self.offset..self.offset + len].copy_from_slice(&slice[..len]); + } +} + +static QUEUE_NAME: &CStr = c_str!("asahi_fence"); +static QUEUE_CLASS_KEY: kernel::sync::LockClassKey = kernel::sync::LockClassKey::new(); + +#[versions(AGX)] +impl Queue::ver { + /// Create a new user queue. 
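+    ///
+    /// This sets up the per-queue DRM scheduler and entity, the firmware notifier objects used
+    /// for completion tracking, and one firmware work queue per requested capability: vertex
+    /// and fragment queues (plus the tiled vertex buffer) for render, a fragment queue for
+    /// blit, and a compute queue for compute.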
+ #[allow(clippy::too_many_arguments)] + pub(crate) fn new( + dev: &AsahiDevice, + vm: mmu::Vm, + alloc: &mut gpu::KernelAllocators, + ualloc: Arc<Mutex<alloc::DefaultAllocator>>, + ualloc_priv: Arc<Mutex<alloc::DefaultAllocator>>, + event_manager: Arc<event::EventManager>, + mgr: &buffer::BufferManager, + id: u64, + priority: u32, + caps: u32, + ) -> Result<Queue::ver> { + mod_dev_dbg!(dev, "[Queue {}] Creating queue\n", id); + + let data = dev.data(); + + let mut notifier_list = alloc.private.new_default::<fw::event::NotifierList>()?; + + let self_ptr = notifier_list.weak_pointer(); + notifier_list.with_mut(|raw, _inner| { + raw.list_head.next = Some(inner_weak_ptr!(self_ptr, list_head)); + }); + + let threshold = alloc.shared.new_default::<fw::event::Threshold>()?; + + let notifier: Arc<GpuObject<fw::event::Notifier::ver>> = + Arc::try_new(alloc.private.new_inplace( + fw::event::Notifier::ver { threshold }, + |inner, ptr: &mut MaybeUninit<fw::event::raw::Notifier::ver<'_>>| { + Ok(place!( + ptr, + fw::event::raw::Notifier::ver { + threshold: inner.threshold.gpu_pointer(), + generation: AtomicU32::new(id as u32), + cur_count: AtomicU32::new(0), + unk_10: AtomicU32::new(0x50), + state: Default::default() + } + )) + }, + )?)?; + + let sched = sched::Scheduler::new(dev, WQ_SIZE, 0, 100000, c_str!("asahi_sched"))?; + // Priorities are handled by the AGX scheduler, there is no meaning within a + // per-queue scheduler. + let entity = sched::Entity::new(&sched, sched::Priority::Normal)?; + + let mut ret = Queue::ver { + dev: dev.clone(), + _sched: sched, + entity, + vm, + ualloc, + q_vtx: None, + q_frag: None, + q_comp: None, + buffer: None, + gpu_context: Arc::try_new(workqueue::GpuContext::new(dev, alloc)?)?, + notifier_list: Arc::try_new(notifier_list)?, + notifier, + id, + fence_ctx: FenceContexts::new(1, QUEUE_NAME, &QUEUE_CLASS_KEY)?, + #[ver(V >= V13_0B4)] + counter: AtomicU64::new(0), + }; + + // Rendering structures + if caps & bindings::drm_asahi_queue_cap_DRM_ASAHI_QUEUE_CAP_RENDER != 0 { + let buffer = + buffer::Buffer::ver::new(&*data.gpu, alloc, ret.ualloc.clone(), ualloc_priv, mgr)?; + let tvb_blocks = { + let lock = crate::THIS_MODULE.kernel_param_lock(); + *crate::initial_tvb_size.read(&lock) + }; + + buffer.ensure_blocks(tvb_blocks)?; + + ret.buffer = Some(Mutex::new(buffer)); + ret.q_vtx = Some(SubQueue::ver { + wq: workqueue::WorkQueue::ver::new( + alloc, + event_manager.clone(), + ret.gpu_context.clone(), + ret.notifier_list.clone(), + channel::PipeType::Vertex, + id, + priority, + WQ_SIZE, + )?, + }); + } + + // Rendering & blit structures + if caps + & (bindings::drm_asahi_queue_cap_DRM_ASAHI_QUEUE_CAP_RENDER + | bindings::drm_asahi_queue_cap_DRM_ASAHI_QUEUE_CAP_BLIT) + != 0 + { + ret.q_frag = Some(SubQueue::ver { + wq: workqueue::WorkQueue::ver::new( + alloc, + event_manager.clone(), + ret.gpu_context.clone(), + ret.notifier_list.clone(), + channel::PipeType::Fragment, + id, + priority, + WQ_SIZE, + )?, + }); + } + + // Compute structures + if caps & bindings::drm_asahi_queue_cap_DRM_ASAHI_QUEUE_CAP_COMPUTE != 0 { + ret.q_comp = Some(SubQueue::ver { + wq: workqueue::WorkQueue::ver::new( + alloc, + event_manager, + ret.gpu_context.clone(), + ret.notifier_list.clone(), + channel::PipeType::Compute, + id, + priority, + WQ_SIZE, + )?, + }); + } + + mod_dev_dbg!(dev, "[Queue {}] Queue created\n", id); + Ok(ret) + } +} + +const SQ_RENDER: usize = bindings::drm_asahi_subqueue_DRM_ASAHI_SUBQUEUE_RENDER as usize; +const SQ_COMPUTE: usize = 
bindings::drm_asahi_subqueue_DRM_ASAHI_SUBQUEUE_COMPUTE as usize; +const SQ_COUNT: usize = bindings::drm_asahi_subqueue_DRM_ASAHI_SUBQUEUE_COUNT as usize; + +#[versions(AGX)] +impl Queue for Queue::ver { + fn submit( + &mut self, + id: u64, + in_syncs: Vec<file::SyncItem>, + out_syncs: Vec<file::SyncItem>, + result_buf: Option<gem::ObjectRef>, + commands: Vec<bindings::drm_asahi_command>, + ) -> Result { + let dev = self.dev.data(); + let gpu = match dev + .gpu + .clone() + .arc_as_any() + .downcast::<gpu::GpuManager::ver>() + { + Ok(gpu) => gpu, + Err(_) => { + dev_crit!(self.dev, "GpuManager mismatched with JobImpl!\n"); + return Err(EIO); + } + }; + + mod_dev_dbg!(self.dev, "[Submission {}] Submit job\n", id); + + if gpu.is_crashed() { + dev_err!( + self.dev, + "[Submission {}] GPU is crashed, cannot submit\n", + id + ); + return Err(ENODEV); + } + + // Empty submissions are not legal + if commands.is_empty() { + return Err(EINVAL); + } + + let op_guard = if !in_syncs.is_empty() { + Some(gpu.start_op()?) + } else { + None + }; + + let mut events: [Vec<Option<workqueue::QueueEventInfo::ver>>; SQ_COUNT] = + Default::default(); + + events[SQ_RENDER].try_push(self.q_frag.as_ref().and_then(|a| a.wq.event_info()))?; + events[SQ_COMPUTE].try_push(self.q_comp.as_ref().and_then(|a| a.wq.event_info()))?; + + let vm_bind = gpu.bind_vm(&self.vm)?; + let vm_slot = vm_bind.slot(); + + mod_dev_dbg!(self.dev, "[Submission {}] Creating job\n", id); + let mut job = self.entity.new_job(QueueJob::ver { + dev: self.dev.clone(), + vm_bind, + op_guard, + sj_vtx: self.q_vtx.as_mut().map(|a| a.new_job()), + sj_frag: self.q_frag.as_mut().map(|a| a.new_job()), + sj_comp: self.q_comp.as_mut().map(|a| a.new_job()), + fence: self + .fence_ctx + .new_fence::<JobFence::ver>( + 0, + JobFence::ver { + id, + pending: Default::default(), + }, + )? + .into(), + did_run: false, + id, + })?; + + mod_dev_dbg!( + self.dev, + "[Submission {}] Adding {} in_syncs\n", + id, + in_syncs.len() + ); + for sync in in_syncs { + job.add_dependency(sync.fence.expect("in_sync missing fence"))?; + } + + let mut last_render = None; + let mut last_compute = None; + + for (i, cmd) in commands.iter().enumerate() { + match cmd.cmd_type { + bindings::drm_asahi_cmd_type_DRM_ASAHI_CMD_RENDER => last_render = Some(i), + bindings::drm_asahi_cmd_type_DRM_ASAHI_CMD_COMPUTE => last_compute = Some(i), + _ => return Err(EINVAL), + } + } + + mod_dev_dbg!( + self.dev, + "[Submission {}] Submitting {} commands\n", + id, + commands.len() + ); + for (i, cmd) in commands.into_iter().enumerate() { + for (queue_idx, index) in cmd.barriers.iter().enumerate() { + if *index == bindings::DRM_ASAHI_BARRIER_NONE as u32 { + continue; + } + if let Some(event) = events[queue_idx].get(*index as usize).ok_or(EINVAL)? 
{ + let mut alloc = gpu.alloc(); + let queue_job = match cmd.cmd_type { + bindings::drm_asahi_cmd_type_DRM_ASAHI_CMD_RENDER => job.get_vtx()?, + bindings::drm_asahi_cmd_type_DRM_ASAHI_CMD_COMPUTE => job.get_comp()?, + _ => return Err(EINVAL), + }; + mod_dev_dbg!(self.dev, "[Submission {}] Create Explicit Barrier\n", id); + let barrier: GpuObject<fw::workqueue::Barrier> = alloc.private.new_inplace( + Default::default(), + |_inner, ptr: &mut MaybeUninit<fw::workqueue::raw::Barrier>| { + Ok(place!( + ptr, + fw::workqueue::raw::Barrier { + tag: fw::workqueue::CommandType::Barrier, + wait_stamp: event.fw_stamp_pointer, + wait_value: event.value, + wait_slot: event.slot, + stamp_self: queue_job.event_info().value.next(), + uuid: 0xffffbbbb, + unk: 0, + } + )) + }, + )?; + mod_dev_dbg!(self.dev, "[Submission {}] Add Explicit Barrier\n", id); + queue_job.add(barrier, vm_slot)?; + } else { + assert!(*index == 0); + } + } + + let result_writer = match result_buf.as_ref() { + None => { + if cmd.result_offset != 0 || cmd.result_size != 0 { + return Err(EINVAL); + } + None + } + Some(buf) => { + if cmd.result_size != 0 { + if cmd + .result_offset + .checked_add(cmd.result_size) + .ok_or(EINVAL)? + > buf.size() as u64 + { + return Err(EINVAL); + } + Some(ResultWriter { + vmap: buf.gem.vmap()?, + offset: cmd.result_offset.try_into()?, + len: cmd.result_size.try_into()?, + }) + } else { + None + } + } + }; + + match cmd.cmd_type { + bindings::drm_asahi_cmd_type_DRM_ASAHI_CMD_RENDER => { + self.submit_render( + &mut job, + &cmd, + result_writer, + id, + last_render.unwrap() == i, + )?; + events[SQ_RENDER].try_push(Some( + job.sj_frag + .as_ref() + .expect("No frag queue?") + .job + .as_ref() + .expect("No frag job?") + .event_info(), + ))?; + } + bindings::drm_asahi_cmd_type_DRM_ASAHI_CMD_COMPUTE => { + self.submit_compute( + &mut job, + &cmd, + result_writer, + id, + last_compute.unwrap() == i, + )?; + events[SQ_COMPUTE].try_push(Some( + job.sj_comp + .as_ref() + .expect("No comp queue?") + .job + .as_ref() + .expect("No comp job?") + .event_info(), + ))?; + } + _ => return Err(EINVAL), + } + } + + mod_dev_dbg!(self.dev, "Queue: Committing job\n"); + job.commit()?; + + mod_dev_dbg!(self.dev, "Queue: Arming job\n"); + let job = job.arm(); + let out_fence = job.fences().finished(); + mod_dev_dbg!(self.dev, "Queue: Pushing job\n"); + job.push(); + + mod_dev_dbg!(self.dev, "Queue: Adding {} out_syncs\n", out_syncs.len()); + for mut sync in out_syncs { + if let Some(chain) = sync.chain_fence.take() { + sync.syncobj + .add_point(chain, &out_fence, sync.timeline_value); + } else { + sync.syncobj.replace_fence(Some(&out_fence)); + } + } + + Ok(()) + } +} + +#[versions(AGX)] +impl Drop for Queue::ver { + fn drop(&mut self) { + mod_dev_dbg!(self.dev, "[Queue {}] Dropping queue\n", self.id); + } +} diff --git a/drivers/gpu/drm/asahi/queue/render.rs b/drivers/gpu/drm/asahi/queue/render.rs new file mode 100644 index 000000000000..318c952df020 --- /dev/null +++ b/drivers/gpu/drm/asahi/queue/render.rs @@ -0,0 +1,1173 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT +#![allow(clippy::unusual_byte_groupings)] + +//! Render work queue. +//! +//! A render queue consists of two underlying WorkQueues, one for vertex and one for fragment work. +//! This module is in charge of creating all of the firmware structures required to submit 3D +//! rendering work to the GPU, based on the userspace command buffer. 
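+//!
+//! Each render submission is split into a vertex (tiling) pass and a fragment pass that share
+//! a tiled vertex buffer (TVB); a firmware Barrier command makes the fragment pass wait for
+//! the corresponding vertex pass to complete before it runs.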
+ +use super::common; +use crate::alloc::Allocator; +use crate::debug::*; +use crate::fw::types::*; +use crate::gpu::GpuManager; +use crate::util::*; +use crate::workqueue::WorkError; +use crate::{box_in_place, inner_ptr, inner_weak_ptr, place}; +use crate::{buffer, fw, gpu, microseq, workqueue}; +use core::mem::MaybeUninit; +use core::sync::atomic::Ordering; +use kernel::bindings; +use kernel::dma_fence::RawDmaFence; +use kernel::drm::sched::Job; +use kernel::io_buffer::IoBufferReader; +use kernel::prelude::*; +use kernel::sync::{smutex::Mutex, Arc}; +use kernel::user_ptr::UserSlicePtr; + +const DEBUG_CLASS: DebugFlags = DebugFlags::Render; + +/// Tiling/Vertex control bit to disable using more than one GPU cluster. This results in decreased +/// throughput but also less latency, which is probably desirable for light vertex loads where the +/// overhead of clustering/merging would exceed the time it takes to just run the job on one +/// cluster. +const TILECTL_DISABLE_CLUSTERING: u32 = 1u32 << 0; + +struct RenderResult { + result: bindings::drm_asahi_result_render, + vtx_complete: bool, + frag_complete: bool, + vtx_error: Option<workqueue::WorkError>, + frag_error: Option<workqueue::WorkError>, + writer: super::ResultWriter, +} + +impl RenderResult { + fn commit(&mut self) { + if !self.vtx_complete || !self.frag_complete { + return; + } + + let mut error = self.vtx_error.take(); + if let Some(frag_error) = self.frag_error.take() { + if error.is_none() || error == Some(WorkError::Killed) { + error = Some(frag_error); + } + } + + if let Some(err) = error { + self.result.info = err.into(); + } else { + self.result.info.status = bindings::drm_asahi_status_DRM_ASAHI_STATUS_COMPLETE; + } + + self.writer.write(self.result); + } +} + +#[versions(AGX)] +impl super::Queue::ver { + /// Get the appropriate tiling parameters for a given userspace command buffer. 
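+    ///
+    /// As a worked example (assuming `align` rounds up to the next multiple): a 1920x1080
+    /// framebuffer with 32x32 pixel tiles yields 60x34 tiles, grouped into a 4x4 grid of
+    /// macrotiles of 16x12 tiles each after alignment, and a minimum TVB size of
+    /// `div_ceil(60 * 34, 128).max(8) = 16` blocks on a single-cluster GPU.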
+ fn get_tiling_params( + cmdbuf: &bindings::drm_asahi_cmd_render, + num_clusters: u32, + ) -> Result<buffer::TileInfo> { + let width: u32 = cmdbuf.fb_width; + let height: u32 = cmdbuf.fb_height; + let layers: u32 = cmdbuf.layers; + + if width > 65536 || height > 65536 { + return Err(EINVAL); + } + + if layers == 0 || layers > 2048 { + return Err(EINVAL); + } + + let tile_width = 32u32; + let tile_height = 32u32; + + let utile_width = cmdbuf.utile_width; + let utile_height = cmdbuf.utile_height; + + match (utile_width, utile_height) { + (32, 32) | (32, 16) | (16, 16) => (), + _ => return Err(EINVAL), + }; + + let utiles_per_tile_x = tile_width / utile_width; + let utiles_per_tile_y = tile_height / utile_height; + + let utiles_per_tile = utiles_per_tile_x * utiles_per_tile_y; + + let tiles_x = (width + tile_width - 1) / tile_width; + let tiles_y = (height + tile_height - 1) / tile_height; + let tiles = tiles_x * tiles_y; + + let mtiles_x = 4u32; + let mtiles_y = 4u32; + let mtiles = mtiles_x * mtiles_y; + + // TODO: *samples + let tiles_per_mtile_x = align(div_ceil(tiles_x, mtiles_x), 4); + let tiles_per_mtile_y = align(div_ceil(tiles_y, mtiles_y), 4); + let tiles_per_mtile = tiles_per_mtile_x * tiles_per_mtile_y; + + let mtile_x1 = tiles_per_mtile_x; + let mtile_x2 = 2 * tiles_per_mtile_x; + let mtile_x3 = 3 * tiles_per_mtile_x; + + let mtile_y1 = tiles_per_mtile_y; + let mtile_y2 = 2 * tiles_per_mtile_y; + let mtile_y3 = 3 * tiles_per_mtile_y; + + let rgn_entry_size = 5; + // Macrotile stride in 32-bit words + let rgn_size = align(rgn_entry_size * tiles_per_mtile * utiles_per_tile, 4) / 4; + let tilemap_size = (4 * rgn_size * mtiles * layers) as usize; + + let tpc_entry_size = 8; + // TPC stride in 32-bit words + let tpc_mtile_stride = tpc_entry_size * utiles_per_tile * tiles_per_mtile / 4; + let tpc_size = (num_clusters * (4 * tpc_mtile_stride * mtiles) * layers) as usize; + + // No idea where this comes from, but it fits what macOS does... + // TODO: layers? + let meta1_blocks = if num_clusters > 1 { + div_ceil(align(tiles_x, 2) * align(tiles_y, 4), 0x1980) + } else { + 0 + }; + + let min_tvb_blocks = + div_ceil(tiles_x * tiles_y, 128).max(if num_clusters > 1 { 9 } else { 8 }) as usize; + + // Sometimes clustering seems to use twice the cluster tilemap count + // and twice the meta4 size. TODO: Is this random or can we calculate + // it somehow??? Does it go higher??? + let cluster_factor = 2; + + Ok(buffer::TileInfo { + tiles_x, + tiles_y, + tiles, + utile_width, + utile_height, + //mtiles_x, + //mtiles_y, + tiles_per_mtile_x, + tiles_per_mtile_y, + //tiles_per_mtile, + utiles_per_mtile_x: tiles_per_mtile_x * utiles_per_tile_x, + utiles_per_mtile_y: tiles_per_mtile_y * utiles_per_tile_y, + //utiles_per_mtile: tiles_per_mtile * utiles_per_tile, + tilemap_size, + tpc_size, + meta1_blocks, + min_tvb_blocks, + cluster_factor, + params: fw::vertex::raw::TilingParameters { + rgn_size, + unk_4: 0x88, + ppp_ctrl: cmdbuf.ppp_ctrl, + x_max: (width - 1) as u16, + y_max: (height - 1) as u16, + te_screen: ((tiles_y - 1) << 12) | (tiles_x - 1), + te_mtile1: mtile_x3 | (mtile_x2 << 9) | (mtile_x1 << 18), + te_mtile2: mtile_y3 | (mtile_y2 << 9) | (mtile_y1 << 18), + tiles_per_mtile, + tpc_stride: tpc_mtile_stride, + unk_24: 0x100, + unk_28: if layers > 1 { + 0xe000 | (layers - 1) + } else { + 0x8000 + }, + }, + }) + } + + /// Submit work to a render queue. 
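+    ///
+    /// This reads the `drm_asahi_cmd_render` command buffer from userspace, computes the
+    /// tiling parameters, allocates a TVB scene from the queue's buffer (growing it if
+    /// needed), and then builds the firmware structures for both passes: a Barrier that makes
+    /// the fragment work wait on the vertex work, plus the microsequences and run structures
+    /// for each pass, with optional timestamps and result write-back when a result buffer was
+    /// supplied.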
+ pub(super) fn submit_render( + &self, + job: &mut Job<super::QueueJob::ver>, + cmd: &bindings::drm_asahi_command, + result_writer: Option<super::ResultWriter>, + id: u64, + flush_stamps: bool, + ) -> Result { + if cmd.cmd_type != bindings::drm_asahi_cmd_type_DRM_ASAHI_CMD_RENDER { + return Err(EINVAL); + } + + mod_dev_dbg!(self.dev, "[Submission {}] Render!\n", id); + + let mut cmdbuf_reader = unsafe { + UserSlicePtr::new( + cmd.cmd_buffer as usize as *mut _, + core::mem::size_of::<bindings::drm_asahi_cmd_render>(), + ) + .reader() + }; + + let mut cmdbuf: MaybeUninit<bindings::drm_asahi_cmd_render> = MaybeUninit::uninit(); + unsafe { + cmdbuf_reader.read_raw( + cmdbuf.as_mut_ptr() as *mut u8, + core::mem::size_of::<bindings::drm_asahi_cmd_render>(), + )?; + } + let cmdbuf = unsafe { cmdbuf.assume_init() }; + + if cmdbuf.flags + & !(bindings::ASAHI_RENDER_NO_CLEAR_PIPELINE_TEXTURES + | bindings::ASAHI_RENDER_SET_WHEN_RELOADING_Z_OR_S + | bindings::ASAHI_RENDER_MEMORYLESS_RTS_USED + | bindings::ASAHI_RENDER_PROCESS_EMPTY_TILES + | bindings::ASAHI_RENDER_NO_VERTEX_CLUSTERING) as u64 + != 0 + { + return Err(EINVAL); + } + + if cmdbuf.flags & bindings::ASAHI_RENDER_MEMORYLESS_RTS_USED as u64 != 0 { + // Not supported yet + return Err(EINVAL); + } + + if cmdbuf.fb_width == 0 + || cmdbuf.fb_height == 0 + || cmdbuf.fb_width > 16384 + || cmdbuf.fb_height > 16384 + { + mod_dev_dbg!( + self.dev, + "[Submission {}] Invalid dimensions {}x{}\n", + id, + cmdbuf.fb_width, + cmdbuf.fb_height + ); + return Err(EINVAL); + } + + let dev = self.dev.data(); + let gpu = match dev.gpu.as_any().downcast_ref::<gpu::GpuManager::ver>() { + Some(gpu) => gpu, + None => { + dev_crit!(self.dev, "GpuManager mismatched with Queue!\n"); + return Err(EIO); + } + }; + + let nclusters = gpu.get_dyncfg().id.num_clusters; + + // Can be set to false to disable clustering (for simpler jobs), but then the + // core masks below should be adjusted to cover a single rolling cluster. + let mut clustering = nclusters > 1; + + if debug_enabled(debug::DebugFlags::DisableClustering) + || cmdbuf.flags & bindings::ASAHI_RENDER_NO_VERTEX_CLUSTERING as u64 != 0 + { + clustering = false; + } + + #[ver(G < G14)] + let tiling_control = { + let render_cfg = gpu.get_cfg().render; + let mut tiling_control = render_cfg.tiling_control; + + if !clustering { + tiling_control |= TILECTL_DISABLE_CLUSTERING; + } + tiling_control + }; + + let mut alloc = gpu.alloc(); + let kalloc = &mut *alloc; + + // This sequence number increases per new client/VM? assigned to some slot, + // but it's unclear *which* slot... 
+ let slot_client_seq: u8 = (self.id & 0xff) as u8; + + let tile_info = Self::get_tiling_params(&cmdbuf, if clustering { nclusters } else { 1 })?; + + let buffer = self.buffer.as_ref().ok_or(EINVAL)?.lock(); + + let scene = Arc::try_new(buffer.new_scene(kalloc, &tile_info)?)?; + + let notifier = self.notifier.clone(); + + let tvb_autogrown = buffer.auto_grow()?; + if tvb_autogrown { + let new_size = buffer.block_count() as usize; + cls_dev_dbg!( + TVBStats, + &self.dev, + "[Submission {}] TVB grew to {} bytes ({} blocks) due to overflows\n", + id, + new_size * buffer::BLOCK_SIZE, + new_size, + ); + } + + let tvb_grown = buffer.ensure_blocks(tile_info.min_tvb_blocks)?; + if tvb_grown { + cls_dev_dbg!( + TVBStats, + &self.dev, + "[Submission {}] TVB grew to {} bytes ({} blocks) due to dimensions ({}x{})\n", + id, + tile_info.min_tvb_blocks * buffer::BLOCK_SIZE, + tile_info.min_tvb_blocks, + cmdbuf.fb_width, + cmdbuf.fb_height + ); + } + + let vm_bind = job.vm_bind.clone(); + + mod_dev_dbg!( + self.dev, + "[Submission {}] VM slot = {}\n", + id, + vm_bind.slot() + ); + + let ev_vtx = job.get_vtx()?.event_info(); + let ev_frag = job.get_frag()?.event_info(); + + mod_dev_dbg!( + self.dev, + "[Submission {}] Vert event #{} -> {:#x?}\n", + id, + ev_vtx.slot, + ev_vtx.value.next(), + ); + mod_dev_dbg!( + self.dev, + "[Submission {}] Frag event #{} -> {:#x?}\n", + id, + ev_frag.slot, + ev_frag.value.next(), + ); + + let uuid_3d = cmdbuf.cmd_3d_id; + let uuid_ta = cmdbuf.cmd_ta_id; + + mod_dev_dbg!( + self.dev, + "[Submission {}] Vert UUID = {:#x?}\n", + id, + uuid_ta + ); + mod_dev_dbg!( + self.dev, + "[Submission {}] Frag UUID = {:#x?}\n", + id, + uuid_3d + ); + + let fence = job.fence.clone(); + let frag_job = job.get_frag()?; + + mod_dev_dbg!(self.dev, "[Submission {}] Create Barrier\n", id); + let barrier: GpuObject<fw::workqueue::Barrier> = kalloc.private.new_inplace( + Default::default(), + |_inner, ptr: &mut MaybeUninit<fw::workqueue::raw::Barrier>| { + Ok(place!( + ptr, + fw::workqueue::raw::Barrier { + tag: fw::workqueue::CommandType::Barrier, + wait_stamp: ev_vtx.fw_stamp_pointer, + wait_value: ev_vtx.value.next(), + wait_slot: ev_vtx.slot, + stamp_self: ev_frag.value.next(), + uuid: uuid_3d, + unk: 0, + } + )) + }, + )?; + + mod_dev_dbg!(self.dev, "[Submission {}] Add Barrier\n", id); + frag_job.add(barrier, vm_bind.slot())?; + + let timestamps = Arc::try_new(kalloc.shared.new_default::<fw::job::RenderTimestamps>()?)?; + + let unk1 = debug_enabled(debug::DebugFlags::Debug1); + let unk2 = debug_enabled(debug::DebugFlags::Debug2); + let unk3 = debug_enabled(debug::DebugFlags::Debug3); + + let mut tile_config: u64 = 0; + if !unk1 { + tile_config |= 0x280; + } + if cmdbuf.layers > 1 { + tile_config |= 1; + } + if cmdbuf.flags & bindings::ASAHI_RENDER_PROCESS_EMPTY_TILES as u64 != 0 { + tile_config |= 0x10000; + } + + let mut utile_config = + ((tile_info.utile_width / 16) << 12) | ((tile_info.utile_height / 16) << 14); + utile_config |= match cmdbuf.samples { + 1 => 0, + 2 => 1, + 4 => 2, + _ => return Err(EINVAL), + }; + + let frag_result = result_writer + .map(|writer| { + let mut result = RenderResult { + result: Default::default(), + vtx_complete: false, + frag_complete: false, + vtx_error: None, + frag_error: None, + writer, + }; + + if tvb_autogrown { + result.result.flags |= bindings::DRM_ASAHI_RESULT_RENDER_TVB_GROW_OVF as u64; + } + if tvb_grown { + result.result.flags |= bindings::DRM_ASAHI_RESULT_RENDER_TVB_GROW_MIN as u64; + } + result.result.tvb_size_bytes = buffer.size() as u64; 
+ + Arc::try_new(Mutex::new(result)) + }) + .transpose()?; + + let vtx_result = frag_result.clone(); + + // TODO: check + #[ver(V >= V13_0B4)] + let count_frag = self.counter.fetch_add(2, Ordering::Relaxed); + #[ver(V >= V13_0B4)] + let count_vtx = count_frag + 1; + + mod_dev_dbg!(self.dev, "[Submission {}] Create Frag\n", id); + let frag = GpuObject::new_prealloc( + kalloc.private.alloc_object()?, + |ptr: GpuWeakPointer<fw::fragment::RunFragment::ver>| { + let mut builder = microseq::Builder::new(); + + let stats = inner_weak_ptr!( + gpu.initdata.runtime_pointers.stats.frag.weak_pointer(), + stats + ); + + let start_frag = builder.add(microseq::StartFragment::ver { + header: microseq::op::StartFragment::HEADER, + job_params2: inner_weak_ptr!(ptr, job_params2), + job_params1: inner_weak_ptr!(ptr, job_params1), + scene: scene.gpu_pointer(), + stats, + busy_flag: inner_weak_ptr!(ptr, busy_flag), + tvb_overflow_count: inner_weak_ptr!(ptr, tvb_overflow_count), + unk_pointer: inner_weak_ptr!(ptr, unk_pointee), + work_queue: ev_frag.info_ptr, + work_item: ptr, + vm_slot: vm_bind.slot(), + unk_50: 0x1, // fixed + event_generation: self.id as u32, + buffer_slot: scene.slot(), + unk_5c: 0, + cmd_seq: U64(ev_frag.cmd_seq), + unk_68: 0, + unk_758_flag: inner_weak_ptr!(ptr, unk_758_flag), + unk_job_buf: inner_weak_ptr!(ptr, unk_buf_0), + unk_7c: 0, + unk_80: 0, + unk_84: 0, + uuid: uuid_3d, + attachments: common::build_attachments( + cmdbuf.attachments, + cmdbuf.attachment_count, + )?, + unk_190: 0, + #[ver(V >= V13_0B4)] + counter: U64(count_frag), + #[ver(V >= V13_0B4)] + notifier_buf: inner_weak_ptr!(notifier.weak_pointer(), state.unk_buf), + })?; + + if frag_result.is_some() { + builder.add(microseq::Timestamp::ver { + header: microseq::op::Timestamp::new(true), + cur_ts: inner_weak_ptr!(ptr, cur_ts), + start_ts: inner_weak_ptr!(ptr, start_ts), + update_ts: inner_weak_ptr!(ptr, start_ts), + work_queue: ev_frag.info_ptr, + unk_24: U64(0), + #[ver(V >= V13_0B4)] + unk_ts: inner_weak_ptr!(ptr, unk_ts), + uuid: uuid_3d, + unk_30_padding: 0, + })?; + } + + builder.add(microseq::WaitForIdle { + header: microseq::op::WaitForIdle::new(microseq::Pipe::Fragment), + })?; + + if frag_result.is_some() { + builder.add(microseq::Timestamp::ver { + header: microseq::op::Timestamp::new(false), + cur_ts: inner_weak_ptr!(ptr, cur_ts), + start_ts: inner_weak_ptr!(ptr, start_ts), + update_ts: inner_weak_ptr!(ptr, end_ts), + work_queue: ev_frag.info_ptr, + unk_24: U64(0), + #[ver(V >= V13_0B4)] + unk_ts: inner_weak_ptr!(ptr, unk_ts), + uuid: uuid_3d, + unk_30_padding: 0, + })?; + } + + let off = builder.offset_to(start_frag); + builder.add(microseq::FinalizeFragment::ver { + header: microseq::op::FinalizeFragment::HEADER, + uuid: uuid_3d, + unk_8: 0, + fw_stamp: ev_frag.fw_stamp_pointer, + stamp_value: ev_frag.value.next(), + unk_18: 0, + scene: scene.weak_pointer(), + buffer: scene.weak_buffer_pointer(), + unk_2c: U64(1), + stats, + unk_pointer: inner_weak_ptr!(ptr, unk_pointee), + busy_flag: inner_weak_ptr!(ptr, busy_flag), + work_queue: ev_frag.info_ptr, + work_item: ptr, + vm_slot: vm_bind.slot(), + unk_60: 0, + unk_758_flag: inner_weak_ptr!(ptr, unk_758_flag), + unk_6c: U64(0), + unk_74: U64(0), + unk_7c: U64(0), + unk_84: U64(0), + unk_8c: U64(0), + #[ver(G == G14 && V < V13_0B4)] + unk_8c_g14: U64(0), + restart_branch_offset: off, + unk_98: unk3.into(), + #[ver(V >= V13_0B4)] + unk_9c: Default::default(), + })?; + + builder.add(microseq::RetireStamp { + header: microseq::op::RetireStamp::HEADER, + })?; + + 
Ok(box_in_place!(fw::fragment::RunFragment::ver { + notifier: notifier.clone(), + scene: scene.clone(), + micro_seq: builder.build(&mut kalloc.private)?, + vm_bind: vm_bind.clone(), + aux_fb: self.ualloc.lock().array_empty(0x8000)?, + timestamps: timestamps.clone(), + })?) + }, + |inner, ptr| { + let aux_fb_info = fw::fragment::raw::AuxFBInfo::ver { + iogpu_unk_214: cmdbuf.iogpu_unk_214, + unk2: 0, + width: cmdbuf.fb_width, + height: cmdbuf.fb_height, + #[ver(V >= V13_0B4)] + unk3: U64(0x100000), + }; + + Ok(place!( + ptr, + fw::fragment::raw::RunFragment::ver { + tag: fw::workqueue::CommandType::RunFragment, + #[ver(V >= V13_0B4)] + counter: U64(count_frag), + vm_slot: vm_bind.slot(), + unk_8: 0, + microsequence: inner.micro_seq.gpu_pointer(), + microsequence_size: inner.micro_seq.len() as u32, + notifier: inner.notifier.gpu_pointer(), + buffer: inner.scene.buffer_pointer(), + scene: inner.scene.gpu_pointer(), + unk_buffer_buf: inner.scene.kernel_buffer_pointer(), + tvb_tilemap: inner.scene.tvb_tilemap_pointer(), + ppp_multisamplectl: U64(cmdbuf.ppp_multisamplectl), + samples: cmdbuf.samples, + tiles_per_mtile_y: tile_info.tiles_per_mtile_y as u16, + tiles_per_mtile_x: tile_info.tiles_per_mtile_x as u16, + unk_50: U64(0), + unk_58: U64(0), + merge_upper_x: F32::from_bits(cmdbuf.merge_upper_x), + merge_upper_y: F32::from_bits(cmdbuf.merge_upper_y), + unk_68: U64(0), + tile_count: U64(tile_info.tiles as u64), + job_params1: fw::fragment::raw::JobParameters1::ver { + utile_config: utile_config, + unk_4: 0, + clear_pipeline: fw::fragment::raw::ClearPipelineBinding { + pipeline_bind: U64(cmdbuf.load_pipeline_bind as u64), + address: U64(cmdbuf.load_pipeline as u64), + }, + ppp_multisamplectl: U64(cmdbuf.ppp_multisamplectl), + scissor_array: U64(cmdbuf.scissor_array), + depth_bias_array: U64(cmdbuf.depth_bias_array), + aux_fb_info: aux_fb_info, + depth_dimensions: U64(cmdbuf.depth_dimensions as u64), + visibility_result_buffer: U64(cmdbuf.visibility_result_buffer), + zls_ctrl: U64(cmdbuf.zls_ctrl), + #[ver(G >= G14)] + unk_58_g14_0: U64(0x4040404), + #[ver(G >= G14)] + unk_58_g14_8: U64(0), + depth_buffer_ptr1: U64(cmdbuf.depth_buffer_1), + depth_buffer_ptr2: U64(cmdbuf.depth_buffer_2), + stencil_buffer_ptr1: U64(cmdbuf.stencil_buffer_1), + stencil_buffer_ptr2: U64(cmdbuf.stencil_buffer_2), + #[ver(G >= G14)] + unk_68_g14_0: Default::default(), + unk_78: Default::default(), + depth_meta_buffer_ptr1: U64(cmdbuf.depth_meta_buffer_1), + unk_a0: Default::default(), + depth_meta_buffer_ptr2: U64(cmdbuf.depth_meta_buffer_2), + unk_b0: Default::default(), + stencil_meta_buffer_ptr1: U64(cmdbuf.stencil_meta_buffer_1), + unk_c0: Default::default(), + stencil_meta_buffer_ptr2: U64(cmdbuf.stencil_meta_buffer_2), + unk_d0: Default::default(), + tvb_tilemap: inner.scene.tvb_tilemap_pointer(), + tvb_heapmeta: inner.scene.tvb_heapmeta_pointer(), + mtile_stride_dwords: U64((4 * tile_info.params.rgn_size as u64) << 24), + tvb_heapmeta_2: inner.scene.tvb_heapmeta_pointer(), + tile_config: U64(tile_config), + aux_fb: inner.aux_fb.gpu_pointer(), + unk_108: Default::default(), + pipeline_base: U64(0x11_00000000), + unk_140: U64(0x8c60), + unk_148: U64(0x0), + unk_150: U64(0x0), + unk_158: U64(0x1c), + unk_160: U64(0), + unk_168_padding: Default::default(), + #[ver(V < V13_0B4)] + __pad0: Default::default(), + }, + job_params2: fw::fragment::raw::JobParameters2 { + store_pipeline_bind: cmdbuf.store_pipeline_bind, + store_pipeline_addr: cmdbuf.store_pipeline, + unk_8: 0x0, + unk_c: 0x0, + merge_upper_x: 
F32::from_bits(cmdbuf.merge_upper_x), + merge_upper_y: F32::from_bits(cmdbuf.merge_upper_y), + unk_18: U64(0x0), + utiles_per_mtile_y: tile_info.utiles_per_mtile_y as u16, + utiles_per_mtile_x: tile_info.utiles_per_mtile_x as u16, + unk_24: 0x0, + tile_counts: ((tile_info.tiles_y - 1) << 12) | (tile_info.tiles_x - 1), + iogpu_unk_212: cmdbuf.iogpu_unk_212, + isp_bgobjdepth: cmdbuf.isp_bgobjdepth, + // TODO: does this flag need to be exposed to userspace? + isp_bgobjvals: cmdbuf.isp_bgobjvals | 0x400, + unk_38: 0x0, + unk_3c: 0x1, + unk_40: 0, + }, + job_params3: fw::fragment::raw::JobParameters3::ver { + unk_44_padding: Default::default(), + depth_bias_array: fw::fragment::raw::ArrayAddr { + ptr: U64(cmdbuf.depth_bias_array), + unk_padding: U64(0), + }, + scissor_array: fw::fragment::raw::ArrayAddr { + ptr: U64(cmdbuf.scissor_array), + unk_padding: U64(0), + }, + visibility_result_buffer: U64(cmdbuf.visibility_result_buffer), + unk_118: U64(0x0), + unk_120: Default::default(), + unk_reload_pipeline: fw::fragment::raw::ClearPipelineBinding { + pipeline_bind: U64(cmdbuf.partial_reload_pipeline_bind as u64), + address: U64(cmdbuf.partial_reload_pipeline as u64), + }, + unk_258: U64(0), + unk_260: U64(0), + unk_268: U64(0), + unk_270: U64(0), + reload_pipeline: fw::fragment::raw::ClearPipelineBinding { + pipeline_bind: U64(cmdbuf.partial_reload_pipeline_bind as u64), + address: U64(cmdbuf.partial_reload_pipeline as u64), + }, + zls_ctrl: U64(cmdbuf.zls_ctrl), + unk_290: U64(0x0), + depth_buffer_ptr1: U64(cmdbuf.depth_buffer_1), + unk_2a0: U64(0x0), + unk_2a8: U64(0x0), + depth_buffer_ptr2: U64(cmdbuf.depth_buffer_2), + depth_buffer_ptr3: U64(cmdbuf.depth_buffer_3), + depth_meta_buffer_ptr3: U64(cmdbuf.depth_meta_buffer_3), + stencil_buffer_ptr1: U64(cmdbuf.stencil_buffer_1), + unk_2d0: U64(0x0), + unk_2d8: U64(0x0), + stencil_buffer_ptr2: U64(cmdbuf.stencil_buffer_2), + stencil_buffer_ptr3: U64(cmdbuf.stencil_buffer_3), + stencil_meta_buffer_ptr3: U64(cmdbuf.stencil_meta_buffer_3), + unk_2f8: Default::default(), + iogpu_unk_212: cmdbuf.iogpu_unk_212, + unk_30c: 0x0, + aux_fb_info: aux_fb_info, + unk_320_padding: Default::default(), + unk_partial_store_pipeline: + fw::fragment::raw::StorePipelineBinding::new( + cmdbuf.partial_store_pipeline_bind, + cmdbuf.partial_store_pipeline + ), + partial_store_pipeline: fw::fragment::raw::StorePipelineBinding::new( + cmdbuf.partial_store_pipeline_bind, + cmdbuf.partial_store_pipeline + ), + isp_bgobjdepth: cmdbuf.isp_bgobjdepth, + isp_bgobjvals: cmdbuf.isp_bgobjvals, + iogpu_unk_49: cmdbuf.iogpu_unk_49, + unk_37c: 0x0, + unk_380: U64(0x0), + unk_388: U64(0x0), + #[ver(V >= V13_0B4)] + unk_390_0: U64(0x0), + depth_dimensions: U64(cmdbuf.depth_dimensions as u64), + }, + unk_758_flag: 0, + unk_75c_flag: 0, + unk_buf: Default::default(), + busy_flag: 0, + tvb_overflow_count: 0, + unk_878: 0, + encoder_params: fw::job::raw::EncoderParams { + unk_8: (cmdbuf.flags + & bindings::ASAHI_RENDER_SET_WHEN_RELOADING_Z_OR_S as u64 + != 0) as u32, + unk_c: 0x0, // fixed + unk_10: 0x0, // fixed + encoder_id: cmdbuf.encoder_id, + unk_18: 0x0, // fixed + iogpu_compute_unk44: 0xffffffff, + seq_buffer: inner.scene.seq_buf_pointer(), + unk_28: U64(0x0), // fixed + }, + process_empty_tiles: (cmdbuf.flags + & bindings::ASAHI_RENDER_PROCESS_EMPTY_TILES as u64 + != 0) as u32, + no_clear_pipeline_textures: (cmdbuf.flags + & bindings::ASAHI_RENDER_NO_CLEAR_PIPELINE_TEXTURES as u64 + != 0) as u32, + unk_param: unk2.into(), // 1 for boot stuff? 
+ unk_pointee: 0, + meta: fw::job::raw::JobMeta { + unk_4: 0, + stamp: ev_frag.stamp_pointer, + fw_stamp: ev_frag.fw_stamp_pointer, + stamp_value: ev_frag.value.next(), + stamp_slot: ev_frag.slot, + evctl_index: 0, // fixed + flush_stamps: flush_stamps as u32, + uuid: uuid_3d, + cmd_seq: ev_frag.cmd_seq as u32, + }, + unk_after_meta: unk1.into(), + unk_buf_0: U64(0), + unk_buf_8: U64(0), + unk_buf_10: U64(1), + cur_ts: U64(0), + start_ts: Some(inner_ptr!(inner.timestamps.gpu_pointer(), frag.start)), + end_ts: Some(inner_ptr!(inner.timestamps.gpu_pointer(), frag.end)), + unk_914: 0, + unk_918: U64(0), + unk_920: 0, + client_sequence: slot_client_seq, + pad_925: Default::default(), + unk_928: 0, + unk_92c: 0, + #[ver(V >= V13_0B4)] + unk_ts: U64(0), + #[ver(V >= V13_0B4)] + unk_92d_8: Default::default(), + } + )) + }, + )?; + + mod_dev_dbg!(self.dev, "[Submission {}] Add Frag\n", id); + fence.add_command(); + + frag_job.add_cb(frag, vm_bind.slot(), move |cmd, error| { + if let Some(err) = error { + fence.set_error(err.into()); + } + if let Some(mut res) = frag_result.as_ref().map(|a| a.lock()) { + cmd.timestamps.with(|raw, _inner| { + res.result.fragment_ts_start = raw.frag.start.load(Ordering::Relaxed); + res.result.fragment_ts_end = raw.frag.end.load(Ordering::Relaxed); + }); + cmd.with(|raw, _inner| { + res.result.num_tvb_overflows = raw.tvb_overflow_count; + }); + res.frag_error = error; + res.frag_complete = true; + res.commit(); + } + fence.command_complete(); + })?; + + let fence = job.fence.clone(); + let vtx_job = job.get_vtx()?; + + if scene.rebind() || tvb_grown || tvb_autogrown { + mod_dev_dbg!(self.dev, "[Submission {}] Create Bind Buffer\n", id); + let bind_buffer = kalloc.private.new_inplace( + fw::buffer::InitBuffer::ver { + scene: scene.clone(), + }, + |inner, ptr: &mut MaybeUninit<fw::buffer::raw::InitBuffer::ver<'_>>| { + Ok(place!( + ptr, + fw::buffer::raw::InitBuffer::ver { + tag: fw::workqueue::CommandType::InitBuffer, + vm_slot: vm_bind.slot(), + buffer_slot: inner.scene.slot(), + unk_c: 0, + block_count: buffer.block_count(), + buffer: inner.scene.buffer_pointer(), + stamp_value: ev_vtx.value.next(), + } + )) + }, + )?; + + mod_dev_dbg!(self.dev, "[Submission {}] Add Bind Buffer\n", id); + vtx_job.add(bind_buffer, vm_bind.slot())?; + } + + mod_dev_dbg!(self.dev, "[Submission {}] Create Vertex\n", id); + let vtx = GpuObject::new_prealloc( + kalloc.private.alloc_object()?, + |ptr: GpuWeakPointer<fw::vertex::RunVertex::ver>| { + let mut builder = microseq::Builder::new(); + + let stats = inner_weak_ptr!( + gpu.initdata.runtime_pointers.stats.vtx.weak_pointer(), + stats + ); + + let start_vtx = builder.add(microseq::StartVertex::ver { + header: microseq::op::StartVertex::HEADER, + tiling_params: inner_weak_ptr!(ptr, tiling_params), + job_params1: inner_weak_ptr!(ptr, job_params1), + buffer: scene.weak_buffer_pointer(), + scene: scene.weak_pointer(), + stats, + work_queue: ev_vtx.info_ptr, + vm_slot: vm_bind.slot(), + unk_38: 1, // fixed + event_generation: self.id as u32, + buffer_slot: scene.slot(), + unk_44: 0, + cmd_seq: U64(ev_vtx.cmd_seq), + unk_50: 0, + unk_pointer: inner_weak_ptr!(ptr, unk_pointee), + unk_job_buf: inner_weak_ptr!(ptr, unk_buf_0), + unk_64: 0x0, // fixed + unk_68: unk1.into(), + uuid: uuid_ta, + unk_70: 0x0, // fixed + unk_74: Default::default(), // fixed + unk_15c: 0x0, // fixed + unk_160: U64(0x0), // fixed + unk_168: 0x0, // fixed + unk_16c: 0x0, // fixed + unk_170: U64(0x0), // fixed + #[ver(V >= V13_0B4)] + counter: U64(count_vtx), + #[ver(V >= 
V13_0B4)] + notifier_buf: inner_weak_ptr!(notifier.weak_pointer(), state.unk_buf), + unk_178: 0x0, // padding? + })?; + + if vtx_result.is_some() { + builder.add(microseq::Timestamp::ver { + header: microseq::op::Timestamp::new(true), + cur_ts: inner_weak_ptr!(ptr, cur_ts), + start_ts: inner_weak_ptr!(ptr, start_ts), + update_ts: inner_weak_ptr!(ptr, start_ts), + work_queue: ev_vtx.info_ptr, + unk_24: U64(0), + #[ver(V >= V13_0B4)] + unk_ts: inner_weak_ptr!(ptr, unk_ts), + uuid: uuid_ta, + unk_30_padding: 0, + })?; + } + + builder.add(microseq::WaitForIdle { + header: microseq::op::WaitForIdle::new(microseq::Pipe::Vertex), + })?; + + if vtx_result.is_some() { + builder.add(microseq::Timestamp::ver { + header: microseq::op::Timestamp::new(false), + cur_ts: inner_weak_ptr!(ptr, cur_ts), + start_ts: inner_weak_ptr!(ptr, start_ts), + update_ts: inner_weak_ptr!(ptr, end_ts), + work_queue: ev_vtx.info_ptr, + unk_24: U64(0), + #[ver(V >= V13_0B4)] + unk_ts: inner_weak_ptr!(ptr, unk_ts), + uuid: uuid_ta, + unk_30_padding: 0, + })?; + } + + let off = builder.offset_to(start_vtx); + builder.add(microseq::FinalizeVertex::ver { + header: microseq::op::FinalizeVertex::HEADER, + scene: scene.weak_pointer(), + buffer: scene.weak_buffer_pointer(), + stats, + work_queue: ev_vtx.info_ptr, + vm_slot: vm_bind.slot(), + unk_28: 0x0, // fixed + unk_pointer: inner_weak_ptr!(ptr, unk_pointee), + unk_34: 0x0, // fixed + uuid: uuid_ta, + fw_stamp: ev_vtx.fw_stamp_pointer, + stamp_value: ev_vtx.value.next(), + unk_48: U64(0x0), // fixed + unk_50: 0x0, // fixed + unk_54: 0x0, // fixed + unk_58: U64(0x0), // fixed + unk_60: 0x0, // fixed + unk_64: 0x0, // fixed + unk_68: 0x0, // fixed + #[ver(G >= G14 && V < V13_0B4)] + unk_68_g14: U64(0), + restart_branch_offset: off, + unk_70: 0x0, // fixed + #[ver(V >= V13_0B4)] + unk_74: Default::default(), // Ventura + })?; + + builder.add(microseq::RetireStamp { + header: microseq::op::RetireStamp::HEADER, + })?; + + Ok(box_in_place!(fw::vertex::RunVertex::ver { + notifier: notifier, + scene: scene.clone(), + micro_seq: builder.build(&mut kalloc.private)?, + vm_bind: vm_bind.clone(), + timestamps: timestamps, + })?) 
+ }, + |inner, ptr| { + #[ver(G < G14)] + let core_masks = gpu.core_masks_packed(); + Ok(place!( + ptr, + fw::vertex::raw::RunVertex::ver { + tag: fw::workqueue::CommandType::RunVertex, + #[ver(V >= V13_0B4)] + counter: U64(count_vtx), + vm_slot: vm_bind.slot(), + unk_8: 0, + notifier: inner.notifier.gpu_pointer(), + buffer_slot: inner.scene.slot(), + unk_1c: 0, + buffer: inner.scene.buffer_pointer(), + scene: inner.scene.gpu_pointer(), + unk_buffer_buf: inner.scene.kernel_buffer_pointer(), + unk_34: 0, + job_params1: fw::vertex::raw::JobParameters1::ver { + unk_0: U64(if unk1 { 0 } else { 0x200 }), // sometimes 0 + unk_8: f32!(1e-20), // fixed + unk_c: f32!(1e-20), // fixed + tvb_tilemap: inner.scene.tvb_tilemap_pointer(), + #[ver(G < G14)] + tvb_cluster_tilemaps: inner.scene.cluster_tilemaps_pointer(), + tpc: inner.scene.tpc_pointer(), + tvb_heapmeta: inner + .scene + .tvb_heapmeta_pointer() + .or(0x8000_0000_0000_0000), + iogpu_unk_54: 0x6b0003, // fixed + iogpu_unk_55: 0x3a0012, // fixed + iogpu_unk_56: U64(0x1), // fixed + #[ver(G < G14)] + tvb_cluster_meta1: inner + .scene + .meta_1_pointer() + .map(|x| x.or((tile_info.meta1_blocks as u64) << 50)), + utile_config: utile_config, + unk_4c: 0, + ppp_multisamplectl: U64(cmdbuf.ppp_multisamplectl), // fixed + tvb_heapmeta_2: inner.scene.tvb_heapmeta_pointer(), + #[ver(G < G14)] + unk_60: U64(0x0), // fixed + #[ver(G < G14)] + core_mask: Array::new([ + *core_masks.first().unwrap_or(&0), + *core_masks.get(1).unwrap_or(&0), + ]), + preempt_buf1: inner.scene.preempt_buf_1_pointer(), + preempt_buf2: inner.scene.preempt_buf_2_pointer(), + unk_80: U64(0x1), // fixed + preempt_buf3: inner + .scene + .preempt_buf_3_pointer() + .or(0x4_0000_0000_0000), // check + encoder_addr: U64(cmdbuf.encoder_ptr), + #[ver(G < G14)] + tvb_cluster_meta2: inner.scene.meta_2_pointer(), + #[ver(G < G14)] + tvb_cluster_meta3: inner.scene.meta_3_pointer(), + #[ver(G < G14)] + tiling_control: tiling_control, + #[ver(G < G14)] + unk_ac: Default::default(), // fixed + unk_b0: Default::default(), // fixed + pipeline_base: U64(0x11_00000000), + #[ver(G < G14)] + tvb_cluster_meta4: inner + .scene + .meta_4_pointer() + .map(|x| x.or(0x3000_0000_0000_0000)), + #[ver(G < G14)] + unk_f0: U64(0x1c + align(tile_info.meta1_blocks, 4) as u64), + unk_f8: U64(0x8c60), // fixed + unk_100: Default::default(), // fixed + unk_118: 0x1c, // fixed + #[ver(G >= G14)] + __pad: Default::default(), + }, + unk_154: Default::default(), + tiling_params: tile_info.params, + unk_3e8: Default::default(), + tpc: inner.scene.tpc_pointer(), + tpc_size: U64(tile_info.tpc_size as u64), + microsequence: inner.micro_seq.gpu_pointer(), + microsequence_size: inner.micro_seq.len() as u32, + fragment_stamp_slot: ev_frag.slot, + fragment_stamp_value: ev_frag.value.next(), + unk_pointee: 0, + unk_pad: 0, + job_params2: fw::vertex::raw::JobParameters2 { + unk_480: Default::default(), // fixed + unk_498: U64(0x0), // fixed + unk_4a0: 0x0, // fixed + preempt_buf1: inner.scene.preempt_buf_1_pointer(), + unk_4ac: 0x0, // fixed + unk_4b0: U64(0x0), // fixed + unk_4b8: 0x0, // fixed + unk_4bc: U64(0x0), // fixed + unk_4c4_padding: Default::default(), + unk_50c: 0x0, // fixed + unk_510: U64(0x0), // fixed + unk_518: U64(0x0), // fixed + unk_520: U64(0x0), // fixed + }, + encoder_params: fw::job::raw::EncoderParams { + unk_8: 0x0, // fixed + unk_c: 0x0, // fixed + unk_10: 0x0, // fixed + encoder_id: cmdbuf.encoder_id, + unk_18: 0x0, // fixed + iogpu_compute_unk44: 0xffffffff, + seq_buffer: inner.scene.seq_buf_pointer(), 
+ unk_28: U64(0x0), // fixed + }, + unk_55c: 0, + unk_560: 0, + memoryless_rts_used: (cmdbuf.flags + & bindings::ASAHI_RENDER_MEMORYLESS_RTS_USED as u64 + != 0) as u32, + unk_568: 0, + unk_56c: 0, + meta: fw::job::raw::JobMeta { + unk_4: 0, + stamp: ev_vtx.stamp_pointer, + fw_stamp: ev_vtx.fw_stamp_pointer, + stamp_value: ev_vtx.value.next(), + stamp_slot: ev_vtx.slot, + evctl_index: 0, // fixed + flush_stamps: flush_stamps as u32, + uuid: uuid_ta, + cmd_seq: ev_vtx.cmd_seq as u32, + }, + unk_after_meta: unk1.into(), + unk_buf_0: U64(0), + unk_buf_8: U64(0), + unk_buf_10: U64(0), + cur_ts: U64(0), + start_ts: Some(inner_ptr!(inner.timestamps.gpu_pointer(), vtx.start)), + end_ts: Some(inner_ptr!(inner.timestamps.gpu_pointer(), vtx.end)), + unk_5c4: 0, + unk_5c8: 0, + unk_5cc: 0, + unk_5d0: 0, + client_sequence: slot_client_seq, + pad_5d5: Default::default(), + unk_5d8: 0, + unk_5dc: 0, + #[ver(V >= V13_0B4)] + unk_ts: U64(0), + #[ver(V >= V13_0B4)] + unk_5dd_8: Default::default(), + } + )) + }, + )?; + + core::mem::drop(alloc); + + mod_dev_dbg!(self.dev, "[Submission {}] Add Vertex\n", id); + fence.add_command(); + vtx_job.add_cb(vtx, vm_bind.slot(), move |cmd, error| { + if let Some(err) = error { + fence.set_error(err.into()) + } + if let Some(mut res) = vtx_result.as_ref().map(|a| a.lock()) { + cmd.timestamps.with(|raw, _inner| { + res.result.vertex_ts_start = raw.vtx.start.load(Ordering::Relaxed); + res.result.vertex_ts_end = raw.vtx.end.load(Ordering::Relaxed); + }); + res.result.tvb_usage_bytes = cmd.scene.used_bytes() as u64; + if cmd.scene.overflowed() { + res.result.flags |= bindings::DRM_ASAHI_RESULT_RENDER_TVB_OVERFLOWED as u64; + } + res.vtx_error = error; + res.vtx_complete = true; + res.commit(); + } + fence.command_complete(); + })?; + + mod_dev_dbg!(self.dev, "[Submission {}] Increment counters\n", id); + self.notifier.threshold.with(|raw, _inner| { + raw.increment(); + raw.increment(); + }); + + // TODO: handle rollbacks, move to job submit? + buffer.increment(); + + job.get_vtx()?.next_seq(); + job.get_frag()?.next_seq(); + + Ok(()) + } +} diff --git a/drivers/gpu/drm/asahi/regs.rs b/drivers/gpu/drm/asahi/regs.rs new file mode 100644 index 000000000000..019d7214793d --- /dev/null +++ b/drivers/gpu/drm/asahi/regs.rs @@ -0,0 +1,387 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT + +//! GPU MMIO register abstraction +//! +//! Since the vast majority of the interactions with the GPU are brokered through the firmware, +//! there is very little need to interact directly with GPU MMIO register. This module abstracts +//! the few operations that require that, mainly reading the MMU fault status, reading GPU ID +//! information, and starting the GPU firmware coprocessor. + +use crate::hw; +use kernel::{device, io_mem::IoMem, platform, prelude::*}; + +/// Size of the ASC control MMIO region. +pub(crate) const ASC_CTL_SIZE: usize = 0x4000; + +/// Size of the SGX MMIO region. +pub(crate) const SGX_SIZE: usize = 0x1000000; + +const CPU_CONTROL: usize = 0x44; +const CPU_RUN: u32 = 0x1 << 4; // BIT(4) + +const FAULT_INFO: usize = 0x17030; + +const ID_VERSION: usize = 0xd04000; +const ID_UNK08: usize = 0xd04008; +const ID_COUNTS_1: usize = 0xd04010; +const ID_COUNTS_2: usize = 0xd04014; +const ID_UNK18: usize = 0xd04018; +const ID_CLUSTERS: usize = 0xd0401c; + +const CORE_MASK_0: usize = 0xd01500; +const CORE_MASK_1: usize = 0xd01514; + +/// Enum representing the unit that caused an MMU fault. 
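+///
+/// Variants carrying a `u8` payload identify a specific instance of that unit; the index is
+/// decoded from the raw unit code in [`Resources::get_fault_info`]. Codes that cannot be decoded
+/// are reported as [`FaultUnit::Unknown`] together with the raw value.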
+#[allow(non_camel_case_types)] +#[allow(clippy::upper_case_acronyms)] +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub(crate) enum FaultUnit { + /// Decompress / pixel fetch + DCMP(u8), + /// USC L1 Cache (device loads/stores) + UL1C(u8), + /// Compress / pixel store + CMP(u8), + GSL1(u8), + IAP(u8), + VCE(u8), + /// Tiling Engine + TE(u8), + RAS(u8), + /// Vertex Data Master + VDM(u8), + PPP(u8), + /// ISP Parameter Fetch + IPF(u8), + IPF_CPF(u8), + VF(u8), + VF_CPF(u8), + /// Depth/Stencil load/store + ZLS(u8), + + /// Parameter Management + dPM, + /// Compute Data Master + dCDM_KS(u8), + dIPP, + dIPP_CS, + // Vertex Data Master + dVDM_CSD, + dVDM_SSD, + dVDM_ILF, + dVDM_ILD, + dRDE(u8), + FC, + GSL2, + + /// Graphics L2 Cache Control? + GL2CC_META(u8), + GL2CC_MB, + + /// Parameter Management + gPM_SP(u8), + /// Vertex Data Master - CSD + gVDM_CSD_SP(u8), + gVDM_SSD_SP(u8), + gVDM_ILF_SP(u8), + gVDM_TFP_SP(u8), + gVDM_MMB_SP(u8), + /// Compute Data Master + gCDM_CS_KS0_SP(u8), + gCDM_CS_KS1_SP(u8), + gCDM_CS_KS2_SP(u8), + gCDM_KS0_SP(u8), + gCDM_KS1_SP(u8), + gCDM_KS2_SP(u8), + gIPP_SP(u8), + gIPP_CS_SP(u8), + gRDE0_SP(u8), + gRDE1_SP(u8), + + Unknown(u8), +} + +/// Reason for an MMU fault. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub(crate) enum FaultReason { + Unmapped, + AfFault, + WriteOnly, + ReadOnly, + NoAccess, + Unknown(u8), +} + +/// Collection of information about an MMU fault. +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub(crate) struct FaultInfo { + pub(crate) address: u64, + pub(crate) sideband: u8, + pub(crate) vm_slot: u32, + pub(crate) unit_code: u8, + pub(crate) unit: FaultUnit, + pub(crate) level: u8, + pub(crate) unk_5: u8, + pub(crate) read: bool, + pub(crate) reason: FaultReason, +} + +/// Device resources for this GPU instance. +pub(crate) struct Resources { + dev: device::Device, + asc: IoMem<ASC_CTL_SIZE>, + sgx: IoMem<SGX_SIZE>, +} + +impl Resources { + /// Map the required resources given our platform device. + pub(crate) fn new(pdev: &mut platform::Device) -> Result<Resources> { + // TODO: add device abstraction to ioremap by name + let asc_res = unsafe { pdev.ioremap_resource(0)? }; + let sgx_res = unsafe { pdev.ioremap_resource(1)? }; + + Ok(Resources { + // SAFETY: This device does DMA via the UAT IOMMU. + dev: device::Device::from_dev(pdev), + asc: asc_res, + sgx: sgx_res, + }) + } + + fn sgx_read32(&self, off: usize) -> u32 { + self.sgx.readl_relaxed(off) + } + + /* Not yet used + fn sgx_write32(&self, off: usize, val: u32) { + self.sgx.writel_relaxed(val, off) + } + */ + + fn sgx_read64(&self, off: usize) -> u64 { + self.sgx.readq_relaxed(off) + } + + /* Not yet used + fn sgx_write64(&self, off: usize, val: u64) { + self.sgx.writeq_relaxed(val, off) + } + */ + + /// Initialize the MMIO registers for the GPU. + pub(crate) fn init_mmio(&self) -> Result { + // Nothing to do for now... + + Ok(()) + } + + /// Start the ASC coprocessor CPU. + pub(crate) fn start_cpu(&self) -> Result { + let val = self.asc.readl_relaxed(CPU_CONTROL); + + self.asc.writel_relaxed(val | CPU_RUN, CPU_CONTROL); + + Ok(()) + } + + /// Get the GPU identification info from registers. + /// + /// See [`hw::GpuIdConfig`] for the result. 
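+    ///
+    /// The generation, variant and revision are decoded from `ID_VERSION`, the core, cluster and
+    /// unit counts from `ID_COUNTS_1`/`ID_COUNTS_2`/`ID_CLUSTERS`, and the per-cluster core masks
+    /// are unpacked from `CORE_MASK_0`/`CORE_MASK_1`. As a purely illustrative example (made-up
+    /// register value), an `id_version` of `0x0402_1000` would decode as generation G13,
+    /// variant G, revision B0 under the match arms below.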
+ pub(crate) fn get_gpu_id(&self) -> Result<hw::GpuIdConfig> { + let id_version = self.sgx_read32(ID_VERSION); + let id_unk08 = self.sgx_read32(ID_UNK08); + let id_counts_1 = self.sgx_read32(ID_COUNTS_1); + let id_counts_2 = self.sgx_read32(ID_COUNTS_2); + let id_unk18 = self.sgx_read32(ID_UNK18); + let id_clusters = self.sgx_read32(ID_CLUSTERS); + + dev_info!( + self.dev, + "GPU ID registers: {:#x} {:#x} {:#x} {:#x} {:#x} {:#x}\n", + id_version, + id_unk08, + id_counts_1, + id_counts_2, + id_unk18, + id_clusters + ); + + let core_mask_0 = self.sgx_read32(CORE_MASK_0); + let core_mask_1 = self.sgx_read32(CORE_MASK_1); + let mut core_mask = (core_mask_0 as u64) | ((core_mask_1 as u64) << 32); + + dev_info!(self.dev, "Core mask: {:#x}\n", core_mask); + + let num_clusters = (id_clusters >> 12) & 0xff; + let num_cores = id_counts_1 & 0xff; + + if num_cores * num_clusters > 64 { + dev_err!( + self.dev, + "Too many total cores ({} x {} > 64)\n", + num_clusters, + num_cores + ); + return Err(ENODEV); + } + + let mut core_masks = Vec::new(); + let mut total_active_cores: u32 = 0; + + let max_core_mask = (1u64 << num_cores) - 1; + for _i in 0..num_clusters { + let mask = core_mask & max_core_mask; + core_masks.try_push(mask as u32)?; + core_mask >>= num_cores; + total_active_cores += mask.count_ones(); + } + let mut core_masks_packed = Vec::new(); + core_masks_packed.try_push(core_mask_0)?; + if core_mask_1 != 0 { + core_masks_packed.try_push(core_mask_1)?; + } + + if core_mask != 0 { + dev_err!(self.dev, "Leftover core mask: {:#x}\n", core_mask); + return Err(EIO); + } + + let (gpu_rev, gpu_rev_id) = match (id_version >> 8) & 0xff { + 0x00 => (hw::GpuRevision::A0, hw::GpuRevisionID::A0), + 0x01 => (hw::GpuRevision::A1, hw::GpuRevisionID::A1), + 0x10 => (hw::GpuRevision::B0, hw::GpuRevisionID::B0), + 0x11 => (hw::GpuRevision::B1, hw::GpuRevisionID::B1), + 0x20 => (hw::GpuRevision::C0, hw::GpuRevisionID::C0), + 0x21 => (hw::GpuRevision::C1, hw::GpuRevisionID::C1), + a => { + dev_err!(self.dev, "Unknown GPU revision {}\n", a); + return Err(ENODEV); + } + }; + + Ok(hw::GpuIdConfig { + gpu_gen: match (id_version >> 24) & 0xff { + 4 => hw::GpuGen::G13, + 5 => hw::GpuGen::G14, + a => { + dev_err!(self.dev, "Unknown GPU generation {}\n", a); + return Err(ENODEV); + } + }, + gpu_variant: match (id_version >> 16) & 0xff { + 1 => hw::GpuVariant::P, // Guess + 2 => hw::GpuVariant::G, + 3 => hw::GpuVariant::S, + 4 => { + if num_clusters > 4 { + hw::GpuVariant::D + } else { + hw::GpuVariant::C + } + } + a => { + dev_err!(self.dev, "Unknown GPU variant {}\n", a); + return Err(ENODEV); + } + }, + gpu_rev, + gpu_rev_id, + max_dies: (id_clusters >> 20) & 0xf, + num_clusters, + num_cores, + num_frags: (id_counts_1 >> 8) & 0xff, + num_gps: (id_counts_2 >> 16) & 0xff, + total_active_cores, + core_masks, + core_masks_packed, + }) + } + + /// Get the fault information from the MMU status register, if one occurred. 
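+    ///
+    /// The bit layout below reflects how this function decodes `FAULT_INFO` (driver
+    /// interpretation, not an official register description): bit 0 is the fault-valid flag,
+    /// bits 1..=3 the reason, bit 4 the read (vs. write) flag, bits 5..=6 and 7..=8 the `unk_5`
+    /// and level fields, bits 9..=16 the unit code, bits 17..=22 the VM slot, bits 23..=29 the
+    /// sideband, and the remaining high bits the fault address shifted right by 6.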
+ pub(crate) fn get_fault_info(&self) -> Option<FaultInfo> { + let fault_info = self.sgx_read64(FAULT_INFO); + + if fault_info & 1 == 0 { + return None; + } + + let unit_code = ((fault_info >> 9) & 0xff) as u8; + let unit = match unit_code { + 0x00..=0x9f => match unit_code & 0xf { + 0x0 => FaultUnit::DCMP(unit_code >> 4), + 0x1 => FaultUnit::UL1C(unit_code >> 4), + 0x2 => FaultUnit::CMP(unit_code >> 4), + 0x3 => FaultUnit::GSL1(unit_code >> 4), + 0x4 => FaultUnit::IAP(unit_code >> 4), + 0x5 => FaultUnit::VCE(unit_code >> 4), + 0x6 => FaultUnit::TE(unit_code >> 4), + 0x7 => FaultUnit::RAS(unit_code >> 4), + 0x8 => FaultUnit::VDM(unit_code >> 4), + 0x9 => FaultUnit::PPP(unit_code >> 4), + 0xa => FaultUnit::IPF(unit_code >> 4), + 0xb => FaultUnit::IPF_CPF(unit_code >> 4), + 0xc => FaultUnit::VF(unit_code >> 4), + 0xd => FaultUnit::VF_CPF(unit_code >> 4), + 0xe => FaultUnit::ZLS(unit_code >> 4), + _ => FaultUnit::Unknown(unit_code), + }, + 0xa1 => FaultUnit::dPM, + 0xa2 => FaultUnit::dCDM_KS(0), + 0xa3 => FaultUnit::dCDM_KS(1), + 0xa4 => FaultUnit::dCDM_KS(2), + 0xa5 => FaultUnit::dIPP, + 0xa6 => FaultUnit::dIPP_CS, + 0xa7 => FaultUnit::dVDM_CSD, + 0xa8 => FaultUnit::dVDM_SSD, + 0xa9 => FaultUnit::dVDM_ILF, + 0xaa => FaultUnit::dVDM_ILD, + 0xab => FaultUnit::dRDE(0), + 0xac => FaultUnit::dRDE(1), + 0xad => FaultUnit::FC, + 0xae => FaultUnit::GSL2, + 0xb0..=0xb7 => FaultUnit::GL2CC_META(unit_code & 0xf), + 0xb8 => FaultUnit::GL2CC_MB, + 0xe0..=0xff => match unit_code & 0xf { + 0x0 => FaultUnit::gPM_SP((unit_code >> 4) & 1), + 0x1 => FaultUnit::gVDM_CSD_SP((unit_code >> 4) & 1), + 0x2 => FaultUnit::gVDM_SSD_SP((unit_code >> 4) & 1), + 0x3 => FaultUnit::gVDM_ILF_SP((unit_code >> 4) & 1), + 0x4 => FaultUnit::gVDM_TFP_SP((unit_code >> 4) & 1), + 0x5 => FaultUnit::gVDM_MMB_SP((unit_code >> 4) & 1), + 0x6 => FaultUnit::gCDM_CS_KS0_SP((unit_code >> 4) & 1), + 0x7 => FaultUnit::gCDM_CS_KS1_SP((unit_code >> 4) & 1), + 0x8 => FaultUnit::gCDM_CS_KS2_SP((unit_code >> 4) & 1), + 0x9 => FaultUnit::gCDM_KS0_SP((unit_code >> 4) & 1), + 0xa => FaultUnit::gCDM_KS1_SP((unit_code >> 4) & 1), + 0xb => FaultUnit::gCDM_KS2_SP((unit_code >> 4) & 1), + 0xc => FaultUnit::gIPP_SP((unit_code >> 4) & 1), + 0xd => FaultUnit::gIPP_CS_SP((unit_code >> 4) & 1), + 0xe => FaultUnit::gRDE0_SP((unit_code >> 4) & 1), + 0xf => FaultUnit::gRDE1_SP((unit_code >> 4) & 1), + _ => FaultUnit::Unknown(unit_code), + }, + _ => FaultUnit::Unknown(unit_code), + }; + + let reason = match (fault_info >> 1) & 0x7 { + 0 => FaultReason::Unmapped, + 1 => FaultReason::AfFault, + 2 => FaultReason::WriteOnly, + 3 => FaultReason::ReadOnly, + 4 => FaultReason::NoAccess, + a => FaultReason::Unknown(a as u8), + }; + + Some(FaultInfo { + address: (fault_info >> 30) << 6, + sideband: ((fault_info >> 23) & 0x7f) as u8, + vm_slot: ((fault_info >> 17) & 0x3f) as u32, + unit_code, + unit, + level: ((fault_info >> 7) & 3) as u8, + unk_5: ((fault_info >> 5) & 3) as u8, + read: (fault_info & (1 << 4)) != 0, + reason, + }) + } +} diff --git a/drivers/gpu/drm/asahi/slotalloc.rs b/drivers/gpu/drm/asahi/slotalloc.rs new file mode 100644 index 000000000000..6493111643fe --- /dev/null +++ b/drivers/gpu/drm/asahi/slotalloc.rs @@ -0,0 +1,292 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT + +//! Generic slot allocator +//! +//! This is a simple allocator to manage fixed-size pools of GPU resources that are transiently +//! required during command execution. Each item resides in a "slot" at a given index. Users borrow +//! and return free items from the available pool. 
+//! +//! Allocations are "sticky", and return a token that callers can use to request the same slot +//! again later. This allows slots to be lazily invalidated, so that multiple uses by the same user +//! avoid any actual cleanup work. +//! +//! The allocation policy is currently a simple LRU mechanism, doing a full linear scan over the +//! slots when no token was previously provided. This is probably good enough, since in the absence +//! of serious system contention most allocation requests will be immediately fulfilled from the +//! previous slot without doing an LRU scan. + +use core::ops::{Deref, DerefMut}; +use kernel::{ + error::{code::*, Result}, + prelude::*, + sync::{Arc, CondVar, Mutex, UniqueArc}, +}; + +/// Trait representing a single item within a slot. +pub(crate) trait SlotItem { + /// Arbitrary user data associated with the SlotAllocator. + type Data; + + /// Called eagerly when this item is released back into the available pool. + fn release(&mut self, _data: &mut Self::Data, _slot: u32) {} +} + +/// Trivial implementation for users which do not require any slot data nor any allocator data. +impl SlotItem for () { + type Data = (); +} + +/// Represents a current or previous allocation of an item from a slot. Users keep `SlotToken`s +/// around across allocations to request that, if possible, the same slot be reused. +#[derive(Copy, Clone, Debug)] +pub(crate) struct SlotToken { + time: u64, + slot: u32, +} + +impl SlotToken { + /// Returns the slot index that this token represents a past assignment to. + pub(crate) fn last_slot(&self) -> u32 { + self.slot + } +} + +/// A guard representing active ownership of a slot. +pub(crate) struct Guard<T: SlotItem> { + item: Option<T>, + changed: bool, + token: SlotToken, + alloc: Arc<SlotAllocatorOuter<T>>, +} + +impl<T: SlotItem> Guard<T> { + /// Returns the active slot owned by this `Guard`. + pub(crate) fn slot(&self) -> u32 { + self.token.slot + } + + /// Returns `true` if the slot changed since the last allocation (or no `SlotToken` was + /// provided), or `false` if the previously allocated slot was successfully re-acquired with + /// no other users in the interim. + pub(crate) fn changed(&self) -> bool { + self.changed + } + + /// Returns a `SlotToken` that can be used to re-request the same slot at a later time, after + /// this `Guard` is dropped. + pub(crate) fn token(&self) -> SlotToken { + self.token + } +} + +impl<T: SlotItem> Deref for Guard<T> { + type Target = T; + + fn deref(&self) -> &Self::Target { + self.item.as_ref().expect("SlotItem Guard lost our item!") + } +} + +impl<T: SlotItem> DerefMut for Guard<T> { + fn deref_mut(&mut self) -> &mut Self::Target { + self.item.as_mut().expect("SlotItem Guard lost our item!") + } +} + +/// A slot item that is currently free. +struct Entry<T: SlotItem> { + item: T, + get_time: u64, + drop_time: u64, +} + +/// Inner data for the `SlotAllocator`, protected by a `Mutex`. +struct SlotAllocatorInner<T: SlotItem> { + data: T::Data, + slots: Vec<Option<Entry<T>>>, + get_count: u64, + drop_count: u64, +} + +/// A single slot allocator instance. +struct SlotAllocatorOuter<T: SlotItem> { + inner: Mutex<SlotAllocatorInner<T>>, + cond: CondVar, +} + +/// A shared reference to a slot allocator instance. +pub(crate) struct SlotAllocator<T: SlotItem>(Arc<SlotAllocatorOuter<T>>); + +impl<T: SlotItem> SlotAllocator<T> { + /// Creates a new `SlotAllocator`, with a fixed number of slots and arbitrary associated data. 
+ /// + /// The caller provides a constructor callback which takes a reference to the `T::Data` and + /// creates a single slot. This is called during construction to create all the initial + /// items, which then live the lifetime of the `SlotAllocator`. + pub(crate) fn new( + num_slots: u32, + mut data: T::Data, + mut constructor: impl FnMut(&mut T::Data, u32) -> T, + ) -> Result<SlotAllocator<T>> { + let mut slots = Vec::try_with_capacity(num_slots as usize)?; + + for i in 0..num_slots { + slots + .try_push(Some(Entry { + item: constructor(&mut data, i), + get_time: 0, + drop_time: 0, + })) + .expect("try_push() failed after reservation"); + } + + let inner = SlotAllocatorInner { + data, + slots, + get_count: 0, + drop_count: 0, + }; + + let mut alloc = Pin::from(UniqueArc::try_new(SlotAllocatorOuter { + // SAFETY: `condvar_init!` is called below. + cond: unsafe { CondVar::new() }, + // SAFETY: `mutex_init!` is called below. + inner: unsafe { Mutex::new(inner) }, + })?); + + // SAFETY: `cond` is pinned when `alloc` is. + let pinned = unsafe { alloc.as_mut().map_unchecked_mut(|s| &mut s.cond) }; + kernel::condvar_init!(pinned, "SlotAllocator::cond"); + + // SAFETY: `inner` is pinned when `alloc` is. + let pinned = unsafe { alloc.as_mut().map_unchecked_mut(|s| &mut s.inner) }; + kernel::mutex_init!(pinned, "SlotAllocator::inner"); + + Ok(SlotAllocator(alloc.into())) + } + + /// Calls a callback on the inner data associated with this allocator, taking the lock. + pub(crate) fn with_inner<RetVal>(&self, cb: impl FnOnce(&mut T::Data) -> RetVal) -> RetVal { + let mut inner = self.0.inner.lock(); + cb(&mut inner.data) + } + + /// Gets a fresh slot, optionally reusing a previous allocation if a `SlotToken` is provided. + /// + /// Blocks if no slots are free. + pub(crate) fn get(&self, token: Option<SlotToken>) -> Result<Guard<T>> { + self.get_inner(token, |_a, _b| Ok(())) + } + + /// Gets a fresh slot, optionally reusing a previous allocation if a `SlotToken` is provided. + /// + /// Blocks if no slots are free. + /// + /// This version allows the caller to pass in a callback that gets a mutable reference to the + /// user data for the allocator and the freshly acquired slot, which is called before the + /// allocator lock is released. This can be used to perform bookkeeping associated with + /// specific slots (such as tracking their current owner). 
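+    ///
+    /// Rough usage sketch (illustrative only, not a compiled doctest; `alloc`, `prev_token`,
+    /// `data.owner` and `my_id` are hypothetical caller state):
+    ///
+    /// ```text
+    /// let guard = alloc.get_inner(prev_token, |data, guard| {
+    ///     data.owner[guard.slot() as usize] = Some(my_id); // bookkeeping under the lock
+    ///     Ok(())
+    /// })?;
+    /// if guard.changed() {
+    ///     // The slot was reassigned since the last use: reinitialize any cached state.
+    /// }
+    /// let token = Some(guard.token()); // keep for the next request
+    /// ```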
+ pub(crate) fn get_inner( + &self, + token: Option<SlotToken>, + cb: impl FnOnce(&mut T::Data, &mut Guard<T>) -> Result<()>, + ) -> Result<Guard<T>> { + let mut inner = self.0.inner.lock(); + + if let Some(token) = token { + let slot = &mut inner.slots[token.slot as usize]; + if slot.is_some() { + let count = slot.as_ref().unwrap().get_time; + if count == token.time { + let mut guard = Guard { + item: Some(slot.take().unwrap().item), + token, + changed: false, + alloc: self.0.clone(), + }; + cb(&mut inner.data, &mut guard)?; + return Ok(guard); + } + } + } + + let mut first = true; + let slot = loop { + let mut oldest_time = u64::MAX; + let mut oldest_slot = 0u32; + + for (i, slot) in inner.slots.iter().enumerate() { + if let Some(slot) = slot.as_ref() { + if slot.drop_time < oldest_time { + oldest_slot = i as u32; + oldest_time = slot.drop_time; + } + } + } + + if oldest_time == u64::MAX { + if first { + pr_warn!( + "{}: out of slots, blocking\n", + core::any::type_name::<Self>() + ); + } + first = false; + if self.0.cond.wait(&mut inner) { + return Err(ERESTARTSYS); + } + } else { + break oldest_slot; + } + }; + + inner.get_count += 1; + + let item = inner.slots[slot as usize] + .take() + .expect("Someone stole our slot?") + .item; + + let mut guard = Guard { + item: Some(item), + changed: true, + token: SlotToken { + time: inner.get_count, + slot, + }, + alloc: self.0.clone(), + }; + + cb(&mut inner.data, &mut guard)?; + Ok(guard) + } +} + +impl<T: SlotItem> Clone for SlotAllocator<T> { + fn clone(&self) -> Self { + SlotAllocator(self.0.clone()) + } +} + +impl<T: SlotItem> Drop for Guard<T> { + fn drop(&mut self) { + let mut inner = self.alloc.inner.lock(); + if inner.slots[self.token.slot as usize].is_some() { + pr_crit!( + "{}: tried to return an item into a full slot ({})\n", + core::any::type_name::<Self>(), + self.token.slot + ); + } else { + inner.drop_count += 1; + let mut item = self.item.take().expect("Guard lost its item"); + item.release(&mut inner.data, self.token.slot); + inner.slots[self.token.slot as usize] = Some(Entry { + item, + get_time: self.token.time, + drop_time: inner.drop_count, + }); + self.alloc.cond.notify_one(); + } + } +} diff --git a/drivers/gpu/drm/asahi/util.rs b/drivers/gpu/drm/asahi/util.rs new file mode 100644 index 000000000000..8d1a37f17cd8 --- /dev/null +++ b/drivers/gpu/drm/asahi/util.rs @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT + +//! Miscellaneous utility functions + +use core::ops::{Add, BitAnd, Div, Not, Sub}; + +/// Aligns an integer type to a power of two. +pub(crate) fn align<T>(a: T, b: T) -> T +where + T: Copy + + Default + + BitAnd<Output = T> + + Not<Output = T> + + Add<Output = T> + + Sub<Output = T> + + Div<Output = T> + + core::cmp::PartialEq, +{ + let def: T = Default::default(); + #[allow(clippy::eq_op)] + let one: T = !def / !def; + + assert!((b & (b - one)) == def); + + (a + b - one) & !(b - one) +} + +/// Integer division rounding up. +pub(crate) fn div_ceil<T>(a: T, b: T) -> T +where + T: Copy + + Default + + BitAnd<Output = T> + + Not<Output = T> + + Add<Output = T> + + Sub<Output = T> + + Div<Output = T>, +{ + let def: T = Default::default(); + #[allow(clippy::eq_op)] + let one: T = !def / !def; + + (a + b - one) / b +} diff --git a/drivers/gpu/drm/asahi/workqueue.rs b/drivers/gpu/drm/asahi/workqueue.rs new file mode 100644 index 000000000000..ce1d1f89e48e --- /dev/null +++ b/drivers/gpu/drm/asahi/workqueue.rs @@ -0,0 +1,880 @@ +// SPDX-License-Identifier: GPL-2.0-only OR MIT + +//! 
GPU command execution queues +//! +//! The AGX GPU firmware schedules GPU work commands out of work queues, which are ring buffers of +//! pointers to work commands. There can be an arbitrary number of work queues. Work queues have an +//! associated type (vertex, fragment, or compute) and may only contain generic commands or commands +//! specific to that type. +//! +//! This module manages queueing work commands into a work queue and submitting them for execution +//! by the firmware. An active work queue needs an event to signal completion of its work, which is +//! owned by what we call a batch. This event then notifies the work queue when work is completed, +//! and that triggers freeing of all resources associated with that work. An idle work queue gives +//! up its associated event. + +use crate::debug::*; +use crate::fw::channels::PipeType; +use crate::fw::types::*; +use crate::fw::workqueue::*; +use crate::object::OpaqueGpuObject; +use crate::regs::FaultReason; +use crate::{box_in_place, no_debug, place}; +use crate::{channel, driver, event, fw, gpu, object, regs}; +use core::num::NonZeroU64; +use core::sync::atomic::Ordering; +use kernel::{ + bindings, + error::code::*, + prelude::*, + sync::{Arc, Guard, Mutex, UniqueArc}, +}; + +const DEBUG_CLASS: DebugFlags = DebugFlags::WorkQueue; + +const MAX_JOB_SLOTS: u32 = 127; + +/// An enum of possible errors that might cause a piece of work to fail execution. +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub(crate) enum WorkError { + /// GPU timeout (command execution took too long). + Timeout, + /// GPU MMU fault (invalid access). + Fault(regs::FaultInfo), + /// Work failed due to an error caused by other concurrent GPU work. + Killed, + /// The GPU crashed. + NoDevice, + /// Unknown reason. + Unknown, +} + +impl From<WorkError> for bindings::drm_asahi_result_info { + fn from(err: WorkError) -> Self { + match err { + WorkError::Fault(info) => Self { + status: bindings::drm_asahi_status_DRM_ASAHI_STATUS_FAULT, + fault_type: match info.reason { + FaultReason::Unmapped => bindings::drm_asahi_fault_DRM_ASAHI_FAULT_UNMAPPED, + FaultReason::AfFault => bindings::drm_asahi_fault_DRM_ASAHI_FAULT_AF_FAULT, + FaultReason::WriteOnly => bindings::drm_asahi_fault_DRM_ASAHI_FAULT_WRITE_ONLY, + FaultReason::ReadOnly => bindings::drm_asahi_fault_DRM_ASAHI_FAULT_READ_ONLY, + FaultReason::NoAccess => bindings::drm_asahi_fault_DRM_ASAHI_FAULT_NO_ACCESS, + FaultReason::Unknown(_) => bindings::drm_asahi_fault_DRM_ASAHI_FAULT_UNKNOWN, + }, + unit: info.unit_code.into(), + sideband: info.sideband.into(), + level: info.level, + extra: info.unk_5.into(), + is_read: info.read as u8, + pad: 0, + address: info.address, + }, + a => Self { + status: match a { + WorkError::Timeout => bindings::drm_asahi_status_DRM_ASAHI_STATUS_TIMEOUT, + WorkError::Killed => bindings::drm_asahi_status_DRM_ASAHI_STATUS_KILLED, + WorkError::NoDevice => bindings::drm_asahi_status_DRM_ASAHI_STATUS_NO_DEVICE, + _ => bindings::drm_asahi_status_DRM_ASAHI_STATUS_UNKNOWN_ERROR, + }, + ..Default::default() + }, + } + } +} + +impl From<WorkError> for kernel::error::Error { + fn from(err: WorkError) -> Self { + match err { + WorkError::Timeout => ETIMEDOUT, + // Not EFAULT because that's for userspace faults + WorkError::Fault(_) => EIO, + WorkError::Unknown => ENODATA, + WorkError::Killed => ECANCELED, + WorkError::NoDevice => ENODEV, + } + } +} + +/// A GPU context tracking structure, which must be explicitly invalidated when dropped. 
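+///
+/// The invalidation happens in the [`Drop`] implementation below, which asks the firmware to
+/// invalidate the context via `gpu.invalidate_context()`, so no explicit call is required by
+/// users of this type.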
+pub(crate) struct GpuContext { + dev: driver::AsahiDevice, + data: GpuObject<fw::workqueue::GpuContextData>, +} +no_debug!(GpuContext); + +impl GpuContext { + /// Allocate a new GPU context. + pub(crate) fn new( + dev: &driver::AsahiDevice, + alloc: &mut gpu::KernelAllocators, + ) -> Result<GpuContext> { + Ok(GpuContext { + dev: dev.clone(), + data: alloc + .shared + .new_object(Default::default(), |_inner| Default::default())?, + }) + } + + /// Returns the GPU pointer to the inner GPU context data structure. + pub(crate) fn gpu_pointer(&self) -> GpuPointer<'_, fw::workqueue::GpuContextData> { + self.data.gpu_pointer() + } +} + +impl Drop for GpuContext { + fn drop(&mut self) { + mod_dev_dbg!(self.dev, "GpuContext: Invalidating GPU context\n"); + let dev = self.dev.data(); + if dev.gpu.invalidate_context(&self.data).is_err() { + dev_err!(self.dev, "GpuContext: Failed to invalidate GPU context!\n"); + } + } +} + +struct SubmittedWork<O, C> +where + O: OpaqueGpuObject, + C: FnOnce(O, Option<WorkError>) + Send + Sync + 'static, +{ + object: O, + value: EventValue, + error: Option<WorkError>, + wptr: u32, + vm_slot: u32, + callback: C, +} + +trait GenSubmittedWork: Send + Sync { + fn gpu_va(&self) -> NonZeroU64; + fn value(&self) -> event::EventValue; + fn wptr(&self) -> u32; + fn set_wptr(&mut self, wptr: u32); + fn mark_error(&mut self, error: WorkError); + fn complete(self: Box<Self>); +} + +impl<O: OpaqueGpuObject, C: FnOnce(O, Option<WorkError>) + Send + Sync> GenSubmittedWork + for SubmittedWork<O, C> +{ + fn gpu_va(&self) -> NonZeroU64 { + self.object.gpu_va() + } + + fn value(&self) -> event::EventValue { + self.value + } + + fn wptr(&self) -> u32 { + self.wptr + } + + fn set_wptr(&mut self, wptr: u32) { + self.wptr = wptr; + } + + fn complete(self: Box<Self>) { + let SubmittedWork { + object, + value: _, + error, + wptr: _, + vm_slot: _, + callback, + } = *self; + + callback(object, error); + } + + fn mark_error(&mut self, error: WorkError) { + mod_pr_debug!("WorkQueue: Command at value {:#x?} failed\n", self.value); + self.error = Some(match error { + WorkError::Fault(info) if info.vm_slot != self.vm_slot => WorkError::Killed, + err => err, + }); + } +} + +/// Inner data for managing a single work queue. +#[versions(AGX)] +struct WorkQueueInner { + event_manager: Arc<event::EventManager>, + info: GpuObject<QueueInfo::ver>, + new: bool, + pipe_type: PipeType, + size: u32, + wptr: u32, + pending: Vec<Box<dyn GenSubmittedWork>>, + last_token: Option<event::Token>, + pending_jobs: usize, + last_submitted: Option<event::EventValue>, + last_completed: Option<event::EventValue>, + event: Option<(event::Event, event::EventValue)>, + priority: u32, + commit_seq: u64, + submit_seq: u64, +} + +/// An instance of a work queue. +#[versions(AGX)] +pub(crate) struct WorkQueue { + info_pointer: GpuWeakPointer<QueueInfo::ver>, + inner: Mutex<WorkQueueInner::ver>, +} + +#[versions(AGX)] +impl WorkQueueInner::ver { + /// Return the GPU done pointer, representing how many work items have been completed by the + /// GPU. 
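+    ///
+    /// The pointer is loaded from the shared ring state with `Ordering::Acquire`, which this
+    /// driver assumes pairs with the firmware's release of completed ring entries.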
+ fn doneptr(&self) -> u32 { + self.info + .state + .with(|raw, _inner| raw.gpu_doneptr.load(Ordering::Acquire)) + } +} + +#[versions(AGX)] +#[derive(Copy, Clone)] +pub(crate) struct QueueEventInfo { + pub(crate) stamp_pointer: GpuWeakPointer<Stamp>, + pub(crate) fw_stamp_pointer: GpuWeakPointer<FwStamp>, + pub(crate) slot: u32, + pub(crate) value: event::EventValue, + pub(crate) cmd_seq: u64, + pub(crate) info_ptr: GpuWeakPointer<QueueInfo::ver>, +} + +#[versions(AGX)] +pub(crate) struct Job { + wq: Arc<WorkQueue::ver>, + event_info: QueueEventInfo::ver, + start_value: EventValue, + pending: Vec<Box<dyn GenSubmittedWork>>, + committed: bool, + submitted: bool, + event_count: usize, +} + +#[versions(AGX)] +pub(crate) struct JobSubmission<'a> { + inner: Option<Guard<'a, Mutex<WorkQueueInner::ver>>>, + wptr: u32, + event_count: usize, + command_count: usize, +} + +#[versions(AGX)] +impl Job::ver { + pub(crate) fn event_info(&self) -> QueueEventInfo::ver { + let mut info = self.event_info; + info.cmd_seq += self.event_count as u64; + + info + } + + pub(crate) fn next_seq(&mut self) { + self.event_count += 1; + self.event_info.value.increment(); + } + + pub(crate) fn add<O: object::OpaqueGpuObject + 'static>( + &mut self, + command: O, + vm_slot: u32, + ) -> Result { + self.add_cb(command, vm_slot, |_, _| {}) + } + + pub(crate) fn add_cb<O: object::OpaqueGpuObject + 'static>( + &mut self, + command: O, + vm_slot: u32, + callback: impl FnOnce(O, Option<WorkError>) + Sync + Send + 'static, + ) -> Result { + if self.committed { + pr_err!("WorkQueue: Tried to mutate committed Job\n"); + return Err(EINVAL); + } + + self.pending.try_push(Box::try_new(SubmittedWork::<_, _> { + object: command, + value: self.event_info.value.next(), + error: None, + callback, + wptr: 0, + vm_slot, + })?)?; + + Ok(()) + } + + pub(crate) fn commit(&mut self) -> Result { + if self.committed { + pr_err!("WorkQueue: Tried to commit committed Job\n"); + return Err(EINVAL); + } + + if self.pending.is_empty() { + pr_err!("WorkQueue: Job::commit() with no commands\n"); + return Err(EINVAL); + } + + let mut inner = self.wq.inner.lock(); + + let ev = inner.event.as_mut().expect("WorkQueue: Job lost its event"); + + if ev.1 != self.start_value { + pr_err!( + "WorkQueue: Job::commit() out of order (event slot {} {:?} != {:?}\n", + ev.0.slot(), + ev.1, + self.start_value + ); + return Err(EINVAL); + } + + ev.1 = self.event_info.value; + inner.commit_seq += self.pending.len() as u64; + self.committed = true; + + Ok(()) + } + + pub(crate) fn can_submit(&self) -> bool { + self.wq.free_slots() > self.event_count && self.wq.free_space() > self.pending.len() + } + + pub(crate) fn submit(&mut self) -> Result<JobSubmission::ver<'_>> { + if !self.committed { + pr_err!("WorkQueue: Tried to submit uncommitted Job\n"); + return Err(EINVAL); + } + + if self.submitted { + pr_err!("WorkQueue: Tried to submit Job twice\n"); + return Err(EINVAL); + } + + if self.pending.is_empty() { + pr_err!("WorkQueue: Job::submit() with no commands\n"); + return Err(EINVAL); + } + + let mut inner = self.wq.inner.lock(); + + if inner.submit_seq != self.event_info.cmd_seq { + pr_err!( + "WorkQueue: Job::submit() out of order (submit_seq {} != {})\n", + inner.submit_seq, + self.event_info.cmd_seq + ); + return Err(EINVAL); + } + + if inner.commit_seq < (self.event_info.cmd_seq + self.pending.len() as u64) { + pr_err!( + "WorkQueue: Job::submit() out of order (commit_seq {} != {})\n", + inner.commit_seq, + (self.event_info.cmd_seq + self.pending.len() as u64) + ); + 
return Err(EINVAL); + } + + let mut wptr = inner.wptr; + let command_count = self.pending.len(); + + if inner.free_space() <= command_count { + pr_err!("WorkQueue: Job does not fit in ring buffer\n"); + return Err(EBUSY); + } + + inner.pending.try_reserve(command_count)?; + + inner.last_submitted = inner.event.as_ref().map(|e| e.1); + + for mut command in self.pending.drain(..) { + command.set_wptr(wptr); + + let next_wptr = (wptr + 1) % inner.size; + assert!(inner.doneptr() != next_wptr); + inner.info.ring[wptr as usize] = command.gpu_va().get(); + wptr = next_wptr; + + // Cannot fail, since we did a try_reserve(1) above + inner + .pending + .try_push(command) + .expect("try_push() failed after try_reserve()"); + } + + self.submitted = true; + + Ok(JobSubmission::ver { + inner: Some(inner), + wptr, + command_count, + event_count: self.event_count, + }) + } +} + +#[versions(AGX)] +impl<'a> JobSubmission::ver<'a> { + pub(crate) fn run(mut self, channel: &mut channel::PipeChannel::ver) { + let command_count = self.command_count; + let mut inner = self.inner.take().expect("No inner?"); + let wptr = self.wptr; + core::mem::forget(self); + + inner + .info + .state + .with(|raw, _inner| raw.cpu_wptr.store(wptr, Ordering::Release)); + + inner.wptr = wptr; + + let event = inner.event.as_mut().expect("JobSubmission lost its event"); + + let event_slot = event.0.slot(); + + let msg = fw::channels::RunWorkQueueMsg::ver { + pipe_type: inner.pipe_type, + work_queue: Some(inner.info.weak_pointer()), + wptr: inner.wptr, + event_slot, + is_new: inner.new, + __pad: Default::default(), + }; + channel.send(&msg); + inner.new = false; + + inner.submit_seq += command_count as u64; + } + + pub(crate) fn pipe_type(&self) -> PipeType { + self.inner.as_ref().expect("No inner?").pipe_type + } + + pub(crate) fn priority(&self) -> u32 { + self.inner.as_ref().expect("No inner?").priority + } +} + +#[versions(AGX)] +impl Drop for Job::ver { + fn drop(&mut self) { + mod_pr_debug!("WorkQueue: Dropping Job\n"); + let mut inner = self.wq.inner.lock(); + + if self.committed && !self.submitted { + let pipe_type = inner.pipe_type; + let event = inner.event.as_mut().expect("Job lost its event"); + mod_pr_debug!( + "WorkQueue({:?}): Roll back {} events (slot {} val {:#x?}) and {} commands\n", + pipe_type, + self.event_count, + event.0.slot(), + event.1, + self.pending.len() + ); + event.1.sub(self.event_count as u32); + inner.commit_seq -= self.pending.len() as u64; + } + + inner.pending_jobs -= 1; + + if inner.pending.is_empty() && inner.pending_jobs == 0 { + mod_pr_debug!("WorkQueue({:?}): Dropping event\n", inner.pipe_type); + inner.event = None; + inner.last_submitted = None; + inner.last_completed = None; + } + mod_pr_debug!("WorkQueue({:?}): Dropped Job\n", inner.pipe_type); + } +} + +#[versions(AGX)] +impl<'a> Drop for JobSubmission::ver<'a> { + fn drop(&mut self) { + let inner = self.inner.as_mut().expect("No inner?"); + mod_pr_debug!("WorkQueue({:?}): Dropping JobSubmission\n", inner.pipe_type); + + let new_len = inner.pending.len() - self.command_count; + inner.pending.truncate(new_len); + + let pipe_type = inner.pipe_type; + let event = inner.event.as_mut().expect("JobSubmission lost its event"); + mod_pr_debug!( + "WorkQueue({:?}): Roll back {} events (slot {} val {:#x?}) and {} commands\n", + pipe_type, + self.event_count, + event.0.slot(), + event.1, + self.command_count + ); + event.1.sub(self.event_count as u32); + inner.commit_seq -= self.command_count as u64; + mod_pr_debug!("WorkQueue({:?}): Dropped 
JobSubmission\n", inner.pipe_type); + } +} + +#[versions(AGX)] +impl WorkQueueInner::ver { + /// Return the number of free entries in the workqueue + pub(crate) fn free_space(&self) -> usize { + self.size as usize - self.pending.len() - 1 + } + + pub(crate) fn free_slots(&self) -> usize { + let busy_slots = if let Some(ls) = self.last_submitted { + let lc = self + .last_completed + .expect("last_submitted but not completed?"); + ls.delta(&lc) + } else { + 0 + }; + + ((MAX_JOB_SLOTS as i32) - busy_slots).max(0) as usize + } +} + +#[versions(AGX)] +impl WorkQueue::ver { + /// Create a new WorkQueue of a given type and priority. + #[allow(clippy::too_many_arguments)] + pub(crate) fn new( + alloc: &mut gpu::KernelAllocators, + event_manager: Arc<event::EventManager>, + gpu_context: Arc<GpuContext>, + notifier_list: Arc<GpuObject<fw::event::NotifierList>>, + pipe_type: PipeType, + id: u64, + priority: u32, + size: u32, + ) -> Result<Arc<WorkQueue::ver>> { + let mut info = box_in_place!(QueueInfo::ver { + state: alloc.shared.new_default::<RingState>()?, + ring: alloc.shared.array_empty(size as usize)?, + gpu_buf: alloc.private.array_empty(0x2c18)?, + notifier_list: notifier_list, + gpu_context: gpu_context, + })?; + + info.state.with_mut(|raw, _inner| { + raw.rb_size = size; + }); + + let inner = WorkQueueInner::ver { + event_manager, + info: alloc.private.new_boxed(info, |inner, ptr| { + Ok(place!( + ptr, + raw::QueueInfo::ver { + state: inner.state.gpu_pointer(), + ring: inner.ring.gpu_pointer(), + notifier_list: inner.notifier_list.gpu_pointer(), + gpu_buf: inner.gpu_buf.gpu_pointer(), + gpu_rptr1: Default::default(), + gpu_rptr2: Default::default(), + gpu_rptr3: Default::default(), + event_id: AtomicI32::new(-1), + priority: *raw::PRIORITY.get(priority as usize).ok_or(EINVAL)?, + unk_4c: -1, + uuid: id as u32, + unk_54: -1, + unk_58: Default::default(), + busy: Default::default(), + __pad: Default::default(), + unk_84_state: Default::default(), + unk_88: 0, + unk_8c: 0, + unk_90: 0, + unk_94: 0, + pending: Default::default(), + unk_9c: 0, + #[ver(V >= V13_2)] + unk_a0_0: 0, + gpu_context: inner.gpu_context.gpu_pointer(), + unk_a8: Default::default(), + #[ver(V >= V13_2)] + unk_b0: 0, + } + )) + })?, + new: true, + pipe_type, + size, + wptr: 0, + pending: Vec::new(), + last_token: None, + event: None, + priority, + pending_jobs: 0, + commit_seq: 0, + submit_seq: 0, + last_completed: None, + last_submitted: None, + }; + + let mut queue = Pin::from(UniqueArc::try_new(Self { + info_pointer: inner.info.weak_pointer(), + // SAFETY: `mutex_init!` is called below. + inner: unsafe { Mutex::new(inner) }, + })?); + + // SAFETY: `inner` is pinned when `queue` is. 
+ let pinned = unsafe { queue.as_mut().map_unchecked_mut(|s| &mut s.inner) }; + match pipe_type { + PipeType::Vertex => kernel::mutex_init!(pinned, "WorkQueue::inner (Vertex)"), + PipeType::Fragment => kernel::mutex_init!(pinned, "WorkQueue::inner (Fragment)"), + PipeType::Compute => kernel::mutex_init!(pinned, "WorkQueue::inner (Compute)"), + } + + Ok(queue.into()) + } + + pub(crate) fn event_info(&self) -> Option<QueueEventInfo::ver> { + let inner = self.inner.lock(); + + inner.event.as_ref().map(|ev| QueueEventInfo::ver { + stamp_pointer: ev.0.stamp_pointer(), + fw_stamp_pointer: ev.0.fw_stamp_pointer(), + slot: ev.0.slot(), + value: ev.1, + cmd_seq: inner.commit_seq, + info_ptr: self.info_pointer, + }) + } + + pub(crate) fn new_job(self: &Arc<Self>) -> Result<Job::ver> { + let mut inner = self.inner.lock(); + + if inner.event.is_none() { + mod_pr_debug!("WorkQueue({:?}): Grabbing event\n", inner.pipe_type); + let event = inner.event_manager.get(inner.last_token, self.clone())?; + let cur = event.current(); + inner.last_token = Some(event.token()); + mod_pr_debug!( + "WorkQueue({:?}): Grabbed event slot {}: {:#x?}\n", + inner.pipe_type, + event.slot(), + cur + ); + inner.event = Some((event, cur)); + inner.last_submitted = Some(cur); + inner.last_completed = Some(cur); + } + + inner.pending_jobs += 1; + + let ev = &inner.event.as_ref().unwrap(); + + mod_pr_debug!("WorkQueue({:?}): New job\n", inner.pipe_type); + Ok(Job::ver { + wq: self.clone(), + event_info: QueueEventInfo::ver { + stamp_pointer: ev.0.stamp_pointer(), + fw_stamp_pointer: ev.0.fw_stamp_pointer(), + slot: ev.0.slot(), + value: ev.1, + cmd_seq: inner.commit_seq, + info_ptr: self.info_pointer, + }, + start_value: ev.1, + pending: Vec::new(), + event_count: 0, + committed: false, + submitted: false, + }) + } + + /// Return the number of free entries in the workqueue + pub(crate) fn free_space(&self) -> usize { + self.inner.lock().free_space() + } + + /// Return the number of free job slots in the workqueue + pub(crate) fn free_slots(&self) -> usize { + self.inner.lock().free_slots() + } + + pub(crate) fn pipe_type(&self) -> PipeType { + self.inner.lock().pipe_type + } +} + +/// Trait used to erase the version-specific type of WorkQueues, to avoid leaking +/// version-specificity into the event module. +pub(crate) trait WorkQueue { + fn signal(&self) -> bool; + fn mark_error(&self, value: event::EventValue, error: WorkError); + fn fail_all(&self, error: WorkError); +} + +#[versions(AGX)] +impl WorkQueue for WorkQueue::ver { + /// Signal a workqueue that some work was completed. + /// + /// This will check the event stamp value to find out exactly how many commands were processed. 
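+ /// Commands are retired strictly in order: the pending list is walked until the first command whose stamp value is still ahead of the event, everything before that point is drained, and the completion callbacks only run after the queue lock has been dropped.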
+ fn signal(&self) -> bool { + let mut inner = self.inner.lock(); + let event = inner.event.as_ref(); + let value = match event { + None => { + pr_err!("WorkQueue: signal() called but no event?\n"); + return true; + } + Some(event) => event.0.current(), + }; + + inner.last_completed = Some(value); + + mod_pr_debug!( + "WorkQueue({:?}): Signaling event {:?} value {:#x?}\n", + inner.pipe_type, + inner.last_token, + value + ); + + let mut completed_commands: usize = 0; + + for cmd in inner.pending.iter() { + if cmd.value() <= value { + mod_pr_debug!( + "WorkQueue({:?}): Command at value {:#x?} complete\n", + inner.pipe_type, + cmd.value() + ); + completed_commands += 1; + } else { + break; + } + } + + if completed_commands == 0 { + return inner.pending.is_empty(); + } + + let mut completed = Vec::new(); + + if completed.try_reserve(completed_commands).is_err() { + pr_crit!( + "WorkQueue({:?}): Failed to allocate space for {} completed commands\n", + inner.pipe_type, + completed_commands + ); + } + + let pipe_type = inner.pipe_type; + + for cmd in inner.pending.drain(..completed_commands) { + if completed.try_push(cmd).is_err() { + pr_crit!( + "WorkQueue({:?}): Failed to signal a completed command\n", + pipe_type, + ); + } + } + + mod_pr_debug!( + "WorkQueue({:?}): Completed {} commands\n", + inner.pipe_type, + completed_commands + ); + + if let Some(i) = completed.last() { + inner + .info + .state + .with(|raw, _inner| raw.cpu_freeptr.store(i.wptr(), Ordering::Release)); + } + + let empty = inner.pending.is_empty(); + if empty && inner.pending_jobs == 0 { + inner.event = None; + inner.last_submitted = None; + inner.last_completed = None; + } + + core::mem::drop(inner); + + for cmd in completed { + cmd.complete(); + } + + empty + } + + /// Mark this queue's work up to a certain stamp value as having failed. + fn mark_error(&self, value: event::EventValue, error: WorkError) { + // If anything is marked completed, we can consider it successful + // at this point, even if we didn't get the signal event yet. + self.signal(); + + let mut inner = self.inner.lock(); + + if inner.event.is_none() { + pr_err!("WorkQueue: mark_error() called but no event?\n"); + return; + } + + mod_pr_debug!( + "WorkQueue({:?}): Signaling fault for event {:?} at value {:#x?}\n", + inner.pipe_type, + inner.last_token, + value + ); + + for cmd in inner.pending.iter_mut() { + if cmd.value() <= value { + cmd.mark_error(error); + } else { + break; + } + } + } + + /// Mark all of this queue's work as having failed, and complete it. + fn fail_all(&self, error: WorkError) { + // If anything is marked completed, we can consider it successful + // at this point, even if we didn't get the signal event yet. 
+ self.signal(); + + let mut inner = self.inner.lock(); + + if inner.event.is_none() { + pr_err!("WorkQueue: fail_all() called but no event?\n"); + return; + } + + mod_pr_debug!( + "WorkQueue({:?}): Failing all jobs {:?}\n", + inner.pipe_type, + error + ); + + let mut cmds = Vec::new(); + + core::mem::swap(&mut inner.pending, &mut cmds); + + if inner.pending_jobs == 0 { + inner.event = None; + } + + core::mem::drop(inner); + + for mut cmd in cmds { + cmd.mark_error(error); + cmd.complete(); + } + } +} + +#[versions(AGX)] +impl Drop for WorkQueue::ver { + fn drop(&mut self) { + mod_pr_debug!("WorkQueue({:?}): Dropping\n", self.inner.lock().pipe_type); + } +} diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index b8db675e7fb5..3f27ebe4f40e 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -166,6 +166,7 @@ void drm_gem_private_object_init(struct drm_device *dev, drm_vma_node_reset(&obj->vma_node); INIT_LIST_HEAD(&obj->lru_node); + obj->exportable = true; } EXPORT_SYMBOL(drm_gem_private_object_init); diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c index b602cd72a120..427d28287da8 100644 --- a/drivers/gpu/drm/drm_gem_shmem_helper.c +++ b/drivers/gpu/drm/drm_gem_shmem_helper.c @@ -537,7 +537,7 @@ int drm_gem_shmem_dumb_create(struct drm_file *file, struct drm_device *dev, } EXPORT_SYMBOL_GPL(drm_gem_shmem_dumb_create); -static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf) +vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct drm_gem_object *obj = vma->vm_private_data; @@ -566,8 +566,9 @@ static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf) return ret; } +EXPORT_SYMBOL_GPL(drm_gem_shmem_fault); -static void drm_gem_shmem_vm_open(struct vm_area_struct *vma) +void drm_gem_shmem_vm_open(struct vm_area_struct *vma) { struct drm_gem_object *obj = vma->vm_private_data; struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); @@ -588,8 +589,9 @@ static void drm_gem_shmem_vm_open(struct vm_area_struct *vma) drm_gem_vm_open(vma); } +EXPORT_SYMBOL_GPL(drm_gem_shmem_vm_open); -static void drm_gem_shmem_vm_close(struct vm_area_struct *vma) +void drm_gem_shmem_vm_close(struct vm_area_struct *vma) { struct drm_gem_object *obj = vma->vm_private_data; struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj); @@ -597,6 +599,7 @@ static void drm_gem_shmem_vm_close(struct vm_area_struct *vma) drm_gem_shmem_put_pages(shmem); drm_gem_vm_close(vma); } +EXPORT_SYMBOL_GPL(drm_gem_shmem_vm_close); const struct vm_operations_struct drm_gem_shmem_vm_ops = { .fault = drm_gem_shmem_fault, diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index f924b8b4ab6b..9d2dd982580e 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -391,6 +391,11 @@ static struct dma_buf *export_and_register_object(struct drm_device *dev, return dmabuf; } + if (!obj->exportable) { + dmabuf = ERR_PTR(-EINVAL); + return dmabuf; + } + if (obj->funcs && obj->funcs->export) dmabuf = obj->funcs->export(obj, flags); else diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index fd22d753b4ed..ae2f7c7343f2 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -984,6 +984,16 @@ static int drm_sched_main(void *param) if (!entity) continue; + if (sched->ops->can_run_job) { + sched_job = to_drm_sched_job(spsc_queue_peek(&entity->job_queue)); + if (!sched_job) { + 
complete_all(&entity->entity_idle); + continue; + } + if (!sched->ops->can_run_job(sched_job)) + continue; + } + sched_job = drm_sched_entity_pop_job(entity); if (!sched_job) { @@ -1092,10 +1102,33 @@ EXPORT_SYMBOL(drm_sched_init); void drm_sched_fini(struct drm_gpu_scheduler *sched) { struct drm_sched_entity *s_entity; + struct drm_sched_job *s_job, *tmp; int i; - if (sched->thread) - kthread_stop(sched->thread); + if (!sched->thread) + return; + + /* + * Stop the scheduler, detaching all jobs from their hardware callbacks + * and cleaning up complete jobs. + */ + drm_sched_stop(sched, NULL); + + /* + * Iterate through the pending job list and free all jobs. + * This assumes the driver has either guaranteed jobs are already stopped, or that + * otherwise it is responsible for keeping any necessary data structures for + * in-progress jobs alive even when the free_job() callback is called early (e.g. by + * putting them in its own queue or doing its own refcounting). + */ + list_for_each_entry_safe(s_job, tmp, &sched->pending_list, list) { + spin_lock(&sched->job_list_lock); + list_del_init(&s_job->list); + spin_unlock(&sched->job_list_lock); + sched->ops->free_job(s_job); + } + + kthread_stop(sched->thread); for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) { struct drm_sched_rq *rq = &sched->sched_rq[i]; diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 72dcdd468cf3..85f35035ae95 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -130,6 +130,15 @@ #define ARM_MALI_LPAE_MEMATTR_IMP_DEF 0x88ULL #define ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC 0x8DULL +#define APPLE_UAT_MEMATTR_PRIV (((arm_lpae_iopte)0x0) << 2) +#define APPLE_UAT_MEMATTR_DEV (((arm_lpae_iopte)0x1) << 2) +#define APPLE_UAT_MEMATTR_SHARED (((arm_lpae_iopte)0x2) << 2) +#define APPLE_UAT_GPU_ACCESS (((arm_lpae_iopte)1) << 55) +#define APPLE_UAT_UXN (((arm_lpae_iopte)1) << 54) +#define APPLE_UAT_PXN (((arm_lpae_iopte)1) << 53) +#define APPLE_UAT_AP1 (((arm_lpae_iopte)1) << 7) +#define APPLE_UAT_AP0 (((arm_lpae_iopte)1) << 6) + /* IOPTE accessors */ #define iopte_deref(pte,d) __va(iopte_to_paddr(pte, d)) @@ -402,7 +411,42 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data, { arm_lpae_iopte pte; - if (data->iop.fmt == ARM_64_LPAE_S1 || + if (data->iop.fmt == APPLE_UAT) { + /* + * This bit enables GPU access and the particular permission + * rules that follow. Without it, access is firmware-only and + * permissions follow the firmware's Apple SPRR configuration. 
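+ * In this format, AP0 selects the firmware-structure permission set, AP1 selects the GPU-only set, and leaving both clear yields mappings shared between the GPU and firmware; the UXN/PXN bits are then used below to encode the read/write permission combination for each case.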
+ */ + pte = APPLE_UAT_GPU_ACCESS; + if (prot & IOMMU_PRIV) { + /* Firmware structures */ + pte |= APPLE_UAT_AP0; + if (prot & IOMMU_WRITE) { + /* Firmware RW */ + pte |= APPLE_UAT_UXN; + } else if (!(prot & IOMMU_READ)) { + /* No access */ + pte |= APPLE_UAT_PXN; + } + } else if (prot & IOMMU_NOEXEC) { + /* GPU structures (no FW access) */ + pte |= APPLE_UAT_AP1 | ARM_LPAE_PTE_nG; + if (!(prot & IOMMU_READ)) { + pte |= APPLE_UAT_PXN; + if (!(prot & IOMMU_WRITE)) + pte |= APPLE_UAT_UXN; + } else if (prot & IOMMU_WRITE) { + pte |= APPLE_UAT_UXN; + } + } else { + pte |= ARM_LPAE_PTE_nG; + /* GPU structures (also FW accessible) */ + if (prot & IOMMU_WRITE) + pte |= APPLE_UAT_UXN; + if (prot & IOMMU_READ) + pte |= APPLE_UAT_PXN; + } + } else if (data->iop.fmt == ARM_64_LPAE_S1 || data->iop.fmt == ARM_32_LPAE_S1) { pte = ARM_LPAE_PTE_nG; if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ)) @@ -421,7 +465,14 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data, * Note that this logic is structured to accommodate Mali LPAE * having stage-1-like attributes but stage-2-like permissions. */ - if (data->iop.fmt == ARM_64_LPAE_S2 || + if (data->iop.fmt == APPLE_UAT) { + if (prot & IOMMU_MMIO) + pte |= APPLE_UAT_MEMATTR_DEV; + else if (prot & IOMMU_CACHE) + pte |= APPLE_UAT_MEMATTR_SHARED; + else + pte |= APPLE_UAT_MEMATTR_PRIV; + } else if (data->iop.fmt == ARM_64_LPAE_S2 || data->iop.fmt == ARM_32_LPAE_S2) { if (prot & IOMMU_MMIO) pte |= ARM_LPAE_PTE_MEMATTR_DEV; @@ -444,12 +495,14 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data, * "outside the GPU" (i.e. either the Inner or System domain in CPU * terms, depending on coherency). */ - if (prot & IOMMU_CACHE && data->iop.fmt != ARM_MALI_LPAE) + if (data->iop.fmt == APPLE_UAT) + pte |= ARM_LPAE_PTE_SH_NS; + else if (prot & IOMMU_CACHE && data->iop.fmt != ARM_MALI_LPAE) pte |= ARM_LPAE_PTE_SH_IS; else pte |= ARM_LPAE_PTE_SH_OS; - if (prot & IOMMU_NOEXEC) + if (prot & IOMMU_NOEXEC && data->iop.fmt != APPLE_UAT) pte |= ARM_LPAE_PTE_XN; if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS) @@ -1079,6 +1132,41 @@ out_free_data: return NULL; } +static struct io_pgtable * +apple_uat_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) +{ + struct arm_lpae_io_pgtable *data; + + /* No quirks for UAT (hopefully) */ + if (cfg->quirks) + return NULL; + + if (cfg->ias > 48 || cfg->oas > 42) + return NULL; + + cfg->pgsize_bitmap &= SZ_16K; + + data = arm_lpae_alloc_pgtable(cfg); + if (!data) + return NULL; + + data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data), GFP_KERNEL, + cfg); + if (!data->pgd) + goto out_free_data; + + /* Ensure the empty pgd is visible before the TTBAT can be written */ + wmb(); + + cfg->apple_uat_cfg.ttbr = virt_to_phys(data->pgd); + + return &data->iop; + +out_free_data: + kfree(data); + return NULL; +} + struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns = { .alloc = arm_64_lpae_alloc_pgtable_s1, .free = arm_lpae_free_pgtable, @@ -1104,6 +1192,11 @@ struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns = { .free = arm_lpae_free_pgtable, }; +struct io_pgtable_init_fns io_pgtable_apple_uat_init_fns = { + .alloc = apple_uat_alloc_pgtable, + .free = arm_lpae_free_pgtable, +}; + #ifdef CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST static struct io_pgtable_cfg *cfg_cookie __initdata; diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c index b843fcd365d2..faec53e22388 100644 --- a/drivers/iommu/io-pgtable.c +++ b/drivers/iommu/io-pgtable.c @@ -20,6 +20,7 @@ 
io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = { [ARM_64_LPAE_S1] = &io_pgtable_arm_64_lpae_s1_init_fns, [ARM_64_LPAE_S2] = &io_pgtable_arm_64_lpae_s2_init_fns, [ARM_MALI_LPAE] = &io_pgtable_arm_mali_lpae_init_fns, + [APPLE_UAT] = &io_pgtable_apple_uat_init_fns, #endif #ifdef CONFIG_IOMMU_IO_PGTABLE_DART [APPLE_DART] = &io_pgtable_apple_dart_init_fns, diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h index a17c2f903f81..58e63d504640 100644 --- a/include/drm/drm_gem.h +++ b/include/drm/drm_gem.h @@ -361,6 +361,14 @@ struct drm_gem_object { * The current LRU list that the GEM object is on. */ struct drm_gem_lru *lru; + + /** + * @exportable: + * + * Whether this GEM object can be exported via the drm_gem_object_funcs->export + * callback. Defaults to true. + */ + bool exportable; }; /** diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h index a2201b2488c5..b9f349b3ed76 100644 --- a/include/drm/drm_gem_shmem_helper.h +++ b/include/drm/drm_gem_shmem_helper.h @@ -138,6 +138,9 @@ void drm_gem_shmem_print_info(const struct drm_gem_shmem_object *shmem, struct drm_printer *p, unsigned int indent); extern const struct vm_operations_struct drm_gem_shmem_vm_ops; +vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf); +void drm_gem_shmem_vm_open(struct vm_area_struct *vma); +void drm_gem_shmem_vm_close(struct vm_area_struct *vma); /* * GEM object functions diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index ca857ec9e7eb..ad7e9aff6a7d 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -390,6 +390,14 @@ struct drm_sched_backend_ops { struct drm_sched_entity *s_entity); /** + * @can_run_job: Called before job execution to check whether the + * hardware is free enough to run the job. This can be used to + * implement more complex hardware resource policies than the + * hw_submission limit. + */ + bool (*can_run_job)(struct drm_sched_job *sched_job); + + /** * @run_job: Called to execute the job once all of the dependencies * have been resolved. This may be called multiple times, if * timedout_job() has happened and drm_sched_job_recovery() diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index e4697ff48d3a..50d4dce53ded 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -19,6 +19,7 @@ enum io_pgtable_fmt { AMD_IOMMU_V2, APPLE_DART, APPLE_DART2, + APPLE_UAT, IO_PGTABLE_NUM_FMTS, }; @@ -148,6 +149,10 @@ struct io_pgtable_cfg { u64 ttbr[4]; u32 n_ttbrs; } apple_dart_cfg; + + struct { + u64 ttbr; + } apple_uat_cfg; }; }; @@ -262,5 +267,6 @@ extern struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns; extern struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns; extern struct io_pgtable_init_fns io_pgtable_amd_iommu_v2_init_fns; extern struct io_pgtable_init_fns io_pgtable_apple_dart_init_fns; +extern struct io_pgtable_init_fns io_pgtable_apple_uat_init_fns; #endif /* __IO_PGTABLE_H */ diff --git a/include/uapi/drm/asahi_drm.h b/include/uapi/drm/asahi_drm.h new file mode 100644 index 000000000000..084a80220c5b --- /dev/null +++ b/include/uapi/drm/asahi_drm.h @@ -0,0 +1,560 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright (C) The Asahi Linux Contributors + * + * Based on asahi_drm.h which is + * + * Copyright © 2014-2018 Broadcom + * Copyright © 2019 Collabora ltd. 
*/ +#ifndef _ASAHI_DRM_H_ +#define _ASAHI_DRM_H_ + +#include "drm.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +#define DRM_ASAHI_UNSTABLE_UABI_VERSION 10006 + +#define DRM_ASAHI_GET_PARAMS 0x00 +#define DRM_ASAHI_VM_CREATE 0x01 +#define DRM_ASAHI_VM_DESTROY 0x02 +#define DRM_ASAHI_GEM_CREATE 0x03 +#define DRM_ASAHI_GEM_MMAP_OFFSET 0x04 +#define DRM_ASAHI_GEM_BIND 0x05 +#define DRM_ASAHI_QUEUE_CREATE 0x06 +#define DRM_ASAHI_QUEUE_DESTROY 0x07 +#define DRM_ASAHI_SUBMIT 0x08 +#define DRM_ASAHI_GET_TIME 0x09 + +#define DRM_ASAHI_MAX_CLUSTERS 32 + +struct drm_asahi_params_global { + __u32 unstable_uabi_version; + __u32 pad0; + + __u64 feat_compat; + __u64 feat_incompat; + + __u32 gpu_generation; + __u32 gpu_variant; + __u32 gpu_revision; + __u32 chip_id; + + __u32 num_dies; + __u32 num_clusters_total; + __u32 num_cores_per_cluster; + __u32 num_frags_per_cluster; + __u32 num_gps_per_cluster; + __u32 num_cores_total_active; + __u64 core_masks[DRM_ASAHI_MAX_CLUSTERS]; + + __u32 vm_page_size; + __u32 pad1; + __u64 vm_user_start; + __u64 vm_user_end; + __u64 vm_shader_start; + __u64 vm_shader_end; + + __u32 max_syncs_per_submission; + __u32 max_commands_per_submission; + __u32 max_commands_in_flight; + __u32 max_attachments; + + __u32 timer_frequency_hz; + __u32 min_frequency_khz; + __u32 max_frequency_khz; + __u32 max_power_mw; + + __u32 result_render_size; + __u32 result_compute_size; +}; + +/* +enum drm_asahi_feat_compat { +}; +*/ + +enum drm_asahi_feat_incompat { + DRM_ASAHI_FEAT_MANDATORY_ZS_COMPRESSION = (1UL) << 0, +}; + +struct drm_asahi_get_params { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @param_group: Parameter group to fetch (MBZ) */ + __u32 param_group; + + /** @pad: MBZ */ + __u32 pad; + + /** @pointer: User pointer to write parameter struct */ + __u64 pointer; + + /** @size: Size of user buffer, max size supported on return */ + __u64 size; +}; + +struct drm_asahi_vm_create { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @vm_id: Returned VM ID */ + __u32 vm_id; + + /** @pad: MBZ */ + __u32 pad; +}; + +struct drm_asahi_vm_destroy { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @vm_id: VM ID to be destroyed */ + __u32 vm_id; + + /** @pad: MBZ */ + __u32 pad; +}; + +#define ASAHI_GEM_WRITEBACK (1L << 0) +#define ASAHI_GEM_VM_PRIVATE (1L << 1) + +struct drm_asahi_gem_create { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @size: Size of the BO */ + __u64 size; + + /** @flags: BO creation flags */ + __u32 flags; + + /** @vm_id: VM ID to assign to the BO, if ASAHI_GEM_VM_PRIVATE is set. */ + __u32 vm_id; + + /** @handle: Returned GEM handle for the BO */ + __u32 handle; +}; + +struct drm_asahi_gem_mmap_offset { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @handle: Handle for the object being mapped. 
*/ + __u32 handle; + + /** @flags: Must be zero */ + __u32 flags; + + /** @offset: The fake offset to use for subsequent mmap call */ + __u64 offset; +}; + +enum drm_asahi_bind_op { + ASAHI_BIND_OP_BIND = 0, + ASAHI_BIND_OP_UNBIND = 1, + ASAHI_BIND_OP_UNBIND_ALL = 2, +}; + +#define ASAHI_BIND_READ (1L << 0) +#define ASAHI_BIND_WRITE (1L << 1) + +struct drm_asahi_gem_bind { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @op: Bind operation */ + __u32 op; + + /** @flags: One or more of ASAHI_BIND_* */ + __u32 flags; + + /** @handle: GEM object to bind */ + __u32 handle; + + /** @vm_id: The ID of the VM to bind to */ + __u32 vm_id; + + /** @offset: Offset into the object */ + __u64 offset; + + /** @range: Number of bytes from the object to bind to addr */ + __u64 range; + + /** @addr: Address to bind to */ + __u64 addr; +}; + +enum drm_asahi_cmd_type { + DRM_ASAHI_CMD_RENDER = 0, + DRM_ASAHI_CMD_BLIT = 1, + DRM_ASAHI_CMD_COMPUTE = 2, +}; + +/* Note: this is an enum so that it can be resolved by Rust bindgen. */ +enum drm_asahi_queue_cap { + DRM_ASAHI_QUEUE_CAP_RENDER = (1UL << DRM_ASAHI_CMD_RENDER), + DRM_ASAHI_QUEUE_CAP_BLIT = (1UL << DRM_ASAHI_CMD_BLIT), + DRM_ASAHI_QUEUE_CAP_COMPUTE = (1UL << DRM_ASAHI_CMD_COMPUTE), +}; + +struct drm_asahi_queue_create { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @flags: MBZ */ + __u32 flags; + + /** @vm_id: The ID of the VM this queue is bound to */ + __u32 vm_id; + + /** @queue_caps: Bitmask of DRM_ASAHI_QUEUE_CAP_* */ + __u32 queue_caps; + + /** @priority: Queue priority, 0-3 */ + __u32 priority; + + /** @queue_id: The returned queue ID */ + __u32 queue_id; +}; + +struct drm_asahi_queue_destroy { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @queue_id: The queue ID to be destroyed */ + __u32 queue_id; +}; + +enum drm_asahi_sync_type { + DRM_ASAHI_SYNC_SYNCOBJ = 0, + DRM_ASAHI_SYNC_TIMELINE_SYNCOBJ = 1, +}; + +struct drm_asahi_sync { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @sync_type: One of drm_asahi_sync_type */ + __u32 sync_type; + + /** @handle: The sync object handle */ + __u32 handle; + + /** @timeline_value: Timeline value for timeline sync objects */ + __u64 timeline_value; +}; + +enum drm_asahi_subqueue { + DRM_ASAHI_SUBQUEUE_RENDER = 0, /* Also blit */ + DRM_ASAHI_SUBQUEUE_COMPUTE = 1, + DRM_ASAHI_SUBQUEUE_COUNT = 2, +}; + +#define DRM_ASAHI_BARRIER_NONE ~(0U) + +struct drm_asahi_command { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @cmd_type: One of drm_asahi_cmd_type */ + __u32 cmd_type; + + /** @flags: Flags for command submission */ + __u32 flags; + + /** @cmd_buffer: Pointer to the appropriate command buffer structure */ + __u64 cmd_buffer; + + /** @cmd_buffer_size: Size of the command buffer structure */ + __u64 cmd_buffer_size; + + /** @result_offset: Offset into the result BO to return information about this command */ + __u64 result_offset; + + /** @result_size: Size of the result data structure */ + __u64 result_size; + + /** @barriers: Array of command indices per subqueue to wait on */ + __u32 barriers[DRM_ASAHI_SUBQUEUE_COUNT]; +}; + +struct drm_asahi_submit { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @in_syncs: An optional array of drm_asahi_sync to wait on before starting this job. 
*/ + __u64 in_syncs; + + /** @out_syncs: An optional array of drm_asahi_sync objects to signal upon completion. */ + __u64 out_syncs; + + /** @commands: Pointer to the drm_asahi_command array of commands to submit. */ + __u64 commands; + + /** @flags: Flags for command submission (MBZ) */ + __u32 flags; + + /** @queue_id: The queue ID to be submitted to */ + __u32 queue_id; + + /** @result_handle: An optional BO handle to place result data in */ + __u32 result_handle; + + /** @in_sync_count: Number of sync objects to wait on before starting this job. */ + __u32 in_sync_count; + + /** @out_sync_count: Number of sync objects to signal upon completion of this job. */ + __u32 out_sync_count; + + /** @command_count: Number of commands to be submitted */ + __u32 command_count; +}; + +#define ASAHI_ATTACHMENT_C 0 +#define ASAHI_ATTACHMENT_Z 1 +#define ASAHI_ATTACHMENT_S 2 + +/* FIXME: Type doesn't make sense here */ +struct drm_asahi_attachment { + __u32 type; + __u32 size; + __u64 pointer; +}; + +#define ASAHI_RENDER_NO_CLEAR_PIPELINE_TEXTURES (1UL << 0) +#define ASAHI_RENDER_SET_WHEN_RELOADING_Z_OR_S (1UL << 1) +#define ASAHI_RENDER_MEMORYLESS_RTS_USED (1UL << 2) +#define ASAHI_RENDER_PROCESS_EMPTY_TILES (1UL << 3) +#define ASAHI_RENDER_NO_VERTEX_CLUSTERING (1UL << 4) + +struct drm_asahi_cmd_render { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + __u64 flags; + + __u64 encoder_ptr; + + __u64 attachments; + __u32 attachment_count; + __u32 pad; + + __u64 depth_buffer_1; + __u64 depth_buffer_2; + __u64 depth_buffer_3; + __u64 depth_meta_buffer_1; + __u64 depth_meta_buffer_2; + __u64 depth_meta_buffer_3; + + __u64 stencil_buffer_1; + __u64 stencil_buffer_2; + __u64 stencil_buffer_3; + __u64 stencil_meta_buffer_1; + __u64 stencil_meta_buffer_2; + __u64 stencil_meta_buffer_3; + + __u64 scissor_array; + __u64 depth_bias_array; + __u64 visibility_result_buffer; + + __u64 zls_ctrl; + __u64 ppp_multisamplectl; + __u32 ppp_ctrl; + + __u32 fb_width; + __u32 fb_height; + + __u32 utile_width; + __u32 utile_height; + + __u32 samples; + __u32 layers; + + __u32 encoder_id; + __u32 cmd_ta_id; + __u32 cmd_3d_id; + + __u32 iogpu_unk_49; + __u32 iogpu_unk_212; + __u32 iogpu_unk_214; + + __u32 merge_upper_x; + __u32 merge_upper_y; + + __u32 load_pipeline; + __u32 load_pipeline_bind; + + __u32 store_pipeline; + __u32 store_pipeline_bind; + + __u32 partial_reload_pipeline; + __u32 partial_reload_pipeline_bind; + + __u32 partial_store_pipeline; + __u32 partial_store_pipeline_bind; + + __u32 depth_dimensions; + __u32 isp_bgobjdepth; + __u32 isp_bgobjvals; + +}; + +struct drm_asahi_cmd_compute { + __u64 flags; + + __u64 encoder_ptr; + __u64 encoder_end; + + __u64 attachments; + __u32 attachment_count; + __u32 pad; + + __u64 buffer_descriptor; + + __u32 buffer_descriptor_size; /* ? 
*/ + __u32 ctx_switch_prog; + + __u32 encoder_id; + __u32 cmd_id; + + __u32 iogpu_unk_40; + __u32 iogpu_unk_44; +}; + +enum drm_asahi_status { + DRM_ASAHI_STATUS_PENDING = 0, + DRM_ASAHI_STATUS_COMPLETE, + DRM_ASAHI_STATUS_UNKNOWN_ERROR, + DRM_ASAHI_STATUS_TIMEOUT, + DRM_ASAHI_STATUS_FAULT, + DRM_ASAHI_STATUS_KILLED, + DRM_ASAHI_STATUS_NO_DEVICE, +}; + +enum drm_asahi_fault { + DRM_ASAHI_FAULT_NONE = 0, + DRM_ASAHI_FAULT_UNKNOWN, + DRM_ASAHI_FAULT_UNMAPPED, + DRM_ASAHI_FAULT_AF_FAULT, + DRM_ASAHI_FAULT_WRITE_ONLY, + DRM_ASAHI_FAULT_READ_ONLY, + DRM_ASAHI_FAULT_NO_ACCESS, +}; + +struct drm_asahi_result_info { + /** @status: One of enum drm_asahi_status */ + __u32 status; + + /** @fault_type: One of enum drm_asahi_fault */ + __u32 fault_type; + + /** @unit: Unit number, hardware dependent */ + __u32 unit; + + /** @sideband: Sideband information, hardware dependent */ + __u32 sideband; + + /** @level: Page table level at which the fault occurred, hardware dependent */ + __u8 level; + + /** @is_read: Fault was a read */ + __u8 is_read; + + /** @pad: MBZ */ + __u16 pad; + + /** @extra: Extra bits, hardware dependent */ + __u32 extra; + + /** @address: Fault address, cache line aligned */ + __u64 address; +}; + +#define DRM_ASAHI_RESULT_RENDER_TVB_GROW_OVF (1UL << 0) +#define DRM_ASAHI_RESULT_RENDER_TVB_GROW_MIN (1UL << 1) +#define DRM_ASAHI_RESULT_RENDER_TVB_OVERFLOWED (1UL << 2) + +struct drm_asahi_result_render { + /** @info: Common result information */ + struct drm_asahi_result_info info; + + /** @flags: Zero or more of DRM_ASAHI_RESULT_RENDER_* */ + __u64 flags; + + /** @vertex_ts_start: Timestamp of the start of vertex processing */ + __u64 vertex_ts_start; + + /** @vertex_ts_end: Timestamp of the end of vertex processing */ + __u64 vertex_ts_end; + + /** @fragment_ts_start: Timestamp of the start of fragment processing */ + __u64 fragment_ts_start; + + /** @fragment_ts_end: Timestamp of the end of fragment processing */ + __u64 fragment_ts_end; + + /** @tvb_size_bytes: TVB size at the start of this render */ + __u64 tvb_size_bytes; + + /** @tvb_usage_bytes: Total TVB usage in bytes for this render */ + __u64 tvb_usage_bytes; + + /** @num_tvb_overflows: Number of TVB overflows that occurred for this render */ + __u32 num_tvb_overflows; +}; + +struct drm_asahi_result_compute { + /** @info: Common result information */ + struct drm_asahi_result_info info; + + /** @flags: Zero or more of DRM_ASAHI_RESULT_COMPUTE_* */ + __u64 flags; + + /** @ts_start: Timestamp of the start of this compute command */ + __u64 ts_start; + + /** @ts_end: Timestamp of the end of this compute command */ + __u64 ts_end; +}; + +struct drm_asahi_get_time { + /** @extensions: Pointer to the first extension struct, if any */ + __u64 extensions; + + /** @flags: MBZ. */ + __u64 flags; + + /** @tv_sec: On return, seconds part of a point in time */ + __s64 tv_sec; + + /** @tv_nsec: On return, nanoseconds part of a point in time */ + __s64 tv_nsec; + + /** @gpu_timestamp: On return, the GPU timestamp at that point in time */ + __u64 gpu_timestamp; +}; + +/* Note: this is an enum so that it can be resolved by Rust bindgen. 
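+ * (bindgen does not expand function-like macros such as DRM_IOWR(), but it does turn enum variants into constants that the Rust driver can use directly.)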
*/ +enum { + DRM_IOCTL_ASAHI_GET_PARAMS = DRM_IOWR(DRM_COMMAND_BASE + DRM_ASAHI_GET_PARAMS, struct drm_asahi_get_params), + DRM_IOCTL_ASAHI_VM_CREATE = DRM_IOWR(DRM_COMMAND_BASE + DRM_ASAHI_VM_CREATE, struct drm_asahi_vm_create), + DRM_IOCTL_ASAHI_VM_DESTROY = DRM_IOW(DRM_COMMAND_BASE + DRM_ASAHI_VM_DESTROY, struct drm_asahi_vm_destroy), + DRM_IOCTL_ASAHI_GEM_CREATE = DRM_IOWR(DRM_COMMAND_BASE + DRM_ASAHI_GEM_CREATE, struct drm_asahi_gem_create), + DRM_IOCTL_ASAHI_GEM_MMAP_OFFSET = DRM_IOWR(DRM_COMMAND_BASE + DRM_ASAHI_GEM_MMAP_OFFSET, struct drm_asahi_gem_mmap_offset), + DRM_IOCTL_ASAHI_GEM_BIND = DRM_IOW(DRM_COMMAND_BASE + DRM_ASAHI_GEM_BIND, struct drm_asahi_gem_bind), + DRM_IOCTL_ASAHI_QUEUE_CREATE = DRM_IOWR(DRM_COMMAND_BASE + DRM_ASAHI_QUEUE_CREATE, struct drm_asahi_queue_create), + DRM_IOCTL_ASAHI_QUEUE_DESTROY = DRM_IOW(DRM_COMMAND_BASE + DRM_ASAHI_QUEUE_DESTROY, struct drm_asahi_queue_destroy), + DRM_IOCTL_ASAHI_SUBMIT = DRM_IOW(DRM_COMMAND_BASE + DRM_ASAHI_SUBMIT, struct drm_asahi_submit), + DRM_IOCTL_ASAHI_GET_TIME = DRM_IOWR(DRM_COMMAND_BASE + DRM_ASAHI_GET_TIME, struct drm_asahi_get_time), +}; + +#if defined(__cplusplus) +} +#endif + +#endif /* _ASAHI_DRM_H_ */ diff --git a/rust/Makefile b/rust/Makefile index ff70c4c916f8..aee0701acf02 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -50,6 +50,7 @@ core-cfgs = \ --cfg no_fp_fmt_parse alloc-cfgs = \ + --cfg no_borrow \ --cfg no_fmt \ --cfg no_global_oom_handling \ --cfg no_macros \ @@ -262,6 +263,7 @@ bindgen_skip_c_flags := -mno-fp-ret-in-387 -mpreferred-stack-boundary=% \ # Derived from `scripts/Makefile.clang`. BINDGEN_TARGET_x86 := x86_64-linux-gnu +BINDGEN_TARGET_arm64 := aarch64-linux-gnu BINDGEN_TARGET := $(BINDGEN_TARGET_$(SRCARCH)) # All warnings are inhibited since GCC builds are very experimental, @@ -359,9 +361,23 @@ rust-analyzer: $(Q)$(srctree)/scripts/generate_rust_analyzer.py $(srctree) $(objtree) \ $(RUST_LIB_SRC) > $(objtree)/rust-project.json +redirect-intrinsics = \ + __eqsf2 __gesf2 __lesf2 __nesf2 __unordsf2 \ + __unorddf2 \ + __muloti4 __multi3 \ + __udivmodti4 __udivti3 __umodti3 + +ifneq ($(or $(CONFIG_ARM64),$(and $(CONFIG_RISCV),$(CONFIG_64BIT))),) + # These intrinsics are defined for ARM64 and RISCV64 + redirect-intrinsics += \ + __ashrti3 \ + __ashlti3 __lshrti3 +endif + $(obj)/core.o: private skip_clippy = 1 $(obj)/core.o: private skip_flags = -Dunreachable_pub -$(obj)/core.o: private rustc_target_flags = $(core-cfgs) +$(obj)/core.o: private rustc_objcopy = $(foreach sym,$(redirect-intrinsics),--redefine-sym $(sym)=__rust$(sym)) +$(obj)/core.o: private rustc_target_flags = $(core-cfgs) -Aunused-imports $(obj)/core.o: $(RUST_LIB_SRC)/core/src/lib.rs $(obj)/target.json FORCE $(call if_changed_dep,rustc_library) diff --git a/rust/alloc/alloc.rs b/rust/alloc/alloc.rs index ca224a541770..0142178370e9 100644 --- a/rust/alloc/alloc.rs +++ b/rust/alloc/alloc.rs @@ -27,16 +27,23 @@ extern "Rust" { // (the code expanding that attribute macro generates those functions), or to call // the default implementations in libstd (`__rdl_alloc` etc. in `library/std/src/alloc.rs`) // otherwise. - // The rustc fork of LLVM also special-cases these function names to be able to optimize them + // The rustc fork of LLVM 14 and earlier also special-cases these function names to be able to optimize them // like `malloc`, `realloc`, and `free`, respectively. 
#[rustc_allocator] - #[rustc_allocator_nounwind] + #[cfg_attr(not(bootstrap), rustc_nounwind)] + #[cfg_attr(bootstrap, rustc_allocator_nounwind)] fn __rust_alloc(size: usize, align: usize) -> *mut u8; - #[rustc_allocator_nounwind] + #[rustc_deallocator] + #[cfg_attr(not(bootstrap), rustc_nounwind)] + #[cfg_attr(bootstrap, rustc_allocator_nounwind)] fn __rust_dealloc(ptr: *mut u8, size: usize, align: usize); - #[rustc_allocator_nounwind] + #[rustc_reallocator] + #[cfg_attr(not(bootstrap), rustc_nounwind)] + #[cfg_attr(bootstrap, rustc_allocator_nounwind)] fn __rust_realloc(ptr: *mut u8, old_size: usize, align: usize, new_size: usize) -> *mut u8; - #[rustc_allocator_nounwind] + #[rustc_allocator_zeroed] + #[cfg_attr(not(bootstrap), rustc_nounwind)] + #[cfg_attr(bootstrap, rustc_allocator_nounwind)] fn __rust_alloc_zeroed(size: usize, align: usize) -> *mut u8; } @@ -72,11 +79,14 @@ pub use std::alloc::Global; /// # Examples /// /// ``` -/// use std::alloc::{alloc, dealloc, Layout}; +/// use std::alloc::{alloc, dealloc, handle_alloc_error, Layout}; /// /// unsafe { /// let layout = Layout::new::<u16>(); /// let ptr = alloc(layout); +/// if ptr.is_null() { +/// handle_alloc_error(layout); +/// } /// /// *(ptr as *mut u16) = 42; /// assert_eq!(*(ptr as *mut u16), 42); @@ -400,13 +410,13 @@ pub mod __alloc_error_handler { // if there is no `#[alloc_error_handler]` #[rustc_std_internal_symbol] - pub unsafe extern "C-unwind" fn __rdl_oom(size: usize, _align: usize) -> ! { + pub unsafe fn __rdl_oom(size: usize, _align: usize) -> ! { panic!("memory allocation of {size} bytes failed") } // if there is an `#[alloc_error_handler]` #[rustc_std_internal_symbol] - pub unsafe extern "C-unwind" fn __rg_oom(size: usize, align: usize) -> ! { + pub unsafe fn __rg_oom(size: usize, align: usize) -> ! { let layout = unsafe { Layout::from_size_align_unchecked(size, align) }; extern "Rust" { #[lang = "oom"] diff --git a/rust/alloc/borrow.rs b/rust/alloc/borrow.rs deleted file mode 100644 index dde4957200d4..000000000000 --- a/rust/alloc/borrow.rs +++ /dev/null @@ -1,498 +0,0 @@ -// SPDX-License-Identifier: Apache-2.0 OR MIT - -//! A module for working with borrowed data. - -#![stable(feature = "rust1", since = "1.0.0")] - -use core::cmp::Ordering; -use core::hash::{Hash, Hasher}; -use core::ops::Deref; -#[cfg(not(no_global_oom_handling))] -use core::ops::{Add, AddAssign}; - -#[stable(feature = "rust1", since = "1.0.0")] -pub use core::borrow::{Borrow, BorrowMut}; - -use core::fmt; -#[cfg(not(no_global_oom_handling))] -use crate::string::String; - -use Cow::*; - -#[stable(feature = "rust1", since = "1.0.0")] -impl<'a, B: ?Sized> Borrow<B> for Cow<'a, B> -where - B: ToOwned, - <B as ToOwned>::Owned: 'a, -{ - fn borrow(&self) -> &B { - &**self - } -} - -/// A generalization of `Clone` to borrowed data. -/// -/// Some types make it possible to go from borrowed to owned, usually by -/// implementing the `Clone` trait. But `Clone` works only for going from `&T` -/// to `T`. The `ToOwned` trait generalizes `Clone` to construct owned data -/// from any borrow of a given type. -#[cfg_attr(not(test), rustc_diagnostic_item = "ToOwned")] -#[stable(feature = "rust1", since = "1.0.0")] -pub trait ToOwned { - /// The resulting type after obtaining ownership. - #[stable(feature = "rust1", since = "1.0.0")] - type Owned: Borrow<Self>; - - /// Creates owned data from borrowed data, usually by cloning. 
- /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// let s: &str = "a"; - /// let ss: String = s.to_owned(); - /// - /// let v: &[i32] = &[1, 2]; - /// let vv: Vec<i32> = v.to_owned(); - /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - #[must_use = "cloning is often expensive and is not expected to have side effects"] - fn to_owned(&self) -> Self::Owned; - - /// Uses borrowed data to replace owned data, usually by cloning. - /// - /// This is borrow-generalized version of `Clone::clone_from`. - /// - /// # Examples - /// - /// Basic usage: - /// - /// ``` - /// # #![feature(toowned_clone_into)] - /// let mut s: String = String::new(); - /// "hello".clone_into(&mut s); - /// - /// let mut v: Vec<i32> = Vec::new(); - /// [1, 2][..].clone_into(&mut v); - /// ``` - #[unstable(feature = "toowned_clone_into", reason = "recently added", issue = "41263")] - fn clone_into(&self, target: &mut Self::Owned) { - *target = self.to_owned(); - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl<T> ToOwned for T -where - T: Clone, -{ - type Owned = T; - fn to_owned(&self) -> T { - self.clone() - } - - fn clone_into(&self, target: &mut T) { - target.clone_from(self); - } -} - -/// A clone-on-write smart pointer. -/// -/// The type `Cow` is a smart pointer providing clone-on-write functionality: it -/// can enclose and provide immutable access to borrowed data, and clone the -/// data lazily when mutation or ownership is required. The type is designed to -/// work with general borrowed data via the `Borrow` trait. -/// -/// `Cow` implements `Deref`, which means that you can call -/// non-mutating methods directly on the data it encloses. If mutation -/// is desired, `to_mut` will obtain a mutable reference to an owned -/// value, cloning if necessary. -/// -/// If you need reference-counting pointers, note that -/// [`Rc::make_mut`][crate::rc::Rc::make_mut] and -/// [`Arc::make_mut`][crate::sync::Arc::make_mut] can provide clone-on-write -/// functionality as well. -/// -/// # Examples -/// -/// ``` -/// use std::borrow::Cow; -/// -/// fn abs_all(input: &mut Cow<[i32]>) { -/// for i in 0..input.len() { -/// let v = input[i]; -/// if v < 0 { -/// // Clones into a vector if not already owned. -/// input.to_mut()[i] = -v; -/// } -/// } -/// } -/// -/// // No clone occurs because `input` doesn't need to be mutated. -/// let slice = [0, 1, 2]; -/// let mut input = Cow::from(&slice[..]); -/// abs_all(&mut input); -/// -/// // Clone occurs because `input` needs to be mutated. -/// let slice = [-1, 0, 1]; -/// let mut input = Cow::from(&slice[..]); -/// abs_all(&mut input); -/// -/// // No clone occurs because `input` is already owned. 
-/// let mut input = Cow::from(vec![-1, 0, 1]); -/// abs_all(&mut input); -/// ``` -/// -/// Another example showing how to keep `Cow` in a struct: -/// -/// ``` -/// use std::borrow::Cow; -/// -/// struct Items<'a, X: 'a> where [X]: ToOwned<Owned = Vec<X>> { -/// values: Cow<'a, [X]>, -/// } -/// -/// impl<'a, X: Clone + 'a> Items<'a, X> where [X]: ToOwned<Owned = Vec<X>> { -/// fn new(v: Cow<'a, [X]>) -> Self { -/// Items { values: v } -/// } -/// } -/// -/// // Creates a container from borrowed values of a slice -/// let readonly = [1, 2]; -/// let borrowed = Items::new((&readonly[..]).into()); -/// match borrowed { -/// Items { values: Cow::Borrowed(b) } => println!("borrowed {b:?}"), -/// _ => panic!("expect borrowed value"), -/// } -/// -/// let mut clone_on_write = borrowed; -/// // Mutates the data from slice into owned vec and pushes a new value on top -/// clone_on_write.values.to_mut().push(3); -/// println!("clone_on_write = {:?}", clone_on_write.values); -/// -/// // The data was mutated. Let's check it out. -/// match clone_on_write { -/// Items { values: Cow::Owned(_) } => println!("clone_on_write contains owned data"), -/// _ => panic!("expect owned data"), -/// } -/// ``` -#[stable(feature = "rust1", since = "1.0.0")] -#[cfg_attr(not(test), rustc_diagnostic_item = "Cow")] -pub enum Cow<'a, B: ?Sized + 'a> -where - B: ToOwned, -{ - /// Borrowed data. - #[stable(feature = "rust1", since = "1.0.0")] - Borrowed(#[stable(feature = "rust1", since = "1.0.0")] &'a B), - - /// Owned data. - #[stable(feature = "rust1", since = "1.0.0")] - Owned(#[stable(feature = "rust1", since = "1.0.0")] <B as ToOwned>::Owned), -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl<B: ?Sized + ToOwned> Clone for Cow<'_, B> { - fn clone(&self) -> Self { - match *self { - Borrowed(b) => Borrowed(b), - Owned(ref o) => { - let b: &B = o.borrow(); - Owned(b.to_owned()) - } - } - } - - fn clone_from(&mut self, source: &Self) { - match (self, source) { - (&mut Owned(ref mut dest), &Owned(ref o)) => o.borrow().clone_into(dest), - (t, s) => *t = s.clone(), - } - } -} - -impl<B: ?Sized + ToOwned> Cow<'_, B> { - /// Returns true if the data is borrowed, i.e. if `to_mut` would require additional work. - /// - /// # Examples - /// - /// ``` - /// #![feature(cow_is_borrowed)] - /// use std::borrow::Cow; - /// - /// let cow = Cow::Borrowed("moo"); - /// assert!(cow.is_borrowed()); - /// - /// let bull: Cow<'_, str> = Cow::Owned("...moo?".to_string()); - /// assert!(!bull.is_borrowed()); - /// ``` - #[unstable(feature = "cow_is_borrowed", issue = "65143")] - #[rustc_const_unstable(feature = "const_cow_is_borrowed", issue = "65143")] - pub const fn is_borrowed(&self) -> bool { - match *self { - Borrowed(_) => true, - Owned(_) => false, - } - } - - /// Returns true if the data is owned, i.e. if `to_mut` would be a no-op. - /// - /// # Examples - /// - /// ``` - /// #![feature(cow_is_borrowed)] - /// use std::borrow::Cow; - /// - /// let cow: Cow<'_, str> = Cow::Owned("moo".to_string()); - /// assert!(cow.is_owned()); - /// - /// let bull = Cow::Borrowed("...moo?"); - /// assert!(!bull.is_owned()); - /// ``` - #[unstable(feature = "cow_is_borrowed", issue = "65143")] - #[rustc_const_unstable(feature = "const_cow_is_borrowed", issue = "65143")] - pub const fn is_owned(&self) -> bool { - !self.is_borrowed() - } - - /// Acquires a mutable reference to the owned form of the data. - /// - /// Clones the data if it is not already owned. 
- /// - /// # Examples - /// - /// ``` - /// use std::borrow::Cow; - /// - /// let mut cow = Cow::Borrowed("foo"); - /// cow.to_mut().make_ascii_uppercase(); - /// - /// assert_eq!( - /// cow, - /// Cow::Owned(String::from("FOO")) as Cow<str> - /// ); - /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - pub fn to_mut(&mut self) -> &mut <B as ToOwned>::Owned { - match *self { - Borrowed(borrowed) => { - *self = Owned(borrowed.to_owned()); - match *self { - Borrowed(..) => unreachable!(), - Owned(ref mut owned) => owned, - } - } - Owned(ref mut owned) => owned, - } - } - - /// Extracts the owned data. - /// - /// Clones the data if it is not already owned. - /// - /// # Examples - /// - /// Calling `into_owned` on a `Cow::Borrowed` returns a clone of the borrowed data: - /// - /// ``` - /// use std::borrow::Cow; - /// - /// let s = "Hello world!"; - /// let cow = Cow::Borrowed(s); - /// - /// assert_eq!( - /// cow.into_owned(), - /// String::from(s) - /// ); - /// ``` - /// - /// Calling `into_owned` on a `Cow::Owned` returns the owned data. The data is moved out of the - /// `Cow` without being cloned. - /// - /// ``` - /// use std::borrow::Cow; - /// - /// let s = "Hello world!"; - /// let cow: Cow<str> = Cow::Owned(String::from(s)); - /// - /// assert_eq!( - /// cow.into_owned(), - /// String::from(s) - /// ); - /// ``` - #[stable(feature = "rust1", since = "1.0.0")] - pub fn into_owned(self) -> <B as ToOwned>::Owned { - match self { - Borrowed(borrowed) => borrowed.to_owned(), - Owned(owned) => owned, - } - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -#[rustc_const_unstable(feature = "const_deref", issue = "88955")] -impl<B: ?Sized + ToOwned> const Deref for Cow<'_, B> -where - B::Owned: ~const Borrow<B>, -{ - type Target = B; - - fn deref(&self) -> &B { - match *self { - Borrowed(borrowed) => borrowed, - Owned(ref owned) => owned.borrow(), - } - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl<B: ?Sized> Eq for Cow<'_, B> where B: Eq + ToOwned {} - -#[stable(feature = "rust1", since = "1.0.0")] -impl<B: ?Sized> Ord for Cow<'_, B> -where - B: Ord + ToOwned, -{ - #[inline] - fn cmp(&self, other: &Self) -> Ordering { - Ord::cmp(&**self, &**other) - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl<'a, 'b, B: ?Sized, C: ?Sized> PartialEq<Cow<'b, C>> for Cow<'a, B> -where - B: PartialEq<C> + ToOwned, - C: ToOwned, -{ - #[inline] - fn eq(&self, other: &Cow<'b, C>) -> bool { - PartialEq::eq(&**self, &**other) - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl<'a, B: ?Sized> PartialOrd for Cow<'a, B> -where - B: PartialOrd + ToOwned, -{ - #[inline] - fn partial_cmp(&self, other: &Cow<'a, B>) -> Option<Ordering> { - PartialOrd::partial_cmp(&**self, &**other) - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl<B: ?Sized> fmt::Debug for Cow<'_, B> -where - B: fmt::Debug + ToOwned<Owned: fmt::Debug>, -{ - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match *self { - Borrowed(ref b) => fmt::Debug::fmt(b, f), - Owned(ref o) => fmt::Debug::fmt(o, f), - } - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl<B: ?Sized> fmt::Display for Cow<'_, B> -where - B: fmt::Display + ToOwned<Owned: fmt::Display>, -{ - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match *self { - Borrowed(ref b) => fmt::Display::fmt(b, f), - Owned(ref o) => fmt::Display::fmt(o, f), - } - } -} - -#[stable(feature = "default", since = "1.11.0")] -impl<B: ?Sized> Default for Cow<'_, B> -where - B: ToOwned<Owned: Default>, -{ - /// Creates 
an owned Cow<'a, B> with the default value for the contained owned value. - fn default() -> Self { - Owned(<B as ToOwned>::Owned::default()) - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl<B: ?Sized> Hash for Cow<'_, B> -where - B: Hash + ToOwned, -{ - #[inline] - fn hash<H: Hasher>(&self, state: &mut H) { - Hash::hash(&**self, state) - } -} - -#[stable(feature = "rust1", since = "1.0.0")] -impl<T: ?Sized + ToOwned> AsRef<T> for Cow<'_, T> { - fn as_ref(&self) -> &T { - self - } -} - -#[cfg(not(no_global_oom_handling))] -#[stable(feature = "cow_add", since = "1.14.0")] -impl<'a> Add<&'a str> for Cow<'a, str> { - type Output = Cow<'a, str>; - - #[inline] - fn add(mut self, rhs: &'a str) -> Self::Output { - self += rhs; - self - } -} - -#[cfg(not(no_global_oom_handling))] -#[stable(feature = "cow_add", since = "1.14.0")] -impl<'a> Add<Cow<'a, str>> for Cow<'a, str> { - type Output = Cow<'a, str>; - - #[inline] - fn add(mut self, rhs: Cow<'a, str>) -> Self::Output { - self += rhs; - self - } -} - -#[cfg(not(no_global_oom_handling))] -#[stable(feature = "cow_add", since = "1.14.0")] -impl<'a> AddAssign<&'a str> for Cow<'a, str> { - fn add_assign(&mut self, rhs: &'a str) { - if self.is_empty() { - *self = Cow::Borrowed(rhs) - } else if !rhs.is_empty() { - if let Cow::Borrowed(lhs) = *self { - let mut s = String::with_capacity(lhs.len() + rhs.len()); - s.push_str(lhs); - *self = Cow::Owned(s); - } - self.to_mut().push_str(rhs); - } - } -} - -#[cfg(not(no_global_oom_handling))] -#[stable(feature = "cow_add", since = "1.14.0")] -impl<'a> AddAssign<Cow<'a, str>> for Cow<'a, str> { - fn add_assign(&mut self, rhs: Cow<'a, str>) { - if self.is_empty() { - *self = rhs - } else if !rhs.is_empty() { - if let Cow::Borrowed(lhs) = *self { - let mut s = String::with_capacity(lhs.len() + rhs.len()); - s.push_str(lhs); - *self = Cow::Owned(s); - } - self.to_mut().push_str(&rhs); - } - } -} diff --git a/rust/alloc/boxed.rs b/rust/alloc/boxed.rs index 5e7518bffbfb..d4a03edd7d89 100644 --- a/rust/alloc/boxed.rs +++ b/rust/alloc/boxed.rs @@ -1,6 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT -//! A pointer type for heap allocation. +//! The `Box<T>` type for heap allocation. //! //! [`Box<T>`], casually referred to as a 'box', provides the simplest form of //! heap allocation in Rust. Boxes provide ownership for this allocation, and @@ -124,7 +124,21 @@ //! definition is just using `T*` can lead to undefined behavior, as //! described in [rust-lang/unsafe-code-guidelines#198][ucg#198]. //! +//! # Considerations for unsafe code +//! +//! **Warning: This section is not normative and is subject to change, possibly +//! being relaxed in the future! It is a simplified summary of the rules +//! currently implemented in the compiler.** +//! +//! The aliasing rules for `Box<T>` are the same as for `&mut T`. `Box<T>` +//! asserts uniqueness over its content. Using raw pointers derived from a box +//! after that box has been mutated through, moved or borrowed as `&mut T` +//! is not allowed. For more guidance on working with box from unsafe code, see +//! [rust-lang/unsafe-code-guidelines#326][ucg#326]. +//! +//! //! [ucg#198]: https://github.com/rust-lang/unsafe-code-guidelines/issues/198 +//! [ucg#326]: https://github.com/rust-lang/unsafe-code-guidelines/issues/326 //! [dereferencing]: core::ops::Deref //! [`Box::<T>::from_raw(value)`]: Box::from_raw //! 
[`Global`]: crate::alloc::Global @@ -139,6 +153,7 @@ use core::async_iter::AsyncIterator; use core::borrow; use core::cmp::Ordering; use core::convert::{From, TryFrom}; +use core::error::Error; use core::fmt; use core::future::Future; use core::hash::{Hash, Hasher}; @@ -163,6 +178,8 @@ use crate::raw_vec::RawVec; #[cfg(not(no_global_oom_handling))] use crate::str::from_boxed_utf8_unchecked; #[cfg(not(no_global_oom_handling))] +use crate::string::String; +#[cfg(not(no_global_oom_handling))] use crate::vec::Vec; #[cfg(not(no_thin))] @@ -196,12 +213,13 @@ impl<T> Box<T> { /// ``` /// let five = Box::new(5); /// ``` - #[cfg(not(no_global_oom_handling))] + #[cfg(all(not(no_global_oom_handling)))] #[inline(always)] #[stable(feature = "rust1", since = "1.0.0")] #[must_use] pub fn new(x: T) -> Self { - box x + #[rustc_box] + Box::new(x) } /// Constructs a new box with uninitialized contents. @@ -256,14 +274,21 @@ impl<T> Box<T> { Self::new_zeroed_in(Global) } - /// Constructs a new `Pin<Box<T>>`. If `T` does not implement `Unpin`, then + /// Constructs a new `Pin<Box<T>>`. If `T` does not implement [`Unpin`], then /// `x` will be pinned in memory and unable to be moved. + /// + /// Constructing and pinning of the `Box` can also be done in two steps: `Box::pin(x)` + /// does the same as <code>[Box::into_pin]\([Box::new]\(x))</code>. Consider using + /// [`into_pin`](Box::into_pin) if you already have a `Box<T>`, or if you want to + /// construct a (pinned) `Box` in a different way than with [`Box::new`]. #[cfg(not(no_global_oom_handling))] #[stable(feature = "pin", since = "1.33.0")] #[must_use] #[inline(always)] pub fn pin(x: T) -> Pin<Box<T>> { - (box x).into() + (#[rustc_box] + Box::new(x)) + .into() } /// Allocates memory on the heap then places `x` into it, @@ -543,8 +568,13 @@ impl<T, A: Allocator> Box<T, A> { unsafe { Ok(Box::from_raw_in(ptr.as_ptr(), alloc)) } } - /// Constructs a new `Pin<Box<T, A>>`. If `T` does not implement `Unpin`, then + /// Constructs a new `Pin<Box<T, A>>`. If `T` does not implement [`Unpin`], then /// `x` will be pinned in memory and unable to be moved. + /// + /// Constructing and pinning of the `Box` can also be done in two steps: `Box::pin_in(x, alloc)` + /// does the same as <code>[Box::into_pin]\([Box::new_in]\(x, alloc))</code>. Consider using + /// [`into_pin`](Box::into_pin) if you already have a `Box<T, A>`, or if you want to + /// construct a (pinned) `Box` in a different way than with [`Box::new_in`]. #[cfg(not(no_global_oom_handling))] #[unstable(feature = "allocator_api", issue = "32838")] #[rustc_const_unstable(feature = "const_box", issue = "92521")] @@ -926,6 +956,7 @@ impl<T: ?Sized> Box<T> { /// [`Layout`]: crate::Layout #[stable(feature = "box_raw", since = "1.4.0")] #[inline] + #[must_use = "call `drop(from_raw(ptr))` if you intend to drop the `Box`"] pub unsafe fn from_raw(raw: *mut T) -> Self { unsafe { Self::from_raw_in(raw, Global) } } @@ -1160,19 +1191,44 @@ impl<T: ?Sized, A: Allocator> Box<T, A> { unsafe { &mut *mem::ManuallyDrop::new(b).0.as_ptr() } } - /// Converts a `Box<T>` into a `Pin<Box<T>>` + /// Converts a `Box<T>` into a `Pin<Box<T>>`. If `T` does not implement [`Unpin`], then + /// `*boxed` will be pinned in memory and unable to be moved. /// /// This conversion does not allocate on the heap and happens in place. /// /// This is also available via [`From`]. 
- #[unstable(feature = "box_into_pin", issue = "62370")] + /// + /// Constructing and pinning a `Box` with <code>Box::into_pin([Box::new]\(x))</code> + /// can also be written more concisely using <code>[Box::pin]\(x)</code>. + /// This `into_pin` method is useful if you already have a `Box<T>`, or you are + /// constructing a (pinned) `Box` in a different way than with [`Box::new`]. + /// + /// # Notes + /// + /// It's not recommended that crates add an impl like `From<Box<T>> for Pin<T>`, + /// as it'll introduce an ambiguity when calling `Pin::from`. + /// A demonstration of such a poor impl is shown below. + /// + /// ```compile_fail + /// # use std::pin::Pin; + /// struct Foo; // A type defined in this crate. + /// impl From<Box<()>> for Pin<Foo> { + /// fn from(_: Box<()>) -> Pin<Foo> { + /// Pin::new(Foo) + /// } + /// } + /// + /// let foo = Box::new(()); + /// let bar = Pin::from(foo); + /// ``` + #[stable(feature = "box_into_pin", since = "1.63.0")] #[rustc_const_unstable(feature = "const_box", issue = "92521")] pub const fn into_pin(boxed: Self) -> Pin<Self> where A: 'static, { // It's not possible to move or replace the insides of a `Pin<Box<T>>` - // when `T: !Unpin`, so it's safe to pin it directly without any + // when `T: !Unpin`, so it's safe to pin it directly without any // additional requirements. unsafe { Pin::new_unchecked(boxed) } } @@ -1190,7 +1246,8 @@ unsafe impl<#[may_dangle] T: ?Sized, A: Allocator> Drop for Box<T, A> { impl<T: Default> Default for Box<T> { /// Creates a `Box<T>`, with the `Default` value for T. fn default() -> Self { - box T::default() + #[rustc_box] + Box::new(T::default()) } } @@ -1408,9 +1465,17 @@ impl<T: ?Sized, A: Allocator> const From<Box<T, A>> for Pin<Box<T, A>> where A: 'static, { - /// Converts a `Box<T>` into a `Pin<Box<T>>` + /// Converts a `Box<T>` into a `Pin<Box<T>>`. If `T` does not implement [`Unpin`], then + /// `*boxed` will be pinned in memory and unable to be moved. /// /// This conversion does not allocate on the heap and happens in place. + /// + /// This is also available via [`Box::into_pin`]. + /// + /// Constructing and pinning a `Box` with <code><Pin<Box\<T>>>::from([Box::new]\(x))</code> + /// can also be written more concisely using <code>[Box::pin]\(x)</code>. + /// This `From` implementation is useful if you already have a `Box<T>`, or you are + /// constructing a (pinned) `Box` in a different way than with [`Box::new`]. fn from(boxed: Box<T, A>) -> Self { Box::into_pin(boxed) } @@ -1422,7 +1487,7 @@ impl<T: Copy> From<&[T]> for Box<[T]> { /// Converts a `&[T]` into a `Box<[T]>` /// /// This conversion allocates on the heap - /// and performs a copy of `slice`. + /// and performs a copy of `slice` and its contents. /// /// # Examples /// ```rust @@ -1554,10 +1619,27 @@ impl<T, const N: usize> From<[T; N]> for Box<[T]> { /// println!("{boxed:?}"); /// ``` fn from(array: [T; N]) -> Box<[T]> { - box array + #[rustc_box] + Box::new(array) } } +/// Casts a boxed slice to a boxed array. +/// +/// # Safety +/// +/// `boxed_slice.len()` must be exactly `N`. 
+unsafe fn boxed_slice_as_array_unchecked<T, A: Allocator, const N: usize>( + boxed_slice: Box<[T], A>, +) -> Box<[T; N], A> { + debug_assert_eq!(boxed_slice.len(), N); + + let (ptr, alloc) = Box::into_raw_with_allocator(boxed_slice); + // SAFETY: Pointer and allocator came from an existing box, + // and our safety condition requires that the length is exactly `N` + unsafe { Box::from_raw_in(ptr as *mut [T; N], alloc) } +} + #[stable(feature = "boxed_slice_try_from", since = "1.43.0")] impl<T, const N: usize> TryFrom<Box<[T]>> for Box<[T; N]> { type Error = Box<[T]>; @@ -1573,13 +1655,46 @@ impl<T, const N: usize> TryFrom<Box<[T]>> for Box<[T; N]> { /// `boxed_slice.len()` does not equal `N`. fn try_from(boxed_slice: Box<[T]>) -> Result<Self, Self::Error> { if boxed_slice.len() == N { - Ok(unsafe { Box::from_raw(Box::into_raw(boxed_slice) as *mut [T; N]) }) + Ok(unsafe { boxed_slice_as_array_unchecked(boxed_slice) }) } else { Err(boxed_slice) } } } +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "boxed_array_try_from_vec", since = "1.66.0")] +impl<T, const N: usize> TryFrom<Vec<T>> for Box<[T; N]> { + type Error = Vec<T>; + + /// Attempts to convert a `Vec<T>` into a `Box<[T; N]>`. + /// + /// Like [`Vec::into_boxed_slice`], this is in-place if `vec.capacity() == N`, + /// but will require a reallocation otherwise. + /// + /// # Errors + /// + /// Returns the original `Vec<T>` in the `Err` variant if + /// `boxed_slice.len()` does not equal `N`. + /// + /// # Examples + /// + /// This can be used with [`vec!`] to create an array on the heap: + /// + /// ``` + /// let state: Box<[f32; 100]> = vec![1.0; 100].try_into().unwrap(); + /// assert_eq!(state.len(), 100); + /// ``` + fn try_from(vec: Vec<T>) -> Result<Self, Self::Error> { + if vec.len() == N { + let boxed_slice = vec.into_boxed_slice(); + Ok(unsafe { boxed_slice_as_array_unchecked(boxed_slice) }) + } else { + Err(vec) + } + } +} + impl<A: Allocator> Box<dyn Any, A> { /// Attempt to downcast the box to a concrete type. /// @@ -1973,8 +2088,7 @@ impl<T: ?Sized, A: Allocator> AsMut<T> for Box<T, A> { * could have a method to project a Pin<T> from it. */ #[stable(feature = "pin", since = "1.33.0")] -#[rustc_const_unstable(feature = "const_box", issue = "92521")] -impl<T: ?Sized, A: Allocator> const Unpin for Box<T, A> where A: 'static {} +impl<T: ?Sized, A: Allocator> Unpin for Box<T, A> where A: 'static {} #[unstable(feature = "generator_trait", issue = "43122")] impl<G: ?Sized + Generator<R> + Unpin, R, A: Allocator> Generator<R> for Box<G, A> @@ -2026,3 +2140,292 @@ impl<S: ?Sized + AsyncIterator + Unpin> AsyncIterator for Box<S> { (**self).size_hint() } } + +impl dyn Error { + #[inline] + #[stable(feature = "error_downcast", since = "1.3.0")] + #[rustc_allow_incoherent_impl] + /// Attempts to downcast the box to a concrete type. + pub fn downcast<T: Error + 'static>(self: Box<Self>) -> Result<Box<T>, Box<dyn Error>> { + if self.is::<T>() { + unsafe { + let raw: *mut dyn Error = Box::into_raw(self); + Ok(Box::from_raw(raw as *mut T)) + } + } else { + Err(self) + } + } +} + +impl dyn Error + Send { + #[inline] + #[stable(feature = "error_downcast", since = "1.3.0")] + #[rustc_allow_incoherent_impl] + /// Attempts to downcast the box to a concrete type. + pub fn downcast<T: Error + 'static>(self: Box<Self>) -> Result<Box<T>, Box<dyn Error + Send>> { + let err: Box<dyn Error> = self; + <dyn Error>::downcast(err).map_err(|s| unsafe { + // Reapply the `Send` marker. 
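Both fixed-size conversions reworked or added here are observable from stable Rust (`TryFrom<Box<[T]>>` since 1.43, `TryFrom<Vec<T>>` since 1.66, per the stability attributes above); a short sketch of the success and failure paths, assuming edition 2021 for the prelude `TryInto`.

```rust
fn main() {
    // Vec<T> -> Box<[T; N]>: in place when the capacity already equals N,
    // otherwise `into_boxed_slice` reallocates first.
    let arr: Box<[u8; 4]> = vec![1, 2, 3, 4].try_into().unwrap();
    assert_eq!(*arr, [1, 2, 3, 4]);

    // On a length mismatch the original Vec comes back in `Err`.
    let err: Result<Box<[u8; 3]>, Vec<u8>> = vec![1u8, 2, 3, 4].try_into();
    assert_eq!(err.unwrap_err(), [1, 2, 3, 4]);

    // Box<[T]> -> Box<[T; N]> goes through the same unchecked cast helper.
    let boxed: Box<[u8]> = vec![9u8, 9].into_boxed_slice();
    let pair: Box<[u8; 2]> = boxed.try_into().unwrap();
    assert_eq!(*pair, [9, 9]);
}
```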
+ mem::transmute::<Box<dyn Error>, Box<dyn Error + Send>>(s) + }) + } +} + +impl dyn Error + Send + Sync { + #[inline] + #[stable(feature = "error_downcast", since = "1.3.0")] + #[rustc_allow_incoherent_impl] + /// Attempts to downcast the box to a concrete type. + pub fn downcast<T: Error + 'static>(self: Box<Self>) -> Result<Box<T>, Box<Self>> { + let err: Box<dyn Error> = self; + <dyn Error>::downcast(err).map_err(|s| unsafe { + // Reapply the `Send + Sync` marker. + mem::transmute::<Box<dyn Error>, Box<dyn Error + Send + Sync>>(s) + }) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, E: Error + 'a> From<E> for Box<dyn Error + 'a> { + /// Converts a type of [`Error`] into a box of dyn [`Error`]. + /// + /// # Examples + /// + /// ``` + /// use std::error::Error; + /// use std::fmt; + /// use std::mem; + /// + /// #[derive(Debug)] + /// struct AnError; + /// + /// impl fmt::Display for AnError { + /// fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + /// write!(f, "An error") + /// } + /// } + /// + /// impl Error for AnError {} + /// + /// let an_error = AnError; + /// assert!(0 == mem::size_of_val(&an_error)); + /// let a_boxed_error = Box::<dyn Error>::from(an_error); + /// assert!(mem::size_of::<Box<dyn Error>>() == mem::size_of_val(&a_boxed_error)) + /// ``` + fn from(err: E) -> Box<dyn Error + 'a> { + Box::new(err) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a, E: Error + Send + Sync + 'a> From<E> for Box<dyn Error + Send + Sync + 'a> { + /// Converts a type of [`Error`] + [`Send`] + [`Sync`] into a box of + /// dyn [`Error`] + [`Send`] + [`Sync`]. + /// + /// # Examples + /// + /// ``` + /// use std::error::Error; + /// use std::fmt; + /// use std::mem; + /// + /// #[derive(Debug)] + /// struct AnError; + /// + /// impl fmt::Display for AnError { + /// fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + /// write!(f, "An error") + /// } + /// } + /// + /// impl Error for AnError {} + /// + /// unsafe impl Send for AnError {} + /// + /// unsafe impl Sync for AnError {} + /// + /// let an_error = AnError; + /// assert!(0 == mem::size_of_val(&an_error)); + /// let a_boxed_error = Box::<dyn Error + Send + Sync>::from(an_error); + /// assert!( + /// mem::size_of::<Box<dyn Error + Send + Sync>>() == mem::size_of_val(&a_boxed_error)) + /// ``` + fn from(err: E) -> Box<dyn Error + Send + Sync + 'a> { + Box::new(err) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl From<String> for Box<dyn Error + Send + Sync> { + /// Converts a [`String`] into a box of dyn [`Error`] + [`Send`] + [`Sync`]. 
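Taken together, the blanket `From<E: Error>` impls and the `downcast` methods let callers box an arbitrary error and later recover the concrete type. A minimal sketch with a hypothetical `MyError` type:

```rust
use std::error::Error;
use std::fmt;

#[derive(Debug)]
struct MyError;

impl fmt::Display for MyError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "my error")
    }
}

impl Error for MyError {}

fn main() {
    // Box the error via the blanket `From<E: Error>` impl...
    let boxed: Box<dyn Error> = Box::from(MyError);
    // ...and get the concrete type back out with `downcast`.
    let concrete: Box<MyError> = boxed.downcast::<MyError>().unwrap();
    assert_eq!(concrete.to_string(), "my error");
}
```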
+ /// + /// # Examples + /// + /// ``` + /// use std::error::Error; + /// use std::mem; + /// + /// let a_string_error = "a string error".to_string(); + /// let a_boxed_error = Box::<dyn Error + Send + Sync>::from(a_string_error); + /// assert!( + /// mem::size_of::<Box<dyn Error + Send + Sync>>() == mem::size_of_val(&a_boxed_error)) + /// ``` + #[inline] + fn from(err: String) -> Box<dyn Error + Send + Sync> { + struct StringError(String); + + impl Error for StringError { + #[allow(deprecated)] + fn description(&self) -> &str { + &self.0 + } + } + + impl fmt::Display for StringError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(&self.0, f) + } + } + + // Purposefully skip printing "StringError(..)" + impl fmt::Debug for StringError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&self.0, f) + } + } + + Box::new(StringError(err)) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "string_box_error", since = "1.6.0")] +impl From<String> for Box<dyn Error> { + /// Converts a [`String`] into a box of dyn [`Error`]. + /// + /// # Examples + /// + /// ``` + /// use std::error::Error; + /// use std::mem; + /// + /// let a_string_error = "a string error".to_string(); + /// let a_boxed_error = Box::<dyn Error>::from(a_string_error); + /// assert!(mem::size_of::<Box<dyn Error>>() == mem::size_of_val(&a_boxed_error)) + /// ``` + fn from(str_err: String) -> Box<dyn Error> { + let err1: Box<dyn Error + Send + Sync> = From::from(str_err); + let err2: Box<dyn Error> = err1; + err2 + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a> From<&str> for Box<dyn Error + Send + Sync + 'a> { + /// Converts a [`str`] into a box of dyn [`Error`] + [`Send`] + [`Sync`]. + /// + /// [`str`]: prim@str + /// + /// # Examples + /// + /// ``` + /// use std::error::Error; + /// use std::mem; + /// + /// let a_str_error = "a str error"; + /// let a_boxed_error = Box::<dyn Error + Send + Sync>::from(a_str_error); + /// assert!( + /// mem::size_of::<Box<dyn Error + Send + Sync>>() == mem::size_of_val(&a_boxed_error)) + /// ``` + #[inline] + fn from(err: &str) -> Box<dyn Error + Send + Sync + 'a> { + From::from(String::from(err)) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "string_box_error", since = "1.6.0")] +impl From<&str> for Box<dyn Error> { + /// Converts a [`str`] into a box of dyn [`Error`]. + /// + /// [`str`]: prim@str + /// + /// # Examples + /// + /// ``` + /// use std::error::Error; + /// use std::mem; + /// + /// let a_str_error = "a str error"; + /// let a_boxed_error = Box::<dyn Error>::from(a_str_error); + /// assert!(mem::size_of::<Box<dyn Error>>() == mem::size_of_val(&a_boxed_error)) + /// ``` + fn from(err: &str) -> Box<dyn Error> { + From::from(String::from(err)) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "cow_box_error", since = "1.22.0")] +impl<'a, 'b> From<Cow<'b, str>> for Box<dyn Error + Send + Sync + 'a> { + /// Converts a [`Cow`] into a box of dyn [`Error`] + [`Send`] + [`Sync`]. 
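The `String` and `&str` conversions are what make `?` and `.into()` ergonomic with `Box<dyn Error + Send + Sync>` as an error type; a small sketch:

```rust
use std::error::Error;

fn parse_positive(s: &str) -> Result<u32, Box<dyn Error + Send + Sync>> {
    // ParseIntError is boxed through the blanket `From<E: Error + Send + Sync>` impl.
    let n: u32 = s.parse()?;
    if n == 0 {
        // Plain string messages go through the `From<&str>` impl above.
        return Err("expected a positive number".into());
    }
    Ok(n)
}

fn main() {
    assert_eq!(parse_positive("7").unwrap(), 7);
    assert!(parse_positive("0").is_err());
    assert!(parse_positive("abc").is_err());
}
```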
+ /// + /// # Examples + /// + /// ``` + /// use std::error::Error; + /// use std::mem; + /// use std::borrow::Cow; + /// + /// let a_cow_str_error = Cow::from("a str error"); + /// let a_boxed_error = Box::<dyn Error + Send + Sync>::from(a_cow_str_error); + /// assert!( + /// mem::size_of::<Box<dyn Error + Send + Sync>>() == mem::size_of_val(&a_boxed_error)) + /// ``` + fn from(err: Cow<'b, str>) -> Box<dyn Error + Send + Sync + 'a> { + From::from(String::from(err)) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "cow_box_error", since = "1.22.0")] +impl<'a> From<Cow<'a, str>> for Box<dyn Error> { + /// Converts a [`Cow`] into a box of dyn [`Error`]. + /// + /// # Examples + /// + /// ``` + /// use std::error::Error; + /// use std::mem; + /// use std::borrow::Cow; + /// + /// let a_cow_str_error = Cow::from("a str error"); + /// let a_boxed_error = Box::<dyn Error>::from(a_cow_str_error); + /// assert!(mem::size_of::<Box<dyn Error>>() == mem::size_of_val(&a_boxed_error)) + /// ``` + fn from(err: Cow<'a, str>) -> Box<dyn Error> { + From::from(String::from(err)) + } +} + +#[stable(feature = "box_error", since = "1.8.0")] +impl<T: core::error::Error> core::error::Error for Box<T> { + #[allow(deprecated, deprecated_in_future)] + fn description(&self) -> &str { + core::error::Error::description(&**self) + } + + #[allow(deprecated)] + fn cause(&self) -> Option<&dyn core::error::Error> { + core::error::Error::cause(&**self) + } + + fn source(&self) -> Option<&(dyn core::error::Error + 'static)> { + core::error::Error::source(&**self) + } +} diff --git a/rust/alloc/collections/mod.rs b/rust/alloc/collections/mod.rs index 1eec265b28f8..da6154412416 100644 --- a/rust/alloc/collections/mod.rs +++ b/rust/alloc/collections/mod.rs @@ -154,3 +154,6 @@ trait SpecExtend<I: IntoIterator> { /// Extends `self` with the contents of the given iterator. fn spec_extend(&mut self, iter: I); } + +#[stable(feature = "try_reserve", since = "1.57.0")] +impl core::error::Error for TryReserveError {} diff --git a/rust/alloc/lib.rs b/rust/alloc/lib.rs index b2a13a53f19a..cc3850c3b519 100644 --- a/rust/alloc/lib.rs +++ b/rust/alloc/lib.rs @@ -58,10 +58,6 @@ //! [`Rc`]: rc //! [`RefCell`]: core::cell -// To run liballoc tests without x.py without ending up with two copies of liballoc, Miri needs to be -// able to "empty" this crate. See <https://github.com/rust-lang/miri-test-libstd/issues/4>. -// rustc itself never sets the feature, so this line has no affect there. -#![cfg(any(not(feature = "miri-test-libstd"), test, doctest))] #![allow(unused_attributes)] #![stable(feature = "alloc", since = "1.36.0")] #![doc( @@ -75,10 +71,16 @@ any(not(feature = "miri-test-libstd"), test, doctest), no_global_oom_handling, not(no_global_oom_handling), + not(no_rc), + not(no_sync), target_has_atomic = "ptr" ))] #![no_std] #![needs_allocator] +// To run liballoc tests without x.py without ending up with two copies of liballoc, Miri needs to be +// able to "empty" this crate. See <https://github.com/rust-lang/miri-test-libstd/issues/4>. +// rustc itself never sets the feature, so this line has no affect there. 
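Because `TryReserveError` now implements `core::error::Error` (hunk above), fallible-allocation code can propagate it with `?` like any other boxed error. A sketch against the std equivalents:

```rust
use std::error::Error;

fn zeroed_buffer(n: usize) -> Result<Vec<u8>, Box<dyn Error>> {
    let mut buf = Vec::new();
    // `try_reserve_exact` reports failure instead of aborting; the error
    // converts into `Box<dyn Error>` because `TryReserveError: Error`.
    buf.try_reserve_exact(n)?;
    buf.resize(n, 0);
    Ok(buf)
}

fn main() {
    assert_eq!(zeroed_buffer(16).unwrap().len(), 16);
}
```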
+#![cfg(any(not(feature = "miri-test-libstd"), test, doctest))] // // Lints: #![deny(unsafe_op_in_unsafe_fn)] @@ -88,10 +90,10 @@ #![allow(explicit_outlives_requirements)] // // Library features: -#![cfg_attr(not(no_global_oom_handling), feature(alloc_c_string))] #![feature(alloc_layout_extra)] #![feature(allocator_api)] #![feature(array_chunks)] +#![feature(array_into_iter_constructors)] #![feature(array_methods)] #![feature(array_windows)] #![feature(assert_matches)] @@ -99,8 +101,8 @@ #![feature(coerce_unsized)] #![cfg_attr(not(no_global_oom_handling), feature(const_alloc_error))] #![feature(const_box)] -#![cfg_attr(not(no_global_oom_handling), feature(const_btree_new))] -#![feature(const_cow_is_borrowed)] +#![cfg_attr(not(no_global_oom_handling), feature(const_btree_len))] +#![cfg_attr(not(no_borrow), feature(const_cow_is_borrowed))] #![feature(const_convert)] #![feature(const_size_of_val)] #![feature(const_align_of_val)] @@ -108,13 +110,14 @@ #![feature(const_maybe_uninit_write)] #![feature(const_maybe_uninit_as_mut_ptr)] #![feature(const_refs_to_cell)] -#![feature(core_c_str)] #![feature(core_intrinsics)] -#![feature(core_ffi_c)] #![feature(const_eval_select)] #![feature(const_pin)] +#![feature(const_waker)] #![feature(cstr_from_bytes_until_nul)] #![feature(dispatch_from_dyn)] +#![feature(error_generic_member_access)] +#![feature(error_in_core)] #![feature(exact_size_is_empty)] #![feature(extend_one)] #![feature(fmt_internals)] @@ -122,16 +125,24 @@ #![feature(hasher_prefixfree_extras)] #![feature(inplace_iteration)] #![feature(iter_advance_by)] +#![feature(iter_next_chunk)] #![feature(layout_for_ptr)] #![feature(maybe_uninit_slice)] +#![feature(maybe_uninit_uninit_array)] +#![feature(maybe_uninit_uninit_array_transpose)] #![cfg_attr(test, feature(new_uninit))] #![feature(nonnull_slice_from_raw_parts)] #![feature(pattern)] +#![feature(pointer_byte_offsets)] +#![feature(provide_any)] #![feature(ptr_internals)] #![feature(ptr_metadata)] #![feature(ptr_sub_ptr)] #![feature(receiver_trait)] +#![feature(saturating_int_impl)] #![feature(set_ptr_value)] +#![feature(sized_type_properties)] +#![feature(slice_from_ptr_range)] #![feature(slice_group_by)] #![feature(slice_ptr_get)] #![feature(slice_ptr_len)] @@ -145,12 +156,13 @@ #![feature(unchecked_math)] #![feature(unicode_internals)] #![feature(unsize)] +#![feature(utf8_chunks)] +#![feature(std_internals)] // // Language features: #![feature(allocator_internals)] #![feature(allow_internal_unstable)] #![feature(associated_type_bounds)] -#![feature(box_syntax)] #![feature(cfg_sanitize)] #![feature(const_deref)] #![feature(const_mut_refs)] @@ -164,19 +176,20 @@ #![cfg_attr(not(test), feature(generator_trait))] #![feature(hashmap_internals)] #![feature(lang_items)] -#![feature(let_else)] #![feature(min_specialization)] #![feature(negative_impls)] #![feature(never_type)] -#![feature(nll)] // Not necessary, but here to test the `nll` feature. 
#![feature(rustc_allow_const_fn_unstable)] #![feature(rustc_attrs)] +#![feature(pointer_is_aligned)] #![feature(slice_internals)] #![feature(staged_api)] +#![feature(stmt_expr_attributes)] #![cfg_attr(test, feature(test))] #![feature(unboxed_closures)] #![feature(unsized_fn_params)] #![feature(c_unwind)] +#![feature(with_negative_coherence)] // // Rustdoc features: #![feature(doc_cfg)] @@ -216,9 +229,10 @@ pub mod boxed; mod boxed { pub use std::boxed::Box; } +#[cfg(not(no_borrow))] pub mod borrow; pub mod collections; -#[cfg(not(no_global_oom_handling))] +#[cfg(all(not(no_rc), not(no_sync), not(no_global_oom_handling)))] pub mod ffi; #[cfg(not(no_fmt))] pub mod fmt; @@ -230,9 +244,9 @@ pub mod str; #[cfg(not(no_string))] pub mod string; #[cfg(not(no_sync))] -#[cfg(target_has_atomic = "ptr")] +#[cfg(all(not(no_rc), target_has_atomic = "ptr"))] pub mod sync; -#[cfg(all(not(no_global_oom_handling), target_has_atomic = "ptr"))] +#[cfg(all(not(no_global_oom_handling), not(no_rc), not(no_sync), target_has_atomic = "ptr"))] pub mod task; #[cfg(test)] mod tests; diff --git a/rust/alloc/raw_vec.rs b/rust/alloc/raw_vec.rs index eb77db5def55..f02faff67a80 100644 --- a/rust/alloc/raw_vec.rs +++ b/rust/alloc/raw_vec.rs @@ -5,7 +5,7 @@ use core::alloc::LayoutError; use core::cmp; use core::intrinsics; -use core::mem::{self, ManuallyDrop, MaybeUninit}; +use core::mem::{self, ManuallyDrop, MaybeUninit, SizedTypeProperties}; use core::ops::Drop; use core::ptr::{self, NonNull, Unique}; use core::slice; @@ -177,7 +177,7 @@ impl<T, A: Allocator> RawVec<T, A> { #[cfg(not(no_global_oom_handling))] fn allocate_in(capacity: usize, init: AllocInit, alloc: A) -> Self { // Don't allocate here because `Drop` will not deallocate when `capacity` is 0. - if mem::size_of::<T>() == 0 || capacity == 0 { + if T::IS_ZST || capacity == 0 { Self::new_in(alloc) } else { // We avoid `unwrap_or_else` here because it bloats the amount of @@ -212,7 +212,7 @@ impl<T, A: Allocator> RawVec<T, A> { fn try_allocate_in(capacity: usize, init: AllocInit, alloc: A) -> Result<Self, TryReserveError> { // Don't allocate here because `Drop` will not deallocate when `capacity` is 0. - if mem::size_of::<T>() == 0 || capacity == 0 { + if T::IS_ZST || capacity == 0 { return Ok(Self::new_in(alloc)); } @@ -262,7 +262,7 @@ impl<T, A: Allocator> RawVec<T, A> { /// This will always be `usize::MAX` if `T` is zero-sized. #[inline(always)] pub fn capacity(&self) -> usize { - if mem::size_of::<T>() == 0 { usize::MAX } else { self.cap } + if T::IS_ZST { usize::MAX } else { self.cap } } /// Returns a shared reference to the allocator backing this `RawVec`. @@ -271,7 +271,7 @@ impl<T, A: Allocator> RawVec<T, A> { } fn current_memory(&self) -> Option<(NonNull<u8>, Layout)> { - if mem::size_of::<T>() == 0 || self.cap == 0 { + if T::IS_ZST || self.cap == 0 { None } else { // We have an allocated chunk of memory, so we can bypass runtime @@ -419,7 +419,7 @@ impl<T, A: Allocator> RawVec<T, A> { // This is ensured by the calling contexts. debug_assert!(additional > 0); - if mem::size_of::<T>() == 0 { + if T::IS_ZST { // Since we return a capacity of `usize::MAX` when `elem_size` is // 0, getting to here necessarily means the `RawVec` is overfull. return Err(CapacityOverflow.into()); @@ -445,7 +445,7 @@ impl<T, A: Allocator> RawVec<T, A> { // `grow_amortized`, but this method is usually instantiated less often so // it's less critical. 
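The `T::IS_ZST` checks above preserve the long-standing rule that a `RawVec` of a zero-sized type never allocates and reports `usize::MAX` capacity; the observable behaviour:

```rust
fn main() {
    // Zero-sized element type: no allocation, capacity is usize::MAX.
    let v: Vec<()> = Vec::with_capacity(10);
    assert_eq!(v.capacity(), usize::MAX);

    let mut w: Vec<()> = Vec::new();
    w.push(());
    assert_eq!(w.len(), 1);
    assert_eq!(w.capacity(), usize::MAX);
}
```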
fn grow_exact(&mut self, len: usize, additional: usize) -> Result<(), TryReserveError> { - if mem::size_of::<T>() == 0 { + if T::IS_ZST { // Since we return a capacity of `usize::MAX` when the type size is // 0, getting to here necessarily means the `RawVec` is overfull. return Err(CapacityOverflow.into()); diff --git a/rust/alloc/slice.rs b/rust/alloc/slice.rs index e444e97fa145..467935cf9a06 100644 --- a/rust/alloc/slice.rs +++ b/rust/alloc/slice.rs @@ -1,84 +1,14 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT -//! A dynamically-sized view into a contiguous sequence, `[T]`. +//! Utilities for the slice primitive type. //! //! *[See also the slice primitive type](slice).* //! -//! Slices are a view into a block of memory represented as a pointer and a -//! length. +//! Most of the structs in this module are iterator types which can only be created +//! using a certain function. For example, `slice.iter()` yields an [`Iter`]. //! -//! ``` -//! // slicing a Vec -//! let vec = vec![1, 2, 3]; -//! let int_slice = &vec[..]; -//! // coercing an array to a slice -//! let str_slice: &[&str] = &["one", "two", "three"]; -//! ``` -//! -//! Slices are either mutable or shared. The shared slice type is `&[T]`, -//! while the mutable slice type is `&mut [T]`, where `T` represents the element -//! type. For example, you can mutate the block of memory that a mutable slice -//! points to: -//! -//! ``` -//! let x = &mut [1, 2, 3]; -//! x[1] = 7; -//! assert_eq!(x, &[1, 7, 3]); -//! ``` -//! -//! Here are some of the things this module contains: -//! -//! ## Structs -//! -//! There are several structs that are useful for slices, such as [`Iter`], which -//! represents iteration over a slice. -//! -//! ## Trait Implementations -//! -//! There are several implementations of common traits for slices. Some examples -//! include: -//! -//! * [`Clone`] -//! * [`Eq`], [`Ord`] - for slices whose element type are [`Eq`] or [`Ord`]. -//! * [`Hash`] - for slices whose element type is [`Hash`]. -//! -//! ## Iteration -//! -//! The slices implement `IntoIterator`. The iterator yields references to the -//! slice elements. -//! -//! ``` -//! let numbers = &[0, 1, 2]; -//! for n in numbers { -//! println!("{n} is a number!"); -//! } -//! ``` -//! -//! The mutable slice yields mutable references to the elements: -//! -//! ``` -//! let mut scores = [7, 8, 9]; -//! for score in &mut scores[..] { -//! *score += 1; -//! } -//! ``` -//! -//! This iterator yields mutable references to the slice's elements, so while -//! the element type of the slice is `i32`, the element type of the iterator is -//! `&mut i32`. -//! -//! * [`.iter`] and [`.iter_mut`] are the explicit methods to return the default -//! iterators. -//! * Further methods that return iterators are [`.split`], [`.splitn`], -//! [`.chunks`], [`.windows`] and more. -//! -//! [`Hash`]: core::hash::Hash -//! [`.iter`]: slice::iter -//! [`.iter_mut`]: slice::iter_mut -//! [`.split`]: slice::split -//! [`.splitn`]: slice::splitn -//! [`.chunks`]: slice::chunks -//! [`.windows`]: slice::windows +//! A few functions are provided to create a slice from a value reference +//! or from a raw pointer. #![stable(feature = "rust1", since = "1.0.0")] // Many of the usings in this module are only used in the test configuration. // It's cleaner to just turn off the unused_imports warning than to fix them. 
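The trimmed module docs above point at the slice iterators and the from-reference constructors; a tiny usage sketch:

```rust
use std::slice;

fn main() {
    // `slice::from_ref` views a single value as a one-element slice.
    let x = 5i32;
    assert_eq!(slice::from_ref(&x), &[5]);

    // `slice.iter()` yields an `Iter` over shared references.
    let v = [1, 2, 3];
    let doubled: Vec<i32> = v.iter().map(|n| n * 2).collect();
    assert_eq!(doubled, [2, 4, 6]);
}
```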
@@ -88,9 +18,7 @@ use core::borrow::{Borrow, BorrowMut}; #[cfg(not(no_global_oom_handling))] use core::cmp::Ordering::{self, Less}; #[cfg(not(no_global_oom_handling))] -use core::mem; -#[cfg(not(no_global_oom_handling))] -use core::mem::size_of; +use core::mem::{self, SizedTypeProperties}; #[cfg(not(no_global_oom_handling))] use core::ptr; @@ -116,6 +44,8 @@ pub use core::slice::EscapeAscii; pub use core::slice::SliceIndex; #[stable(feature = "from_ref", since = "1.28.0")] pub use core::slice::{from_mut, from_ref}; +#[unstable(feature = "slice_from_ptr_range", issue = "89792")] +pub use core::slice::{from_mut_ptr_range, from_ptr_range}; #[stable(feature = "rust1", since = "1.0.0")] pub use core::slice::{from_raw_parts, from_raw_parts_mut}; #[stable(feature = "rust1", since = "1.0.0")] @@ -275,7 +205,7 @@ impl<T> [T] { where T: Ord, { - merge_sort(self, |a, b| a.lt(b)); + merge_sort(self, T::lt); } /// Sorts the slice with a comparator function. @@ -836,14 +766,14 @@ impl<T: Clone, V: Borrow<[T]>> Join<&[T]> for [V] { //////////////////////////////////////////////////////////////////////////////// #[stable(feature = "rust1", since = "1.0.0")] -impl<T> Borrow<[T]> for Vec<T> { +impl<T, A: Allocator> Borrow<[T]> for Vec<T, A> { fn borrow(&self) -> &[T] { &self[..] } } #[stable(feature = "rust1", since = "1.0.0")] -impl<T> BorrowMut<[T]> for Vec<T> { +impl<T, A: Allocator> BorrowMut<[T]> for Vec<T, A> { fn borrow_mut(&mut self) -> &mut [T] { &mut self[..] } @@ -1024,7 +954,7 @@ where // Consume the greater side. // If equal, prefer the right run to maintain stability. unsafe { - let to_copy = if is_less(&*right.offset(-1), &*left.offset(-1)) { + let to_copy = if is_less(&*right.sub(1), &*left.sub(1)) { decrement_and_get(left) } else { decrement_and_get(right) @@ -1038,12 +968,12 @@ where unsafe fn get_and_increment<T>(ptr: &mut *mut T) -> *mut T { let old = *ptr; - *ptr = unsafe { ptr.offset(1) }; + *ptr = unsafe { ptr.add(1) }; old } unsafe fn decrement_and_get<T>(ptr: &mut *mut T) -> *mut T { - *ptr = unsafe { ptr.offset(-1) }; + *ptr = unsafe { ptr.sub(1) }; *ptr } @@ -1088,7 +1018,7 @@ where const MIN_RUN: usize = 10; // Sorting has no meaningful behavior on zero-sized types. - if size_of::<T>() == 0 { + if T::IS_ZST { return; } diff --git a/rust/alloc/vec/drain.rs b/rust/alloc/vec/drain.rs index b6a5f98e4fcd..3594ad890c3d 100644 --- a/rust/alloc/vec/drain.rs +++ b/rust/alloc/vec/drain.rs @@ -3,7 +3,7 @@ use crate::alloc::{Allocator, Global}; use core::fmt; use core::iter::{FusedIterator, TrustedLen}; -use core::mem; +use core::mem::{self, ManuallyDrop, SizedTypeProperties}; use core::ptr::{self, NonNull}; use core::slice::{self}; @@ -67,6 +67,77 @@ impl<'a, T, A: Allocator> Drain<'a, T, A> { pub fn allocator(&self) -> &A { unsafe { self.vec.as_ref().allocator() } } + + /// Keep unyielded elements in the source `Vec`. + /// + /// # Examples + /// + /// ``` + /// #![feature(drain_keep_rest)] + /// + /// let mut vec = vec!['a', 'b', 'c']; + /// let mut drain = vec.drain(..); + /// + /// assert_eq!(drain.next().unwrap(), 'a'); + /// + /// // This call keeps 'b' and 'c' in the vec. + /// drain.keep_rest(); + /// + /// // If we wouldn't call `keep_rest()`, + /// // `vec` would be empty. 
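For contrast with the new (still unstable) `keep_rest`, this is the stable behaviour today: dropping a partially consumed `Drain` still removes the whole drained range.

```rust
fn main() {
    let mut v = vec!['a', 'b', 'c'];
    {
        let mut d = v.drain(..);
        assert_eq!(d.next(), Some('a'));
        // `d` is dropped here; 'b' and 'c' are dropped with it.
    }
    assert!(v.is_empty());

    // With the `drain_keep_rest` feature added above, calling
    // `d.keep_rest()` before the drop would leave ['b', 'c'] in `v`.
}
```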
+ /// assert_eq!(vec, ['b', 'c']); + /// ``` + #[unstable(feature = "drain_keep_rest", issue = "101122")] + pub fn keep_rest(self) { + // At this moment layout looks like this: + // + // [head] [yielded by next] [unyielded] [yielded by next_back] [tail] + // ^-- start \_________/-- unyielded_len \____/-- self.tail_len + // ^-- unyielded_ptr ^-- tail + // + // Normally `Drop` impl would drop [unyielded] and then move [tail] to the `start`. + // Here we want to + // 1. Move [unyielded] to `start` + // 2. Move [tail] to a new start at `start + len(unyielded)` + // 3. Update length of the original vec to `len(head) + len(unyielded) + len(tail)` + // a. In case of ZST, this is the only thing we want to do + // 4. Do *not* drop self, as everything is put in a consistent state already, there is nothing to do + let mut this = ManuallyDrop::new(self); + + unsafe { + let source_vec = this.vec.as_mut(); + + let start = source_vec.len(); + let tail = this.tail_start; + + let unyielded_len = this.iter.len(); + let unyielded_ptr = this.iter.as_slice().as_ptr(); + + // ZSTs have no identity, so we don't need to move them around. + let needs_move = mem::size_of::<T>() != 0; + + if needs_move { + let start_ptr = source_vec.as_mut_ptr().add(start); + + // memmove back unyielded elements + if unyielded_ptr != start_ptr { + let src = unyielded_ptr; + let dst = start_ptr; + + ptr::copy(src, dst, unyielded_len); + } + + // memmove back untouched tail + if tail != (start + unyielded_len) { + let src = source_vec.as_ptr().add(tail); + let dst = start_ptr.add(unyielded_len); + ptr::copy(src, dst, this.tail_len); + } + } + + source_vec.set_len(start + unyielded_len + this.tail_len); + } + } } #[stable(feature = "vec_drain_as_slice", since = "1.46.0")] @@ -133,7 +204,7 @@ impl<T, A: Allocator> Drop for Drain<'_, T, A> { let mut vec = self.vec; - if mem::size_of::<T>() == 0 { + if T::IS_ZST { // ZSTs have no identity, so we don't need to move them around, we only need to drop the correct amount. // this can be achieved by manipulating the Vec length instead of moving values out from `iter`. unsafe { diff --git a/rust/alloc/vec/drain_filter.rs b/rust/alloc/vec/drain_filter.rs index b04fce041622..4b019220657d 100644 --- a/rust/alloc/vec/drain_filter.rs +++ b/rust/alloc/vec/drain_filter.rs @@ -1,8 +1,9 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT use crate::alloc::{Allocator, Global}; -use core::ptr::{self}; -use core::slice::{self}; +use core::mem::{self, ManuallyDrop}; +use core::ptr; +use core::slice; use super::Vec; @@ -56,6 +57,61 @@ where pub fn allocator(&self) -> &A { self.vec.allocator() } + + /// Keep unyielded elements in the source `Vec`. + /// + /// # Examples + /// + /// ``` + /// #![feature(drain_filter)] + /// #![feature(drain_keep_rest)] + /// + /// let mut vec = vec!['a', 'b', 'c']; + /// let mut drain = vec.drain_filter(|_| true); + /// + /// assert_eq!(drain.next().unwrap(), 'a'); + /// + /// // This call keeps 'b' and 'c' in the vec. + /// drain.keep_rest(); + /// + /// // If we wouldn't call `keep_rest()`, + /// // `vec` would be empty. + /// assert_eq!(vec, ['b', 'c']); + /// ``` + #[unstable(feature = "drain_keep_rest", issue = "101122")] + pub fn keep_rest(self) { + // At this moment layout looks like this: + // + // _____________________/-- old_len + // / \ + // [kept] [yielded] [tail] + // \_______/ ^-- idx + // \-- del + // + // Normally `Drop` impl would drop [tail] (via .for_each(drop), ie still calling `pred`) + // + // 1. Move [tail] after [kept] + // 2. 
Update length of the original vec to `old_len - del` + // a. In case of ZST, this is the only thing we want to do + // 3. Do *not* drop self, as everything is put in a consistent state already, there is nothing to do + let mut this = ManuallyDrop::new(self); + + unsafe { + // ZSTs have no identity, so we don't need to move them around. + let needs_move = mem::size_of::<T>() != 0; + + if needs_move && this.idx < this.old_len && this.del > 0 { + let ptr = this.vec.as_mut_ptr(); + let src = ptr.add(this.idx); + let dst = src.sub(this.del); + let tail_len = this.old_len - this.idx; + src.copy_to(dst, tail_len); + } + + let new_len = this.old_len - this.del; + this.vec.set_len(new_len); + } + } } #[unstable(feature = "drain_filter", reason = "recently added", issue = "43244")] diff --git a/rust/alloc/vec/into_iter.rs b/rust/alloc/vec/into_iter.rs index f7a50e76691e..a8a2a8b66bfd 100644 --- a/rust/alloc/vec/into_iter.rs +++ b/rust/alloc/vec/into_iter.rs @@ -4,13 +4,13 @@ use super::AsVecIntoIter; use crate::alloc::{Allocator, Global}; use crate::raw_vec::RawVec; +use core::array; use core::fmt; -use core::intrinsics::arith_offset; use core::iter::{ FusedIterator, InPlaceIterable, SourceIter, TrustedLen, TrustedRandomAccessNoCoerce, }; use core::marker::PhantomData; -use core::mem::{self, ManuallyDrop}; +use core::mem::{self, ManuallyDrop, MaybeUninit, SizedTypeProperties}; #[cfg(not(no_global_oom_handling))] use core::ops::Deref; use core::ptr::{self, NonNull}; @@ -97,13 +97,16 @@ impl<T, A: Allocator> IntoIter<T, A> { } /// Drops remaining elements and relinquishes the backing allocation. + /// This method guarantees it won't panic before relinquishing + /// the backing allocation. /// /// This is roughly equivalent to the following, but more efficient /// /// ``` /// # let mut into_iter = Vec::<u8>::with_capacity(10).into_iter(); + /// let mut into_iter = std::mem::replace(&mut into_iter, Vec::new().into_iter()); /// (&mut into_iter).for_each(core::mem::drop); - /// unsafe { core::ptr::write(&mut into_iter, Vec::new().into_iter()); } + /// std::mem::forget(into_iter); /// ``` /// /// This method is used by in-place iteration, refer to the vec::in_place_collect @@ -120,6 +123,8 @@ impl<T, A: Allocator> IntoIter<T, A> { self.ptr = self.buf.as_ptr(); self.end = self.buf.as_ptr(); + // Dropping the remaining elements can panic, so this needs to be + // done only after updating the other fields. unsafe { ptr::drop_in_place(remaining); } @@ -150,19 +155,19 @@ impl<T, A: Allocator> Iterator for IntoIter<T, A> { #[inline] fn next(&mut self) -> Option<T> { - if self.ptr as *const _ == self.end { + if self.ptr == self.end { None - } else if mem::size_of::<T>() == 0 { + } else if T::IS_ZST { // purposefully don't use 'ptr.offset' because for // vectors with 0-size elements this would return the // same pointer. - self.ptr = unsafe { arith_offset(self.ptr as *const i8, 1) as *mut T }; + self.ptr = self.ptr.wrapping_byte_add(1); // Make up a value of this ZST. 
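The ZST branch above never dereferences the pointer: it only bumps the address with `wrapping_byte_add` and conjures each value with `mem::zeroed()`. From the outside the iterator still behaves normally:

```rust
fn main() {
    // Zero-sized elements: length bookkeeping only, no reads.
    let iter = vec![(); 3].into_iter();
    assert_eq!(iter.size_hint(), (3, Some(3)));
    assert_eq!(iter.count(), 3);

    // Sized elements are read out of the buffer one by one.
    let reversed: Vec<i32> = vec![1, 2, 3].into_iter().rev().collect();
    assert_eq!(reversed, [3, 2, 1]);
}
```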
Some(unsafe { mem::zeroed() }) } else { let old = self.ptr; - self.ptr = unsafe { self.ptr.offset(1) }; + self.ptr = unsafe { self.ptr.add(1) }; Some(unsafe { ptr::read(old) }) } @@ -170,7 +175,7 @@ impl<T, A: Allocator> Iterator for IntoIter<T, A> { #[inline] fn size_hint(&self) -> (usize, Option<usize>) { - let exact = if mem::size_of::<T>() == 0 { + let exact = if T::IS_ZST { self.end.addr().wrapping_sub(self.ptr.addr()) } else { unsafe { self.end.sub_ptr(self.ptr) } @@ -182,11 +187,11 @@ impl<T, A: Allocator> Iterator for IntoIter<T, A> { fn advance_by(&mut self, n: usize) -> Result<(), usize> { let step_size = self.len().min(n); let to_drop = ptr::slice_from_raw_parts_mut(self.ptr as *mut T, step_size); - if mem::size_of::<T>() == 0 { + if T::IS_ZST { // SAFETY: due to unchecked casts of unsigned amounts to signed offsets the wraparound // effectively results in unsigned pointers representing positions 0..usize::MAX, // which is valid for ZSTs. - self.ptr = unsafe { arith_offset(self.ptr as *const i8, step_size as isize) as *mut T } + self.ptr = self.ptr.wrapping_byte_add(step_size); } else { // SAFETY: the min() above ensures that step_size is in bounds self.ptr = unsafe { self.ptr.add(step_size) }; @@ -206,6 +211,43 @@ impl<T, A: Allocator> Iterator for IntoIter<T, A> { self.len() } + #[inline] + fn next_chunk<const N: usize>(&mut self) -> Result<[T; N], core::array::IntoIter<T, N>> { + let mut raw_ary = MaybeUninit::uninit_array(); + + let len = self.len(); + + if T::IS_ZST { + if len < N { + self.forget_remaining_elements(); + // Safety: ZSTs can be conjured ex nihilo, only the amount has to be correct + return Err(unsafe { array::IntoIter::new_unchecked(raw_ary, 0..len) }); + } + + self.ptr = self.ptr.wrapping_byte_add(N); + // Safety: ditto + return Ok(unsafe { raw_ary.transpose().assume_init() }); + } + + if len < N { + // Safety: `len` indicates that this many elements are available and we just checked that + // it fits into the array. + unsafe { + ptr::copy_nonoverlapping(self.ptr, raw_ary.as_mut_ptr() as *mut T, len); + self.forget_remaining_elements(); + return Err(array::IntoIter::new_unchecked(raw_ary, 0..len)); + } + } + + // Safety: `len` is larger than the array size. Copy a fixed amount here to fully initialize + // the array. + return unsafe { + ptr::copy_nonoverlapping(self.ptr, raw_ary.as_mut_ptr() as *mut T, N); + self.ptr = self.ptr.add(N); + Ok(raw_ary.transpose().assume_init()) + }; + } + unsafe fn __iterator_get_unchecked(&mut self, i: usize) -> Self::Item where Self: TrustedRandomAccessNoCoerce, @@ -219,7 +261,7 @@ impl<T, A: Allocator> Iterator for IntoIter<T, A> { // that `T: Copy` so reading elements from the buffer doesn't invalidate // them for `Drop`. unsafe { - if mem::size_of::<T>() == 0 { mem::zeroed() } else { ptr::read(self.ptr.add(i)) } + if T::IS_ZST { mem::zeroed() } else { ptr::read(self.ptr.add(i)) } } } } @@ -230,14 +272,14 @@ impl<T, A: Allocator> DoubleEndedIterator for IntoIter<T, A> { fn next_back(&mut self) -> Option<T> { if self.end == self.ptr { None - } else if mem::size_of::<T>() == 0 { + } else if T::IS_ZST { // See above for why 'ptr.offset' isn't used - self.end = unsafe { arith_offset(self.end as *const i8, -1) as *mut T }; + self.end = self.end.wrapping_byte_sub(1); // Make up a value of this ZST. 
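The `next_chunk` specialization above copies `N` elements straight out of the vector's buffer. It is nightly-only behind `iter_next_chunk`; a hedged sketch of the intended surface:

```rust
// Nightly-only sketch; requires the `iter_next_chunk` feature gate that
// this hunk also enables for the vendored alloc crate.
#![feature(iter_next_chunk)]

fn main() {
    let mut it = vec![1u8, 2, 3, 4, 5].into_iter();

    // Enough elements left: a whole [u8; 2] chunk comes back.
    let chunk: [u8; 2] = it.next_chunk().unwrap();
    assert_eq!(chunk, [1, 2]);

    // Fewer than N remaining: the leftovers come back as the error.
    assert!(it.next_chunk::<4>().is_err());
}
```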
Some(unsafe { mem::zeroed() }) } else { - self.end = unsafe { self.end.offset(-1) }; + self.end = unsafe { self.end.sub(1) }; Some(unsafe { ptr::read(self.end) }) } @@ -246,14 +288,12 @@ impl<T, A: Allocator> DoubleEndedIterator for IntoIter<T, A> { #[inline] fn advance_back_by(&mut self, n: usize) -> Result<(), usize> { let step_size = self.len().min(n); - if mem::size_of::<T>() == 0 { + if T::IS_ZST { // SAFETY: same as for advance_by() - self.end = unsafe { - arith_offset(self.end as *const i8, step_size.wrapping_neg() as isize) as *mut T - } + self.end = self.end.wrapping_byte_sub(step_size); } else { // SAFETY: same as for advance_by() - self.end = unsafe { self.end.offset(step_size.wrapping_neg() as isize) }; + self.end = unsafe { self.end.sub(step_size) }; } let to_drop = ptr::slice_from_raw_parts_mut(self.end as *mut T, step_size); // SAFETY: same as for advance_by() diff --git a/rust/alloc/vec/is_zero.rs b/rust/alloc/vec/is_zero.rs index 377f3d172777..426bb2c9f6ff 100644 --- a/rust/alloc/vec/is_zero.rs +++ b/rust/alloc/vec/is_zero.rs @@ -1,4 +1,5 @@ // SPDX-License-Identifier: Apache-2.0 OR MIT +use core::num::{Saturating, Wrapping}; use crate::boxed::Box; @@ -19,12 +20,14 @@ macro_rules! impl_is_zero { }; } +impl_is_zero!(i8, |x| x == 0); // It is needed to impl for arrays and tuples of i8. impl_is_zero!(i16, |x| x == 0); impl_is_zero!(i32, |x| x == 0); impl_is_zero!(i64, |x| x == 0); impl_is_zero!(i128, |x| x == 0); impl_is_zero!(isize, |x| x == 0); +impl_is_zero!(u8, |x| x == 0); // It is needed to impl for arrays and tuples of u8. impl_is_zero!(u16, |x| x == 0); impl_is_zero!(u32, |x| x == 0); impl_is_zero!(u64, |x| x == 0); @@ -56,15 +59,41 @@ unsafe impl<T: IsZero, const N: usize> IsZero for [T; N] { fn is_zero(&self) -> bool { // Because this is generated as a runtime check, it's not obvious that // it's worth doing if the array is really long. The threshold here - // is largely arbitrary, but was picked because as of 2022-05-01 LLVM - // can const-fold the check in `vec![[0; 32]; n]` but not in - // `vec![[0; 64]; n]`: https://godbolt.org/z/WTzjzfs5b + // is largely arbitrary, but was picked because as of 2022-07-01 LLVM + // fails to const-fold the check in `vec![[1; 32]; n]` + // See https://github.com/rust-lang/rust/pull/97581#issuecomment-1166628022 // Feel free to tweak if you have better evidence. - N <= 32 && self.iter().all(IsZero::is_zero) + N <= 16 && self.iter().all(IsZero::is_zero) } } +// This is recursive macro. +macro_rules! impl_for_tuples { + // Stopper + () => { + // No use for implementing for empty tuple because it is ZST. + }; + ($first_arg:ident $(,$rest:ident)*) => { + unsafe impl <$first_arg: IsZero, $($rest: IsZero,)*> IsZero for ($first_arg, $($rest,)*){ + #[inline] + fn is_zero(&self) -> bool{ + // Destructure tuple to N references + // Rust allows to hide generic params by local variable names. + #[allow(non_snake_case)] + let ($first_arg, $($rest,)*) = self; + + $first_arg.is_zero() + $( && $rest.is_zero() )* + } + } + + impl_for_tuples!($($rest),*); + } +} + +impl_for_tuples!(A, B, C, D, E, F, G, H); + // `Option<&T>` and `Option<Box<T>>` are guaranteed to represent `None` as null. 
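The `impl_for_tuples!` macro above is the usual recursive-macro trick for covering tuples up to a fixed arity. A standalone illustration of the pattern with a made-up `AllZero` trait (names here are illustrative, not the vendored code):

```rust
trait AllZero {
    fn all_zero(&self) -> bool;
}

impl AllZero for u8 {
    fn all_zero(&self) -> bool {
        *self == 0
    }
}

// Recursive macro: implements for (A, B, C), then (B, C), then (C,).
macro_rules! impl_for_tuples {
    () => {};
    ($first:ident $(, $rest:ident)*) => {
        impl<$first: AllZero, $($rest: AllZero,)*> AllZero for ($first, $($rest,)*) {
            fn all_zero(&self) -> bool {
                // Reuse the generic parameter names as binding names,
                // exactly like the hunk above does.
                #[allow(non_snake_case)]
                let ($first, $($rest,)*) = self;
                $first.all_zero() $(&& $rest.all_zero())*
            }
        }
        impl_for_tuples!($($rest),*);
    };
}

impl_for_tuples!(A, B, C);

fn main() {
    assert!((0u8, 0u8).all_zero());
    assert!(!(0u8, 1u8, 0u8).all_zero());
}
```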
// For fat pointers, the bytes that would be the pointer metadata in the `Some` // variant are padding in the `None` variant, so ignoring them and @@ -118,3 +147,39 @@ impl_is_zero_option_of_nonzero!( NonZeroUsize, NonZeroIsize, ); + +unsafe impl<T: IsZero> IsZero for Wrapping<T> { + #[inline] + fn is_zero(&self) -> bool { + self.0.is_zero() + } +} + +unsafe impl<T: IsZero> IsZero for Saturating<T> { + #[inline] + fn is_zero(&self) -> bool { + self.0.is_zero() + } +} + +macro_rules! impl_for_optional_bool { + ($($t:ty,)+) => {$( + unsafe impl IsZero for $t { + #[inline] + fn is_zero(&self) -> bool { + // SAFETY: This is *not* a stable layout guarantee, but + // inside `core` we're allowed to rely on the current rustc + // behaviour that options of bools will be one byte with + // no padding, so long as they're nested less than 254 deep. + let raw: u8 = unsafe { core::mem::transmute(*self) }; + raw == 0 + } + } + )+}; +} +impl_for_optional_bool! { + Option<bool>, + Option<Option<bool>>, + Option<Option<Option<bool>>>, + // Could go further, but not worth the metadata overhead +} diff --git a/rust/alloc/vec/mod.rs b/rust/alloc/vec/mod.rs index 8ac6c1e3b2a8..da65deb296cd 100644 --- a/rust/alloc/vec/mod.rs +++ b/rust/alloc/vec/mod.rs @@ -61,17 +61,18 @@ use core::cmp::Ordering; use core::convert::TryFrom; use core::fmt; use core::hash::{Hash, Hasher}; -use core::intrinsics::{arith_offset, assume}; +use core::intrinsics::assume; use core::iter; #[cfg(not(no_global_oom_handling))] use core::iter::FromIterator; use core::marker::PhantomData; -use core::mem::{self, ManuallyDrop, MaybeUninit}; +use core::mem::{self, ManuallyDrop, MaybeUninit, SizedTypeProperties}; use core::ops::{self, Index, IndexMut, Range, RangeBounds}; use core::ptr::{self, NonNull}; use core::slice::{self, SliceIndex}; use crate::alloc::{Allocator, Global}; +#[cfg(not(no_borrow))] use crate::borrow::{Cow, ToOwned}; use crate::boxed::Box; use crate::collections::TryReserveError; @@ -94,6 +95,7 @@ pub use self::drain::Drain; mod drain; +#[cfg(not(no_borrow))] #[cfg(not(no_global_oom_handling))] mod cow; @@ -120,14 +122,12 @@ use self::spec_from_elem::SpecFromElem; #[cfg(not(no_global_oom_handling))] mod spec_from_elem; -#[cfg(not(no_global_oom_handling))] use self::set_len_on_drop::SetLenOnDrop; -#[cfg(not(no_global_oom_handling))] mod set_len_on_drop; #[cfg(not(no_global_oom_handling))] -use self::in_place_drop::InPlaceDrop; +use self::in_place_drop::{InPlaceDrop, InPlaceDstBufDrop}; #[cfg(not(no_global_oom_handling))] mod in_place_drop; @@ -147,7 +147,8 @@ mod spec_from_iter; #[cfg(not(no_global_oom_handling))] use self::spec_extend::SpecExtend; -#[cfg(not(no_global_oom_handling))] +use self::spec_extend::TrySpecExtend; + mod spec_extend; /// A contiguous growable array type, written as `Vec<T>`, short for 'vector'. @@ -427,17 +428,25 @@ impl<T> Vec<T> { Vec { buf: RawVec::NEW, len: 0 } } - /// Constructs a new, empty `Vec<T>` with the specified capacity. + /// Constructs a new, empty `Vec<T>` with at least the specified capacity. /// - /// The vector will be able to hold exactly `capacity` elements without - /// reallocating. If `capacity` is 0, the vector will not allocate. + /// The vector will be able to hold at least `capacity` elements without + /// reallocating. This method is allowed to allocate for more elements than + /// `capacity`. If `capacity` is 0, the vector will not allocate. 
/// /// It is important to note that although the returned vector has the - /// *capacity* specified, the vector will have a zero *length*. For an - /// explanation of the difference between length and capacity, see + /// minimum *capacity* specified, the vector will have a zero *length*. For + /// an explanation of the difference between length and capacity, see /// *[Capacity and reallocation]*. /// + /// If it is important to know the exact allocated capacity of a `Vec`, + /// always use the [`capacity`] method after construction. + /// + /// For `Vec<T>` where `T` is a zero-sized type, there will be no allocation + /// and the capacity will always be `usize::MAX`. + /// /// [Capacity and reallocation]: #capacity-and-reallocation + /// [`capacity`]: Vec::capacity /// /// # Panics /// @@ -450,19 +459,24 @@ impl<T> Vec<T> { /// /// // The vector contains no items, even though it has capacity for more /// assert_eq!(vec.len(), 0); - /// assert_eq!(vec.capacity(), 10); + /// assert!(vec.capacity() >= 10); /// /// // These are all done without reallocating... /// for i in 0..10 { /// vec.push(i); /// } /// assert_eq!(vec.len(), 10); - /// assert_eq!(vec.capacity(), 10); + /// assert!(vec.capacity() >= 10); /// /// // ...but this may make the vector reallocate /// vec.push(11); /// assert_eq!(vec.len(), 11); /// assert!(vec.capacity() >= 11); + /// + /// // A vector of a zero-sized type will always over-allocate, since no + /// // allocation is necessary + /// let vec_units = Vec::<()>::with_capacity(10); + /// assert_eq!(vec_units.capacity(), usize::MAX); /// ``` #[cfg(not(no_global_oom_handling))] #[inline] @@ -472,17 +486,25 @@ impl<T> Vec<T> { Self::with_capacity_in(capacity, Global) } - /// Tries to construct a new, empty `Vec<T>` with the specified capacity. + /// Tries to construct a new, empty `Vec<T>` with at least the specified capacity. /// - /// The vector will be able to hold exactly `capacity` elements without - /// reallocating. If `capacity` is 0, the vector will not allocate. + /// The vector will be able to hold at least `capacity` elements without + /// reallocating. This method is allowed to allocate for more elements than + /// `capacity`. If `capacity` is 0, the vector will not allocate. /// /// It is important to note that although the returned vector has the - /// *capacity* specified, the vector will have a zero *length*. For an - /// explanation of the difference between length and capacity, see + /// minimum *capacity* specified, the vector will have a zero *length*. For + /// an explanation of the difference between length and capacity, see /// *[Capacity and reallocation]*. /// + /// If it is important to know the exact allocated capacity of a `Vec`, + /// always use the [`capacity`] method after construction. + /// + /// For `Vec<T>` where `T` is a zero-sized type, there will be no allocation + /// and the capacity will always be `usize::MAX`. + /// /// [Capacity and reallocation]: #capacity-and-reallocation + /// [`capacity`]: Vec::capacity /// /// # Examples /// @@ -491,22 +513,24 @@ impl<T> Vec<T> { /// /// // The vector contains no items, even though it has capacity for more /// assert_eq!(vec.len(), 0); - /// assert_eq!(vec.capacity(), 10); + /// assert!(vec.capacity() >= 10); /// /// // These are all done without reallocating... 
/// for i in 0..10 { /// vec.push(i); /// } /// assert_eq!(vec.len(), 10); - /// assert_eq!(vec.capacity(), 10); + /// assert!(vec.capacity() >= 10); /// /// // ...but this may make the vector reallocate /// vec.push(11); /// assert_eq!(vec.len(), 11); /// assert!(vec.capacity() >= 11); /// - /// let mut result = Vec::try_with_capacity(usize::MAX); - /// assert!(result.is_err()); + /// // A vector of a zero-sized type will always over-allocate, since no + /// // allocation is necessary + /// let vec_units = Vec::<()>::try_with_capacity(10).unwrap(); + /// assert_eq!(vec_units.capacity(), usize::MAX); /// ``` #[inline] #[stable(feature = "kernel", since = "1.0.0")] @@ -514,15 +538,13 @@ impl<T> Vec<T> { Self::try_with_capacity_in(capacity, Global) } - /// Creates a `Vec<T>` directly from the raw components of another vector. + /// Creates a `Vec<T>` directly from a pointer, a capacity, and a length. /// /// # Safety /// /// This is highly unsafe, due to the number of invariants that aren't /// checked: /// - /// * `ptr` needs to have been previously allocated via [`String`]/`Vec<T>` - /// (at least, it's highly likely to be incorrect if it wasn't). /// * `T` needs to have the same alignment as what `ptr` was allocated with. /// (`T` having a less strict alignment is not sufficient, the alignment really /// needs to be equal to satisfy the [`dealloc`] requirement that memory must be @@ -531,6 +553,14 @@ impl<T> Vec<T> { /// to be the same size as the pointer was allocated with. (Because similar to /// alignment, [`dealloc`] must be called with the same layout `size`.) /// * `length` needs to be less than or equal to `capacity`. + /// * The first `length` values must be properly initialized values of type `T`. + /// * `capacity` needs to be the capacity that the pointer was allocated with. + /// * The allocated size in bytes must be no larger than `isize::MAX`. + /// See the safety documentation of [`pointer::offset`]. + /// + /// These requirements are always upheld by any `ptr` that has been allocated + /// via `Vec<T>`. Other allocation sources are allowed if the invariants are + /// upheld. /// /// Violating these may cause problems like corrupting the allocator's /// internal data structures. 
For example it is normally **not** safe @@ -573,8 +603,8 @@ impl<T> Vec<T> { /// /// unsafe { /// // Overwrite memory with 4, 5, 6 - /// for i in 0..len as isize { - /// ptr::write(p.offset(i), 4 + i); + /// for i in 0..len { + /// ptr::write(p.add(i), 4 + i); /// } /// /// // Put everything back together into a Vec @@ -582,6 +612,32 @@ impl<T> Vec<T> { /// assert_eq!(rebuilt, [4, 5, 6]); /// } /// ``` + /// + /// Using memory that was allocated elsewhere: + /// + /// ```rust + /// #![feature(allocator_api)] + /// + /// use std::alloc::{AllocError, Allocator, Global, Layout}; + /// + /// fn main() { + /// let layout = Layout::array::<u32>(16).expect("overflow cannot happen"); + /// + /// let vec = unsafe { + /// let mem = match Global.allocate(layout) { + /// Ok(mem) => mem.cast::<u32>().as_ptr(), + /// Err(AllocError) => return, + /// }; + /// + /// mem.write(1_000_000); + /// + /// Vec::from_raw_parts_in(mem, 1, 16, Global) + /// }; + /// + /// assert_eq!(vec, &[1_000_000]); + /// assert_eq!(vec.capacity(), 16); + /// } + /// ``` #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub unsafe fn from_raw_parts(ptr: *mut T, length: usize, capacity: usize) -> Self { @@ -610,18 +666,26 @@ impl<T, A: Allocator> Vec<T, A> { Vec { buf: RawVec::new_in(alloc), len: 0 } } - /// Constructs a new, empty `Vec<T, A>` with the specified capacity with the provided - /// allocator. + /// Constructs a new, empty `Vec<T, A>` with at least the specified capacity + /// with the provided allocator. /// - /// The vector will be able to hold exactly `capacity` elements without - /// reallocating. If `capacity` is 0, the vector will not allocate. + /// The vector will be able to hold at least `capacity` elements without + /// reallocating. This method is allowed to allocate for more elements than + /// `capacity`. If `capacity` is 0, the vector will not allocate. /// /// It is important to note that although the returned vector has the - /// *capacity* specified, the vector will have a zero *length*. For an - /// explanation of the difference between length and capacity, see + /// minimum *capacity* specified, the vector will have a zero *length*. For + /// an explanation of the difference between length and capacity, see /// *[Capacity and reallocation]*. /// + /// If it is important to know the exact allocated capacity of a `Vec`, + /// always use the [`capacity`] method after construction. + /// + /// For `Vec<T, A>` where `T` is a zero-sized type, there will be no allocation + /// and the capacity will always be `usize::MAX`. + /// /// [Capacity and reallocation]: #capacity-and-reallocation + /// [`capacity`]: Vec::capacity /// /// # Panics /// @@ -651,6 +715,11 @@ impl<T, A: Allocator> Vec<T, A> { /// vec.push(11); /// assert_eq!(vec.len(), 11); /// assert!(vec.capacity() >= 11); + /// + /// // A vector of a zero-sized type will always over-allocate, since no + /// // allocation is necessary + /// let vec_units = Vec::<(), System>::with_capacity_in(10, System); + /// assert_eq!(vec_units.capacity(), usize::MAX); /// ``` #[cfg(not(no_global_oom_handling))] #[inline] @@ -659,18 +728,26 @@ impl<T, A: Allocator> Vec<T, A> { Vec { buf: RawVec::with_capacity_in(capacity, alloc), len: 0 } } - /// Tries to construct a new, empty `Vec<T, A>` with the specified capacity + /// Tries to construct a new, empty `Vec<T, A>` with at least the specified capacity /// with the provided allocator. /// - /// The vector will be able to hold exactly `capacity` elements without - /// reallocating. 
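The relaxed wording above (any allocation source, as long as the invariants hold) still covers the common round-trip case; a sketch using `ManuallyDrop` so the original `Vec` is not double-freed:

```rust
use std::mem::ManuallyDrop;

fn main() {
    let v = vec![1u32, 2, 3];
    let mut v = ManuallyDrop::new(v);

    let ptr = v.as_mut_ptr();
    let (len, cap) = (v.len(), v.capacity());

    // SAFETY: ptr/len/cap were just taken from a live Vec<u32> whose
    // ownership we gave up via ManuallyDrop, so every invariant listed
    // in the safety section holds.
    let rebuilt = unsafe { Vec::from_raw_parts(ptr, len, cap) };
    assert_eq!(rebuilt, [1, 2, 3]);
}
```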
If `capacity` is 0, the vector will not allocate. + /// The vector will be able to hold at least `capacity` elements without + /// reallocating. This method is allowed to allocate for more elements than + /// `capacity`. If `capacity` is 0, the vector will not allocate. /// /// It is important to note that although the returned vector has the - /// *capacity* specified, the vector will have a zero *length*. For an - /// explanation of the difference between length and capacity, see + /// minimum *capacity* specified, the vector will have a zero *length*. For + /// an explanation of the difference between length and capacity, see /// *[Capacity and reallocation]*. /// + /// If it is important to know the exact allocated capacity of a `Vec`, + /// always use the [`capacity`] method after construction. + /// + /// For `Vec<T, A>` where `T` is a zero-sized type, there will be no allocation + /// and the capacity will always be `usize::MAX`. + /// /// [Capacity and reallocation]: #capacity-and-reallocation + /// [`capacity`]: Vec::capacity /// /// # Examples /// @@ -697,8 +774,10 @@ impl<T, A: Allocator> Vec<T, A> { /// assert_eq!(vec.len(), 11); /// assert!(vec.capacity() >= 11); /// - /// let mut result = Vec::try_with_capacity_in(usize::MAX, System); - /// assert!(result.is_err()); + /// // A vector of a zero-sized type will always over-allocate, since no + /// // allocation is necessary + /// let vec_units = Vec::<(), System>::try_with_capacity_in(10, System).unwrap(); + /// assert_eq!(vec_units.capacity(), usize::MAX); /// ``` #[inline] #[stable(feature = "kernel", since = "1.0.0")] @@ -706,21 +785,30 @@ impl<T, A: Allocator> Vec<T, A> { Ok(Vec { buf: RawVec::try_with_capacity_in(capacity, alloc)?, len: 0 }) } - /// Creates a `Vec<T, A>` directly from the raw components of another vector. + /// Creates a `Vec<T, A>` directly from a pointer, a capacity, a length, + /// and an allocator. /// /// # Safety /// /// This is highly unsafe, due to the number of invariants that aren't /// checked: /// - /// * `ptr` needs to have been previously allocated via [`String`]/`Vec<T>` - /// (at least, it's highly likely to be incorrect if it wasn't). - /// * `T` needs to have the same size and alignment as what `ptr` was allocated with. + /// * `T` needs to have the same alignment as what `ptr` was allocated with. /// (`T` having a less strict alignment is not sufficient, the alignment really /// needs to be equal to satisfy the [`dealloc`] requirement that memory must be /// allocated and deallocated with the same layout.) + /// * The size of `T` times the `capacity` (ie. the allocated size in bytes) needs + /// to be the same size as the pointer was allocated with. (Because similar to + /// alignment, [`dealloc`] must be called with the same layout `size`.) /// * `length` needs to be less than or equal to `capacity`. - /// * `capacity` needs to be the capacity that the pointer was allocated with. + /// * The first `length` values must be properly initialized values of type `T`. + /// * `capacity` needs to [*fit*] the layout size that the pointer was allocated with. + /// * The allocated size in bytes must be no larger than `isize::MAX`. + /// See the safety documentation of [`pointer::offset`]. + /// + /// These requirements are always upheld by any `ptr` that has been allocated + /// via `Vec<T, A>`. Other allocation sources are allowed if the invariants are + /// upheld. /// /// Violating these may cause problems like corrupting the allocator's /// internal data structures. 
For example it is **not** safe @@ -738,6 +826,7 @@ impl<T, A: Allocator> Vec<T, A> { /// /// [`String`]: crate::string::String /// [`dealloc`]: crate::alloc::GlobalAlloc::dealloc + /// [*fit*]: crate::alloc::Allocator#memory-fitting /// /// # Examples /// @@ -767,8 +856,8 @@ impl<T, A: Allocator> Vec<T, A> { /// /// unsafe { /// // Overwrite memory with 4, 5, 6 - /// for i in 0..len as isize { - /// ptr::write(p.offset(i), 4 + i); + /// for i in 0..len { + /// ptr::write(p.add(i), 4 + i); /// } /// /// // Put everything back together into a Vec @@ -776,6 +865,29 @@ impl<T, A: Allocator> Vec<T, A> { /// assert_eq!(rebuilt, [4, 5, 6]); /// } /// ``` + /// + /// Using memory that was allocated elsewhere: + /// + /// ```rust + /// use std::alloc::{alloc, Layout}; + /// + /// fn main() { + /// let layout = Layout::array::<u32>(16).expect("overflow cannot happen"); + /// let vec = unsafe { + /// let mem = alloc(layout).cast::<u32>(); + /// if mem.is_null() { + /// return; + /// } + /// + /// mem.write(1_000_000); + /// + /// Vec::from_raw_parts(mem, 1, 16) + /// }; + /// + /// assert_eq!(vec, &[1_000_000]); + /// assert_eq!(vec.capacity(), 16); + /// } + /// ``` #[inline] #[unstable(feature = "allocator_api", issue = "32838")] pub unsafe fn from_raw_parts_in(ptr: *mut T, length: usize, capacity: usize, alloc: A) -> Self { @@ -868,13 +980,14 @@ impl<T, A: Allocator> Vec<T, A> { (ptr, len, capacity, alloc) } - /// Returns the number of elements the vector can hold without + /// Returns the total number of elements the vector can hold without /// reallocating. /// /// # Examples /// /// ``` - /// let vec: Vec<i32> = Vec::with_capacity(10); + /// let mut vec: Vec<i32> = Vec::with_capacity(10); + /// vec.push(42); /// assert_eq!(vec.capacity(), 10); /// ``` #[inline] @@ -884,10 +997,10 @@ impl<T, A: Allocator> Vec<T, A> { } /// Reserves capacity for at least `additional` more elements to be inserted - /// in the given `Vec<T>`. The collection may reserve more space to avoid - /// frequent reallocations. After calling `reserve`, capacity will be - /// greater than or equal to `self.len() + additional`. Does nothing if - /// capacity is already sufficient. + /// in the given `Vec<T>`. The collection may reserve more space to + /// speculatively avoid frequent reallocations. After calling `reserve`, + /// capacity will be greater than or equal to `self.len() + additional`. + /// Does nothing if capacity is already sufficient. /// /// # Panics /// @@ -906,10 +1019,12 @@ impl<T, A: Allocator> Vec<T, A> { self.buf.reserve(self.len, additional); } - /// Reserves the minimum capacity for exactly `additional` more elements to - /// be inserted in the given `Vec<T>`. After calling `reserve_exact`, - /// capacity will be greater than or equal to `self.len() + additional`. - /// Does nothing if the capacity is already sufficient. + /// Reserves the minimum capacity for at least `additional` more elements to + /// be inserted in the given `Vec<T>`. Unlike [`reserve`], this will not + /// deliberately over-allocate to speculatively avoid frequent allocations. + /// After calling `reserve_exact`, capacity will be greater than or equal to + /// `self.len() + additional`. Does nothing if the capacity is already + /// sufficient. /// /// Note that the allocator may give the collection more space than it /// requests. 
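The reworded `reserve`/`reserve_exact` docs both promise only a lower bound; a quick sketch of the difference in intent:

```rust
fn main() {
    // `reserve` may over-allocate to amortize future growth.
    let mut v: Vec<u8> = Vec::new();
    v.reserve(10);
    assert!(v.capacity() >= 10);

    // `reserve_exact` asks for just what is needed, but the allocator
    // may still hand back more, so only `>=` can be relied on.
    let mut w: Vec<u8> = Vec::new();
    w.reserve_exact(10);
    assert!(w.capacity() >= 10);
}
```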
Therefore, capacity can not be relied upon to be precisely @@ -935,10 +1050,11 @@ impl<T, A: Allocator> Vec<T, A> { } /// Tries to reserve capacity for at least `additional` more elements to be inserted - /// in the given `Vec<T>`. The collection may reserve more space to avoid + /// in the given `Vec<T>`. The collection may reserve more space to speculatively avoid /// frequent reallocations. After calling `try_reserve`, capacity will be - /// greater than or equal to `self.len() + additional`. Does nothing if - /// capacity is already sufficient. + /// greater than or equal to `self.len() + additional` if it returns + /// `Ok(())`. Does nothing if capacity is already sufficient. This method + /// preserves the contents even if an error occurs. /// /// # Errors /// @@ -970,10 +1086,11 @@ impl<T, A: Allocator> Vec<T, A> { self.buf.try_reserve(self.len, additional) } - /// Tries to reserve the minimum capacity for exactly `additional` - /// elements to be inserted in the given `Vec<T>`. After calling - /// `try_reserve_exact`, capacity will be greater than or equal to - /// `self.len() + additional` if it returns `Ok(())`. + /// Tries to reserve the minimum capacity for at least `additional` + /// elements to be inserted in the given `Vec<T>`. Unlike [`try_reserve`], + /// this will not deliberately over-allocate to speculatively avoid frequent + /// allocations. After calling `try_reserve_exact`, capacity will be greater + /// than or equal to `self.len() + additional` if it returns `Ok(())`. /// Does nothing if the capacity is already sufficient. /// /// Note that the allocator may give the collection more space than it @@ -1198,7 +1315,8 @@ impl<T, A: Allocator> Vec<T, A> { self } - /// Returns a raw pointer to the vector's buffer. + /// Returns a raw pointer to the vector's buffer, or a dangling raw pointer + /// valid for zero sized reads if the vector didn't allocate. /// /// The caller must ensure that the vector outlives the pointer this /// function returns, or else it will end up pointing to garbage. @@ -1235,7 +1353,8 @@ impl<T, A: Allocator> Vec<T, A> { ptr } - /// Returns an unsafe mutable pointer to the vector's buffer. + /// Returns an unsafe mutable pointer to the vector's buffer, or a dangling + /// raw pointer valid for zero sized reads if the vector didn't allocate. /// /// The caller must ensure that the vector outlives the pointer this /// function returns, or else it will end up pointing to garbage. @@ -1439,9 +1558,6 @@ impl<T, A: Allocator> Vec<T, A> { } let len = self.len(); - if index > len { - assert_failed(index, len); - } // space for the new element if len == self.buf.capacity() { @@ -1453,9 +1569,15 @@ impl<T, A: Allocator> Vec<T, A> { // The spot to put the new value { let p = self.as_mut_ptr().add(index); - // Shift everything over to make space. (Duplicating the - // `index`th element into two consecutive places.) - ptr::copy(p, p.offset(1), len - index); + if index < len { + // Shift everything over to make space. (Duplicating the + // `index`th element into two consecutive places.) + ptr::copy(p, p.add(1), len - index); + } else if index == len { + // No elements need shifting. + } else { + assert_failed(index, len); + } // Write it in, overwriting the first copy of the `index`th // element. ptr::write(p, element); @@ -1512,7 +1634,7 @@ impl<T, A: Allocator> Vec<T, A> { ret = ptr::read(ptr); // Shift everything down to fill in that spot. 
- ptr::copy(ptr.offset(1), ptr, len - index - 1); + ptr::copy(ptr.add(1), ptr, len - index - 1); } self.set_len(len - 1); ret @@ -1561,11 +1683,11 @@ impl<T, A: Allocator> Vec<T, A> { /// /// ``` /// let mut vec = vec![1, 2, 3, 4]; - /// vec.retain_mut(|x| if *x > 3 { - /// false - /// } else { + /// vec.retain_mut(|x| if *x <= 3 { /// *x += 1; /// true + /// } else { + /// false /// }); /// assert_eq!(vec, [2, 3, 4]); /// ``` @@ -1853,6 +1975,51 @@ impl<T, A: Allocator> Vec<T, A> { Ok(()) } + /// Appends an element if there is sufficient spare capacity, otherwise an error is returned + /// with the element. + /// + /// Unlike [`push`] this method will not reallocate when there's insufficient capacity. + /// The caller should use [`reserve`] or [`try_reserve`] to ensure that there is enough capacity. + /// + /// [`push`]: Vec::push + /// [`reserve`]: Vec::reserve + /// [`try_reserve`]: Vec::try_reserve + /// + /// # Examples + /// + /// A manual, panic-free alternative to [`FromIterator`]: + /// + /// ``` + /// #![feature(vec_push_within_capacity)] + /// + /// use std::collections::TryReserveError; + /// fn from_iter_fallible<T>(iter: impl Iterator<Item=T>) -> Result<Vec<T>, TryReserveError> { + /// let mut vec = Vec::new(); + /// for value in iter { + /// if let Err(value) = vec.push_within_capacity(value) { + /// vec.try_reserve(1)?; + /// // this cannot fail, the previous line either returned or added at least 1 free slot + /// let _ = vec.push_within_capacity(value); + /// } + /// } + /// Ok(vec) + /// } + /// assert_eq!(from_iter_fallible(0..100), Ok(Vec::from_iter(0..100))); + /// ``` + #[inline] + #[unstable(feature = "vec_push_within_capacity", issue = "100486")] + pub fn push_within_capacity(&mut self, value: T) -> Result<(), T> { + if self.len == self.buf.capacity() { + return Err(value); + } + unsafe { + let end = self.as_mut_ptr().add(self.len); + ptr::write(end, value); + self.len += 1; + } + Ok(()) + } + /// Removes the last element from a vector and returns it, or [`None`] if it /// is empty. /// @@ -1885,7 +2052,7 @@ impl<T, A: Allocator> Vec<T, A> { /// /// # Panics /// - /// Panics if the number of elements in the vector overflows a `usize`. + /// Panics if the new capacity exceeds `isize::MAX` bytes. /// /// # Examples /// @@ -1917,6 +2084,17 @@ impl<T, A: Allocator> Vec<T, A> { self.len += count; } + /// Tries to append elements to `self` from other buffer. + #[inline] + unsafe fn try_append_elements(&mut self, other: *const [T]) -> Result<(), TryReserveError> { + let count = unsafe { (*other).len() }; + self.try_reserve(count)?; + let len = self.len(); + unsafe { ptr::copy_nonoverlapping(other as *const T, self.as_mut_ptr().add(len), count) }; + self.len += count; + Ok(()) + } + /// Removes the specified range from the vector in bulk, returning all /// removed elements as an iterator. If the iterator is dropped before /// being fully consumed, it drops the remaining removed elements. @@ -1968,9 +2146,7 @@ impl<T, A: Allocator> Vec<T, A> { unsafe { // set self.vec length's to start, to be safe in case Drain is leaked self.set_len(start); - // Use the borrow in the IterMut to indicate borrowing behavior of the - // whole Drain iterator (like &mut T). 
- let range_slice = slice::from_raw_parts_mut(self.as_mut_ptr().add(start), end - start); + let range_slice = slice::from_raw_parts(self.as_ptr().add(start), end - start); Drain { tail_start: end, tail_len: len - end, @@ -2162,7 +2338,6 @@ impl<T, A: Allocator> Vec<T, A> { /// static_ref[0] += 1; /// assert_eq!(static_ref, &[2, 2, 3]); /// ``` - #[cfg(not(no_global_oom_handling))] #[stable(feature = "vec_leak", since = "1.47.0")] #[inline] pub fn leak<'a>(self) -> &'a mut [T] @@ -2338,6 +2513,45 @@ impl<T: Clone, A: Allocator> Vec<T, A> { } } + /// Tries to resize the `Vec` in-place so that `len` is equal to `new_len`. + /// + /// If `new_len` is greater than `len`, the `Vec` is extended by the + /// difference, with each additional slot filled with `value`. + /// If `new_len` is less than `len`, the `Vec` is simply truncated. + /// + /// This method requires `T` to implement [`Clone`], + /// in order to be able to clone the passed value. + /// If you need more flexibility (or want to rely on [`Default`] instead of + /// [`Clone`]), use [`Vec::resize_with`]. + /// If you only need to resize to a smaller size, use [`Vec::truncate`]. + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec!["hello"]; + /// vec.try_resize(3, "world").unwrap(); + /// assert_eq!(vec, ["hello", "world", "world"]); + /// + /// let mut vec = vec![1, 2, 3, 4]; + /// vec.try_resize(2, 0).unwrap(); + /// assert_eq!(vec, [1, 2]); + /// + /// let mut vec = vec![42]; + /// let result = vec.try_resize(usize::MAX, 0); + /// assert!(result.is_err()); + /// ``` + #[stable(feature = "kernel", since = "1.0.0")] + pub fn try_resize(&mut self, new_len: usize, value: T) -> Result<(), TryReserveError> { + let len = self.len(); + + if new_len > len { + self.try_extend_with(new_len - len, ExtendElement(value)) + } else { + self.truncate(new_len); + Ok(()) + } + } + /// Clones and appends all elements in a slice to the `Vec`. /// /// Iterates over the slice `other`, clones each element, and then appends @@ -2363,6 +2577,30 @@ impl<T: Clone, A: Allocator> Vec<T, A> { self.spec_extend(other.iter()) } + /// Tries to clone and append all elements in a slice to the `Vec`. + /// + /// Iterates over the slice `other`, clones each element, and then appends + /// it to this `Vec`. The `other` slice is traversed in-order. + /// + /// Note that this function is same as [`extend`] except that it is + /// specialized to work with slices instead. If and when Rust gets + /// specialization this function will likely be deprecated (but still + /// available). + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![1]; + /// vec.try_extend_from_slice(&[2, 3, 4]).unwrap(); + /// assert_eq!(vec, [1, 2, 3, 4]); + /// ``` + /// + /// [`extend`]: Vec::extend + #[stable(feature = "kernel", since = "1.0.0")] + pub fn try_extend_from_slice(&mut self, other: &[T]) -> Result<(), TryReserveError> { + self.try_spec_extend(other.iter()) + } + /// Copies elements from `src` range to the end of the vector. 
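The fallible methods introduced in this hunk (`try_resize` and `try_extend_from_slice`, alongside the existing `try_reserve`) let kernel code propagate allocation failure instead of aborting. A hedged sketch of how they compose, assuming the kernel's patched `alloc` crate; `build_buffer` and its parameters are illustrative only, not part of the patch:

```rust
use alloc::{collections::TryReserveError, vec::Vec};

/// Builds `header` followed by `payload_len` zero bytes, failing cleanly on OOM.
fn build_buffer(header: &[u8], payload_len: usize) -> Result<Vec<u8>, TryReserveError> {
    let mut buf = Vec::new();
    buf.try_reserve(header.len() + payload_len)?;   // one up-front, fallible allocation
    buf.try_extend_from_slice(header)?;             // cannot abort on allocation failure
    buf.try_resize(header.len() + payload_len, 0)?; // zero-fill the payload area
    Ok(buf)
}
```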
/// /// # Panics @@ -2426,7 +2664,7 @@ impl<T, A: Allocator, const N: usize> Vec<[T; N], A> { #[unstable(feature = "slice_flatten", issue = "95629")] pub fn into_flattened(self) -> Vec<T, A> { let (ptr, len, cap, alloc) = self.into_raw_parts_with_alloc(); - let (new_len, new_cap) = if mem::size_of::<T>() == 0 { + let (new_len, new_cap) = if T::IS_ZST { (len.checked_mul(N).expect("vec len overflow"), usize::MAX) } else { // SAFETY: @@ -2488,7 +2726,7 @@ impl<T, A: Allocator> Vec<T, A> { // Write all elements except the last one for _ in 1..n { ptr::write(ptr, value.next()); - ptr = ptr.offset(1); + ptr = ptr.add(1); // Increment the length in every step in case next() panics local_len.increment_len(1); } @@ -2502,6 +2740,36 @@ impl<T, A: Allocator> Vec<T, A> { // len set by scope guard } } + + /// Try to extend the vector by `n` values, using the given generator. + fn try_extend_with<E: ExtendWith<T>>(&mut self, n: usize, mut value: E) -> Result<(), TryReserveError> { + self.try_reserve(n)?; + + unsafe { + let mut ptr = self.as_mut_ptr().add(self.len()); + // Use SetLenOnDrop to work around bug where compiler + // might not realize the store through `ptr` through self.set_len() + // don't alias. + let mut local_len = SetLenOnDrop::new(&mut self.len); + + // Write all elements except the last one + for _ in 1..n { + ptr::write(ptr, value.next()); + ptr = ptr.add(1); + // Increment the length in every step in case next() panics + local_len.increment_len(1); + } + + if n > 0 { + // We can write the last element directly without cloning needlessly + ptr::write(ptr, value.last()); + local_len.increment_len(1); + } + + // len set by scope guard + Ok(()) + } + } } impl<T: PartialEq, A: Allocator> Vec<T, A> { @@ -2584,7 +2852,7 @@ impl<T: Copy, A: Allocator> ExtendFromWithinSpec for Vec<T, A> { // SAFETY: // - Both pointers are created from unique slice references (`&mut [_]`) // so they are valid and do not overlap. 
- // - Elements are :Copy so it's OK to to copy them, without doing + // - Elements are :Copy so it's OK to copy them, without doing // anything with the original values // - `count` is equal to the len of `source`, so source is valid for // `count` reads @@ -2607,6 +2875,7 @@ impl<T: Copy, A: Allocator> ExtendFromWithinSpec for Vec<T, A> { impl<T, A: Allocator> ops::Deref for Vec<T, A> { type Target = [T]; + #[inline] fn deref(&self) -> &[T] { unsafe { slice::from_raw_parts(self.as_ptr(), self.len) } } @@ -2614,6 +2883,7 @@ impl<T, A: Allocator> ops::Deref for Vec<T, A> { #[stable(feature = "rust1", since = "1.0.0")] impl<T, A: Allocator> ops::DerefMut for Vec<T, A> { + #[inline] fn deref_mut(&mut self) -> &mut [T] { unsafe { slice::from_raw_parts_mut(self.as_mut_ptr(), self.len) } } @@ -2740,10 +3010,13 @@ impl<T, A: Allocator> IntoIterator for Vec<T, A> { /// /// ``` /// let v = vec!["a".to_string(), "b".to_string()]; - /// for s in v.into_iter() { - /// // s has type String, not &String - /// println!("{s}"); - /// } + /// let mut v_iter = v.into_iter(); + /// + /// let first_element: Option<String> = v_iter.next(); + /// + /// assert_eq!(first_element, Some("a".to_string())); + /// assert_eq!(v_iter.next(), Some("b".to_string())); + /// assert_eq!(v_iter.next(), None); /// ``` #[inline] fn into_iter(self) -> IntoIter<T, A> { @@ -2751,8 +3024,8 @@ impl<T, A: Allocator> IntoIterator for Vec<T, A> { let mut me = ManuallyDrop::new(self); let alloc = ManuallyDrop::new(ptr::read(me.allocator())); let begin = me.as_mut_ptr(); - let end = if mem::size_of::<T>() == 0 { - arith_offset(begin as *const i8, me.len() as isize) as *const T + let end = if T::IS_ZST { + begin.wrapping_byte_add(me.len()) } else { begin.add(me.len()) as *const T }; @@ -2836,6 +3109,34 @@ impl<T, A: Allocator> Vec<T, A> { } } + // leaf method to which various SpecFrom/SpecExtend implementations delegate when + // they have no further optimizations to apply + fn try_extend_desugared<I: Iterator<Item = T>>(&mut self, mut iterator: I) -> Result<(), TryReserveError> { + // This is the case for a general iterator. + // + // This function should be the moral equivalent of: + // + // for item in iterator { + // self.push(item); + // } + while let Some(element) = iterator.next() { + let len = self.len(); + if len == self.capacity() { + let (lower, _) = iterator.size_hint(); + self.try_reserve(lower.saturating_add(1))?; + } + unsafe { + ptr::write(self.as_mut_ptr().add(len), element); + // Since next() executes user code which can panic we have to bump the length + // after each step. + // NB can't overflow since we would have had to alloc the address space + self.set_len(len + 1); + } + } + + Ok(()) + } + /// Creates a splicing iterator that replaces the specified range in the vector /// with the given `replace_with` iterator and yields the removed items. /// `replace_with` does not need to be the same length as `range`. @@ -3002,6 +3303,8 @@ unsafe impl<#[may_dangle] T, A: Allocator> Drop for Vec<T, A> { #[rustc_const_unstable(feature = "const_default_impls", issue = "87864")] impl<T> const Default for Vec<T> { /// Creates an empty `Vec<T>`. + /// + /// The vector will not allocate until elements are pushed onto it. 
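The sentence added to the `Default` documentation mirrors the long-standing `Vec::new` guarantee. A small sketch of what it promises, again in plain `std`:

```rust
fn main() {
    let mut v: Vec<i32> = Vec::default(); // same as Vec::new()
    assert_eq!(v.capacity(), 0);          // nothing allocated yet

    v.push(1);                            // the first push performs the allocation
    assert!(v.capacity() >= 1);
}
```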
fn default() -> Vec<T> { Vec::new() } @@ -3094,15 +3397,19 @@ impl<T, const N: usize> From<[T; N]> for Vec<T> { /// ``` #[cfg(not(test))] fn from(s: [T; N]) -> Vec<T> { - <[T]>::into_vec(box s) + <[T]>::into_vec( + #[rustc_box] + Box::new(s), + ) } #[cfg(test)] fn from(s: [T; N]) -> Vec<T> { - crate::slice::into_vec(box s) + crate::slice::into_vec(Box::new(s)) } } +#[cfg(not(no_borrow))] #[stable(feature = "vec_from_cow_slice", since = "1.14.0")] impl<'a, T> From<Cow<'a, [T]>> for Vec<T> where diff --git a/rust/alloc/vec/set_len_on_drop.rs b/rust/alloc/vec/set_len_on_drop.rs new file mode 100644 index 000000000000..8b66bc812129 --- /dev/null +++ b/rust/alloc/vec/set_len_on_drop.rs @@ -0,0 +1,28 @@ +// Set the length of the vec when the `SetLenOnDrop` value goes out of scope. +// +// The idea is: The length field in SetLenOnDrop is a local variable +// that the optimizer will see does not alias with any stores through the Vec's data +// pointer. This is a workaround for alias analysis issue #32155 +pub(super) struct SetLenOnDrop<'a> { + len: &'a mut usize, + local_len: usize, +} + +impl<'a> SetLenOnDrop<'a> { + #[inline] + pub(super) fn new(len: &'a mut usize) -> Self { + SetLenOnDrop { local_len: *len, len } + } + + #[inline] + pub(super) fn increment_len(&mut self, increment: usize) { + self.local_len += increment; + } +} + +impl Drop for SetLenOnDrop<'_> { + #[inline] + fn drop(&mut self) { + *self.len = self.local_len; + } +} diff --git a/rust/alloc/vec/spec_extend.rs b/rust/alloc/vec/spec_extend.rs new file mode 100644 index 000000000000..73e69fec87f5 --- /dev/null +++ b/rust/alloc/vec/spec_extend.rs @@ -0,0 +1,170 @@ +use crate::alloc::Allocator; +use crate::collections::{TryReserveError, TryReserveErrorKind}; +use core::iter::TrustedLen; +use core::ptr::{self}; +use core::slice::{self}; + +use super::{IntoIter, SetLenOnDrop, Vec}; + +// Specialization trait used for Vec::extend +#[cfg(not(no_global_oom_handling))] +pub(super) trait SpecExtend<T, I> { + fn spec_extend(&mut self, iter: I); +} + +// Specialization trait used for Vec::try_extend +pub(super) trait TrySpecExtend<T, I> { + fn try_spec_extend(&mut self, iter: I) -> Result<(), TryReserveError>; +} + +#[cfg(not(no_global_oom_handling))] +impl<T, I, A: Allocator> SpecExtend<T, I> for Vec<T, A> +where + I: Iterator<Item = T>, +{ + default fn spec_extend(&mut self, iter: I) { + self.extend_desugared(iter) + } +} + +impl<T, I, A: Allocator> TrySpecExtend<T, I> for Vec<T, A> +where + I: Iterator<Item = T>, +{ + default fn try_spec_extend(&mut self, iter: I) -> Result<(), TryReserveError> { + self.try_extend_desugared(iter) + } +} + +#[cfg(not(no_global_oom_handling))] +impl<T, I, A: Allocator> SpecExtend<T, I> for Vec<T, A> +where + I: TrustedLen<Item = T>, +{ + default fn spec_extend(&mut self, iterator: I) { + // This is the case for a TrustedLen iterator. + let (low, high) = iterator.size_hint(); + if let Some(additional) = high { + debug_assert_eq!( + low, + additional, + "TrustedLen iterator's size hint is not exact: {:?}", + (low, high) + ); + self.reserve(additional); + unsafe { + let mut ptr = self.as_mut_ptr().add(self.len()); + let mut local_len = SetLenOnDrop::new(&mut self.len); + iterator.for_each(move |element| { + ptr::write(ptr, element); + ptr = ptr.add(1); + // Since the loop executes user code which can panic we have to bump the pointer + // after each step. 
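The new `set_len_on_drop.rs` helper keeps the running length in a local that the optimizer can reason about and commits it from `Drop`, so the vector's `len` is written back even if the element source panics part-way through. A standalone sketch of that drop-guard idea (illustrative only, not the kernel implementation):

```rust
struct LenGuard<'a> {
    len: &'a mut usize,
    local: usize,
}

impl Drop for LenGuard<'_> {
    // The length is committed when the guard goes out of scope, including
    // during unwinding, so it always matches the number of elements written.
    fn drop(&mut self) {
        *self.len = self.local;
    }
}

fn main() {
    let mut len = 0usize;
    {
        let mut guard = LenGuard { local: len, len: &mut len };
        guard.local += 3; // pretend three elements were successfully written
    } // guard dropped here, committing the new length
    assert_eq!(len, 3);
}
```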
+ // NB can't overflow since we would have had to alloc the address space + local_len.increment_len(1); + }); + } + } else { + // Per TrustedLen contract a `None` upper bound means that the iterator length + // truly exceeds usize::MAX, which would eventually lead to a capacity overflow anyway. + // Since the other branch already panics eagerly (via `reserve()`) we do the same here. + // This avoids additional codegen for a fallback code path which would eventually + // panic anyway. + panic!("capacity overflow"); + } + } +} + +impl<T, I, A: Allocator> TrySpecExtend<T, I> for Vec<T, A> +where + I: TrustedLen<Item = T>, +{ + default fn try_spec_extend(&mut self, iterator: I) -> Result<(), TryReserveError> { + // This is the case for a TrustedLen iterator. + let (low, high) = iterator.size_hint(); + if let Some(additional) = high { + debug_assert_eq!( + low, + additional, + "TrustedLen iterator's size hint is not exact: {:?}", + (low, high) + ); + self.try_reserve(additional)?; + unsafe { + let mut ptr = self.as_mut_ptr().add(self.len()); + let mut local_len = SetLenOnDrop::new(&mut self.len); + iterator.for_each(move |element| { + ptr::write(ptr, element); + ptr = ptr.add(1); + // NB can't overflow since we would have had to alloc the address space + local_len.increment_len(1); + }); + } + Ok(()) + } else { + Err(TryReserveErrorKind::CapacityOverflow.into()) + } + } +} + +#[cfg(not(no_global_oom_handling))] +impl<T, A: Allocator> SpecExtend<T, IntoIter<T>> for Vec<T, A> { + fn spec_extend(&mut self, mut iterator: IntoIter<T>) { + unsafe { + self.append_elements(iterator.as_slice() as _); + } + iterator.forget_remaining_elements(); + } +} + +impl<T, A: Allocator> TrySpecExtend<T, IntoIter<T>> for Vec<T, A> { + fn try_spec_extend(&mut self, mut iterator: IntoIter<T>) -> Result<(), TryReserveError> { + unsafe { + self.try_append_elements(iterator.as_slice() as _)?; + } + iterator.ptr = iterator.end; + Ok(()) + } +} + +#[cfg(not(no_global_oom_handling))] +impl<'a, T: 'a, I, A: Allocator + 'a> SpecExtend<&'a T, I> for Vec<T, A> +where + I: Iterator<Item = &'a T>, + T: Clone, +{ + default fn spec_extend(&mut self, iterator: I) { + self.spec_extend(iterator.cloned()) + } +} + +impl<'a, T: 'a, I, A: Allocator + 'a> TrySpecExtend<&'a T, I> for Vec<T, A> +where + I: Iterator<Item = &'a T>, + T: Clone, +{ + default fn try_spec_extend(&mut self, iterator: I) -> Result<(), TryReserveError> { + self.try_spec_extend(iterator.cloned()) + } +} + +#[cfg(not(no_global_oom_handling))] +impl<'a, T: 'a, A: Allocator + 'a> SpecExtend<&'a T, slice::Iter<'a, T>> for Vec<T, A> +where + T: Copy, +{ + fn spec_extend(&mut self, iterator: slice::Iter<'a, T>) { + let slice = iterator.as_slice(); + unsafe { self.append_elements(slice) }; + } +} + +impl<'a, T: 'a, A: Allocator + 'a> TrySpecExtend<&'a T, slice::Iter<'a, T>> for Vec<T, A> +where + T: Copy, +{ + fn try_spec_extend(&mut self, iterator: slice::Iter<'a, T>) -> Result<(), TryReserveError> { + let slice = iterator.as_slice(); + unsafe { self.try_append_elements(slice) } + } +} diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h index c48bc284214a..b3ef609d0947 100644 --- a/rust/bindings/bindings_helper.h +++ b/rust/bindings/bindings_helper.h @@ -6,8 +6,59 @@ * Sorted alphabetically. 
*/ +#include <drm/drm_device.h> +#include <drm/drm_drv.h> +#include <drm/drm_file.h> +#include <drm/drm_gem.h> +#include <drm/drm_gem_shmem_helper.h> +#include <drm/drm_ioctl.h> +#include <drm/drm_syncobj.h> +#include <drm/gpu_scheduler.h> +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/dma-fence.h> +#include <linux/dma-fence-chain.h> +#include <linux/dma-mapping.h> +#include <linux/fs.h> +#include <linux/iosys-map.h> +#include <linux/io-pgtable.h> +#include <linux/ktime.h> +#include <linux/of.h> +#include <linux/of_address.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> +#include <linux/poll.h> +#include <linux/refcount.h> #include <linux/slab.h> +#include <linux/soc/apple/rtkit.h> +#include <linux/sysctl.h> +#include <linux/timekeeping.h> +#include <linux/xarray.h> +#include <uapi/asm-generic/ioctl.h> +#include <uapi/drm/asahi_drm.h> +#include <uapi/drm/drm.h> /* `bindgen` gets confused at certain things. */ const gfp_t BINDINGS_GFP_KERNEL = GFP_KERNEL; const gfp_t BINDINGS___GFP_ZERO = __GFP_ZERO; + +const gfp_t BINDINGS_XA_FLAGS_LOCK_IRQ = XA_FLAGS_LOCK_IRQ; +const gfp_t BINDINGS_XA_FLAGS_LOCK_BH = XA_FLAGS_LOCK_BH; +const gfp_t BINDINGS_XA_FLAGS_TRACK_FREE = XA_FLAGS_TRACK_FREE; +const gfp_t BINDINGS_XA_FLAGS_ZERO_BUSY = XA_FLAGS_ZERO_BUSY; +const gfp_t BINDINGS_XA_FLAGS_ALLOC_WRAPPED = XA_FLAGS_ALLOC_WRAPPED; +const gfp_t BINDINGS_XA_FLAGS_ACCOUNT = XA_FLAGS_ACCOUNT; +const gfp_t BINDINGS_XA_FLAGS_ALLOC = XA_FLAGS_ALLOC; +const gfp_t BINDINGS_XA_FLAGS_ALLOC1 = XA_FLAGS_ALLOC1; + +const xa_mark_t BINDINGS_XA_MARK_0 = XA_MARK_0; +const xa_mark_t BINDINGS_XA_MARK_1 = XA_MARK_1; +const xa_mark_t BINDINGS_XA_MARK_2 = XA_MARK_2; +const xa_mark_t BINDINGS_XA_PRESENT = XA_PRESENT; +const xa_mark_t BINDINGS_XA_MARK_MAX = XA_MARK_MAX; +const xa_mark_t BINDINGS_XA_FREE_MARK = XA_FREE_MARK; + +const __poll_t BINDINGS_EPOLLIN = EPOLLIN; +const __poll_t BINDINGS_EPOLLOUT = EPOLLOUT; +const __poll_t BINDINGS_EPOLLERR = EPOLLERR; +const __poll_t BINDINGS_EPOLLHUP = EPOLLHUP; diff --git a/rust/bindings/lib.rs b/rust/bindings/lib.rs index 6c50ee62c56b..9bcbea04dac3 100644 --- a/rust/bindings/lib.rs +++ b/rust/bindings/lib.rs @@ -9,7 +9,6 @@ //! using this crate. #![no_std] -#![feature(core_ffi_c)] // See <https://github.com/rust-lang/rust-bindgen/issues/1651>. #![cfg_attr(test, allow(deref_nullptr))] #![cfg_attr(test, allow(unaligned_references))] @@ -41,6 +40,7 @@ mod bindings_raw { #[allow(dead_code)] mod bindings_helper { // Import the generated bindings for types. + use super::bindings_raw::*; include!(concat!( env!("OBJTREE"), "/rust/bindings/bindings_helpers_generated.rs" diff --git a/rust/compiler_builtins.rs b/rust/compiler_builtins.rs index f8f39a3e6855..43378357ece9 100644 --- a/rust/compiler_builtins.rs +++ b/rust/compiler_builtins.rs @@ -28,7 +28,7 @@ macro_rules! define_panicking_intrinsics( ($reason: tt, { $($ident: ident, )* }) => { $( #[doc(hidden)] - #[no_mangle] + #[export_name = concat!("__rust", stringify!($ident))] pub extern "C" fn $ident() { panic!($reason); } @@ -61,3 +61,6 @@ define_panicking_intrinsics!("`u128` should not be used", { __udivti3, __umodti3, }); + +// NOTE: if you are adding a new intrinsic here, you should also add it to +// `redirect-intrinsics` in `rust/Makefile`. diff --git a/rust/helpers.c b/rust/helpers.c index b4f15eee2ffd..e023b1265b58 100644 --- a/rust/helpers.c +++ b/rust/helpers.c @@ -18,8 +18,25 @@ * accidentally exposed. 
*/ +#include <drm/drm_gem.h> +#include <drm/drm_gem_shmem_helper.h> +#include <drm/drm_syncobj.h> #include <linux/bug.h> #include <linux/build_bug.h> +#include <linux/device.h> +#include <linux/dma-fence.h> +#include <linux/dma-fence-chain.h> +#include <linux/dma-mapping.h> +#include <linux/err.h> +#include <linux/errname.h> +#include <linux/mutex.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> +#include <linux/spinlock.h> +#include <linux/rcupdate.h> +#include <linux/refcount.h> +#include <linux/xarray.h> __noreturn void rust_helper_BUG(void) { @@ -27,6 +44,504 @@ __noreturn void rust_helper_BUG(void) } EXPORT_SYMBOL_GPL(rust_helper_BUG); +refcount_t rust_helper_REFCOUNT_INIT(int n) +{ + return (refcount_t)REFCOUNT_INIT(n); +} +EXPORT_SYMBOL_GPL(rust_helper_REFCOUNT_INIT); + +void rust_helper_refcount_inc(refcount_t *r) +{ + refcount_inc(r); +} +EXPORT_SYMBOL_GPL(rust_helper_refcount_inc); + +bool rust_helper_refcount_dec_and_test(refcount_t *r) +{ + return refcount_dec_and_test(r); +} +EXPORT_SYMBOL_GPL(rust_helper_refcount_dec_and_test); + +__force void *rust_helper_ERR_PTR(long err) +{ + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(rust_helper_ERR_PTR); + +bool rust_helper_IS_ERR(__force const void *ptr) +{ + return IS_ERR(ptr); +} +EXPORT_SYMBOL_GPL(rust_helper_IS_ERR); + +long rust_helper_PTR_ERR(__force const void *ptr) +{ + return PTR_ERR(ptr); +} +EXPORT_SYMBOL_GPL(rust_helper_PTR_ERR); + +const char *rust_helper_errname(int err) +{ + return errname(err); +} +EXPORT_SYMBOL_GPL(rust_helper_errname); + +void rust_helper_xa_init_flags(struct xarray *xa, gfp_t flags) +{ + xa_init_flags(xa, flags); +} +EXPORT_SYMBOL_GPL(rust_helper_xa_init_flags); + +bool rust_helper_xa_empty(struct xarray *xa) +{ + return xa_empty(xa); +} +EXPORT_SYMBOL_GPL(rust_helper_xa_empty); + +int rust_helper_xa_alloc(struct xarray *xa, u32 *id, void *entry, struct xa_limit limit, gfp_t gfp) +{ + return xa_alloc(xa, id, entry, limit, gfp); +} +EXPORT_SYMBOL_GPL(rust_helper_xa_alloc); + +void rust_helper_xa_lock(struct xarray *xa) +{ + xa_lock(xa); +} +EXPORT_SYMBOL_GPL(rust_helper_xa_lock); + +void rust_helper_xa_unlock(struct xarray *xa) +{ + xa_unlock(xa); +} +EXPORT_SYMBOL_GPL(rust_helper_xa_unlock); + +int rust_helper_xa_err(void *entry) +{ + return xa_err(entry); +} +EXPORT_SYMBOL_GPL(rust_helper_xa_err); + +void *rust_helper_dev_get_drvdata(struct device *dev) +{ + return dev_get_drvdata(dev); +} +EXPORT_SYMBOL_GPL(rust_helper_dev_get_drvdata); + +const char *rust_helper_dev_name(const struct device *dev) +{ + return dev_name(dev); +} +EXPORT_SYMBOL_GPL(rust_helper_dev_name); + +struct task_struct *rust_helper_get_current(void) +{ + return current; +} +EXPORT_SYMBOL_GPL(rust_helper_get_current); + +int rust_helper_signal_pending(struct task_struct *t) +{ + return signal_pending(t); +} +EXPORT_SYMBOL_GPL(rust_helper_signal_pending); + +void rust_helper_init_wait(struct wait_queue_entry *wq_entry) +{ + init_wait(wq_entry); +} +EXPORT_SYMBOL_GPL(rust_helper_init_wait); + +unsigned long rust_helper_copy_from_user(void *to, const void __user *from, unsigned long n) +{ + return copy_from_user(to, from, n); +} +EXPORT_SYMBOL_GPL(rust_helper_copy_from_user); + +unsigned long rust_helper_copy_to_user(void __user *to, const void *from, unsigned long n) +{ + return copy_to_user(to, from, n); +} +EXPORT_SYMBOL_GPL(rust_helper_copy_to_user); + +unsigned long rust_helper_clear_user(void __user *to, unsigned long n) +{ + return clear_user(to, n); +} 
+EXPORT_SYMBOL_GPL(rust_helper_clear_user); + +void rust_helper_rcu_read_lock(void) +{ + rcu_read_lock(); +} +EXPORT_SYMBOL_GPL(rust_helper_rcu_read_lock); + +void rust_helper_rcu_read_unlock(void) +{ + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(rust_helper_rcu_read_unlock); + +void rust_helper_synchronize_rcu(void) +{ + synchronize_rcu(); +} +EXPORT_SYMBOL_GPL(rust_helper_synchronize_rcu); + +void __iomem *rust_helper_ioremap(resource_size_t offset, unsigned long size) +{ + return ioremap(offset, size); +} +EXPORT_SYMBOL_GPL(rust_helper_ioremap); + +void __iomem *rust_helper_ioremap_np(resource_size_t offset, unsigned long size) +{ + return ioremap_np(offset, size); +} +EXPORT_SYMBOL_GPL(rust_helper_ioremap_np); + +u8 rust_helper_readb(const volatile void __iomem *addr) +{ + return readb(addr); +} +EXPORT_SYMBOL_GPL(rust_helper_readb); + +u16 rust_helper_readw(const volatile void __iomem *addr) +{ + return readw(addr); +} +EXPORT_SYMBOL_GPL(rust_helper_readw); + +u32 rust_helper_readl(const volatile void __iomem *addr) +{ + return readl(addr); +} +EXPORT_SYMBOL_GPL(rust_helper_readl); + +#ifdef CONFIG_64BIT +u64 rust_helper_readq(const volatile void __iomem *addr) +{ + return readq(addr); +} +EXPORT_SYMBOL_GPL(rust_helper_readq); +#endif + +void rust_helper_writeb(u8 value, volatile void __iomem *addr) +{ + writeb(value, addr); +} +EXPORT_SYMBOL_GPL(rust_helper_writeb); + +void rust_helper_writew(u16 value, volatile void __iomem *addr) +{ + writew(value, addr); +} +EXPORT_SYMBOL_GPL(rust_helper_writew); + +void rust_helper_writel(u32 value, volatile void __iomem *addr) +{ + writel(value, addr); +} +EXPORT_SYMBOL_GPL(rust_helper_writel); + +#ifdef CONFIG_64BIT +void rust_helper_writeq(u64 value, volatile void __iomem *addr) +{ + writeq(value, addr); +} +EXPORT_SYMBOL_GPL(rust_helper_writeq); +#endif + +u8 rust_helper_readb_relaxed(const volatile void __iomem *addr) +{ + return readb_relaxed(addr); +} +EXPORT_SYMBOL_GPL(rust_helper_readb_relaxed); + +u16 rust_helper_readw_relaxed(const volatile void __iomem *addr) +{ + return readw_relaxed(addr); +} +EXPORT_SYMBOL_GPL(rust_helper_readw_relaxed); + +u32 rust_helper_readl_relaxed(const volatile void __iomem *addr) +{ + return readl_relaxed(addr); +} +EXPORT_SYMBOL_GPL(rust_helper_readl_relaxed); + +#ifdef CONFIG_64BIT +u64 rust_helper_readq_relaxed(const volatile void __iomem *addr) +{ + return readq_relaxed(addr); +} +EXPORT_SYMBOL_GPL(rust_helper_readq_relaxed); +#endif + +void rust_helper_writeb_relaxed(u8 value, volatile void __iomem *addr) +{ + writeb_relaxed(value, addr); +} +EXPORT_SYMBOL_GPL(rust_helper_writeb_relaxed); + +void rust_helper_writew_relaxed(u16 value, volatile void __iomem *addr) +{ + writew_relaxed(value, addr); +} +EXPORT_SYMBOL_GPL(rust_helper_writew_relaxed); + +void rust_helper_writel_relaxed(u32 value, volatile void __iomem *addr) +{ + writel_relaxed(value, addr); +} +EXPORT_SYMBOL_GPL(rust_helper_writel_relaxed); + +#ifdef CONFIG_64BIT +void rust_helper_writeq_relaxed(u64 value, volatile void __iomem *addr) +{ + writeq_relaxed(value, addr); +} +EXPORT_SYMBOL_GPL(rust_helper_writeq_relaxed); +#endif + +void rust_helper_memcpy_fromio(void *to, const volatile void __iomem *from, long count) +{ + memcpy_fromio(to, from, count); +} +EXPORT_SYMBOL_GPL(rust_helper_memcpy_fromio); + +void * +rust_helper_platform_get_drvdata(const struct platform_device *pdev) +{ + return platform_get_drvdata(pdev); +} +EXPORT_SYMBOL_GPL(rust_helper_platform_get_drvdata); + +void +rust_helper_platform_set_drvdata(struct platform_device 
*pdev, + void *data) +{ + platform_set_drvdata(pdev, data); +} +EXPORT_SYMBOL_GPL(rust_helper_platform_set_drvdata); + +const struct of_device_id *rust_helper_of_match_device( + const struct of_device_id *matches, const struct device *dev) +{ + return of_match_device(matches, dev); +} +EXPORT_SYMBOL_GPL(rust_helper_of_match_device); + +bool rust_helper_of_node_is_root(const struct device_node *np) +{ + return of_node_is_root(np); +} +EXPORT_SYMBOL_GPL(rust_helper_of_node_is_root); + +struct device_node *rust_helper_of_parse_phandle(const struct device_node *np, + const char *phandle_name, + int index) +{ + return of_parse_phandle(np, phandle_name, index); +} +EXPORT_SYMBOL_GPL(rust_helper_of_parse_phandle); + +int rust_helper_dma_set_mask_and_coherent(struct device *dev, u64 mask) +{ + return dma_set_mask_and_coherent(dev, mask); +} +EXPORT_SYMBOL_GPL(rust_helper_dma_set_mask_and_coherent); + +resource_size_t rust_helper_resource_size(const struct resource *res) +{ + return resource_size(res); +} +EXPORT_SYMBOL_GPL(rust_helper_resource_size); + +void rust_helper_init_completion(struct completion *c) +{ + init_completion(c); +} +EXPORT_SYMBOL_GPL(rust_helper_init_completion); + +dma_addr_t rust_helper_sg_dma_address(const struct scatterlist *sg) +{ + return sg_dma_address(sg); +} +EXPORT_SYMBOL_GPL(rust_helper_sg_dma_address); + +int rust_helper_sg_dma_len(const struct scatterlist *sg) +{ + return sg_dma_len(sg); +} +EXPORT_SYMBOL_GPL(rust_helper_sg_dma_len); + +void rust_helper___spin_lock_init(spinlock_t *lock, const char *name, + struct lock_class_key *key) +{ +#ifdef CONFIG_DEBUG_SPINLOCK +# ifndef CONFIG_PREEMPT_RT + __raw_spin_lock_init(spinlock_check(lock), name, key, LD_WAIT_CONFIG); +# else + rt_mutex_base_init(&lock->lock); + __rt_spin_lock_init(lock, name, key, false); +# endif +#else + spin_lock_init(lock); +#endif +} +EXPORT_SYMBOL_GPL(rust_helper___spin_lock_init); + +unsigned long rust_helper_msecs_to_jiffies(const unsigned int m) +{ + return msecs_to_jiffies(m); +} +EXPORT_SYMBOL_GPL(rust_helper_msecs_to_jiffies); + +#ifdef CONFIG_DMA_SHARED_BUFFER + +void rust_helper_dma_fence_get(struct dma_fence *fence) +{ + dma_fence_get(fence); +} +EXPORT_SYMBOL_GPL(rust_helper_dma_fence_get); + +void rust_helper_dma_fence_put(struct dma_fence *fence) +{ + dma_fence_put(fence); +} +EXPORT_SYMBOL_GPL(rust_helper_dma_fence_put); + +struct dma_fence_chain *rust_helper_dma_fence_chain_alloc(void) +{ + return dma_fence_chain_alloc(); +} +EXPORT_SYMBOL_GPL(rust_helper_dma_fence_chain_alloc); + +void rust_helper_dma_fence_chain_free(struct dma_fence_chain *chain) +{ + dma_fence_chain_free(chain); +} +EXPORT_SYMBOL_GPL(rust_helper_dma_fence_chain_free); + +void rust_helper_dma_fence_set_error(struct dma_fence *fence, int error) +{ + dma_fence_set_error(fence, error); +} +EXPORT_SYMBOL_GPL(rust_helper_dma_fence_set_error); + +#endif + +#ifdef CONFIG_DRM + +void rust_helper_drm_gem_object_get(struct drm_gem_object *obj) +{ + drm_gem_object_get(obj); +} +EXPORT_SYMBOL_GPL(rust_helper_drm_gem_object_get); + +void rust_helper_drm_gem_object_put(struct drm_gem_object *obj) +{ + drm_gem_object_put(obj); +} +EXPORT_SYMBOL_GPL(rust_helper_drm_gem_object_put); + +__u64 rust_helper_drm_vma_node_offset_addr(struct drm_vma_offset_node *node) +{ + return drm_vma_node_offset_addr(node); +} +EXPORT_SYMBOL_GPL(rust_helper_drm_vma_node_offset_addr); + +void rust_helper_drm_syncobj_get(struct drm_syncobj *obj) +{ + drm_syncobj_get(obj); +} +EXPORT_SYMBOL_GPL(rust_helper_drm_syncobj_get); + +void 
rust_helper_drm_syncobj_put(struct drm_syncobj *obj) +{ + drm_syncobj_put(obj); +} +EXPORT_SYMBOL_GPL(rust_helper_drm_syncobj_put); + +struct dma_fence *rust_helper_drm_syncobj_fence_get(struct drm_syncobj *syncobj) +{ + return drm_syncobj_fence_get(syncobj); +} +EXPORT_SYMBOL_GPL(rust_helper_drm_syncobj_fence_get); + +#ifdef CONFIG_DRM_GEM_SHMEM_HELPER + +void rust_helper_drm_gem_shmem_object_free(struct drm_gem_object *obj) +{ + return drm_gem_shmem_object_free(obj); +} +EXPORT_SYMBOL_GPL(rust_helper_drm_gem_shmem_object_free); + +void rust_helper_drm_gem_shmem_object_print_info(struct drm_printer *p, unsigned int indent, + const struct drm_gem_object *obj) +{ + drm_gem_shmem_object_print_info(p, indent, obj); +} +EXPORT_SYMBOL_GPL(rust_helper_drm_gem_shmem_object_print_info); + +int rust_helper_drm_gem_shmem_object_pin(struct drm_gem_object *obj) +{ + return drm_gem_shmem_object_pin(obj); +} +EXPORT_SYMBOL_GPL(rust_helper_drm_gem_shmem_object_pin); + +void rust_helper_drm_gem_shmem_object_unpin(struct drm_gem_object *obj) +{ + drm_gem_shmem_object_unpin(obj); +} +EXPORT_SYMBOL_GPL(rust_helper_drm_gem_shmem_object_unpin); + +struct sg_table *rust_helper_drm_gem_shmem_object_get_sg_table(struct drm_gem_object *obj) +{ + return drm_gem_shmem_object_get_sg_table(obj); +} +EXPORT_SYMBOL_GPL(rust_helper_drm_gem_shmem_object_get_sg_table); + +int rust_helper_drm_gem_shmem_object_vmap(struct drm_gem_object *obj, + struct iosys_map *map) +{ + return drm_gem_shmem_object_vmap(obj, map); +} +EXPORT_SYMBOL_GPL(rust_helper_drm_gem_shmem_object_vmap); + +void rust_helper_drm_gem_shmem_object_vunmap(struct drm_gem_object *obj, + struct iosys_map *map) +{ + drm_gem_shmem_object_vunmap(obj, map); +} +EXPORT_SYMBOL_GPL(rust_helper_drm_gem_shmem_object_vunmap); + +int rust_helper_drm_gem_shmem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) +{ + return drm_gem_shmem_object_mmap(obj, vma); +} +EXPORT_SYMBOL_GPL(rust_helper_drm_gem_shmem_object_mmap); + +#endif +#endif + +#ifdef mutex_lock +void rust_helper_mutex_lock(struct mutex *lock) +{ + return mutex_lock(lock); +} +EXPORT_SYMBOL_GPL(rust_helper_mutex_lock); +#endif + +#ifdef mutex_unlock +void rust_helper_mutex_unlock(struct mutex *lock) +{ + return mutex_unlock(lock); +} +EXPORT_SYMBOL_GPL(rust_helper_mutex_unlock); +#endif + /* * We use `bindgen`'s `--size_t-is-usize` option to bind the C `size_t` type * as the Rust `usize` type, so we can use it in contexts where Rust diff --git a/rust/kernel/build_assert.rs b/rust/kernel/build_assert.rs index 659542393c09..970980827214 100644 --- a/rust/kernel/build_assert.rs +++ b/rust/kernel/build_assert.rs @@ -67,6 +67,7 @@ macro_rules! build_error { /// assert!(n > 1); // Run-time check /// } /// ``` +#[allow(rustdoc::broken_intra_doc_links)] #[macro_export] macro_rules! build_assert { ($cond:expr $(,)?) => {{ diff --git a/rust/kernel/delay.rs b/rust/kernel/delay.rs new file mode 100644 index 000000000000..1e987fa65941 --- /dev/null +++ b/rust/kernel/delay.rs @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Delay functions for operations like sleeping. +//! +//! C header: [`include/linux/delay.h`](../../../../include/linux/delay.h) + +use crate::bindings; +use core::{cmp::min, time::Duration}; + +const MILLIS_PER_SEC: u64 = 1_000; + +fn coarse_sleep_conversion(duration: Duration) -> core::ffi::c_uint { + let milli_as_nanos = Duration::MILLISECOND.subsec_nanos(); + + // Rounds the nanosecond component of `duration` up to the nearest millisecond. 
+ let nanos_as_millis = duration.subsec_nanos().wrapping_add(milli_as_nanos - 1) / milli_as_nanos; + + // Saturates the second component of `duration` to `c_uint::MAX`. + let seconds_as_millis = min( + duration.as_secs().saturating_mul(MILLIS_PER_SEC), + u64::from(core::ffi::c_uint::MAX), + ) as core::ffi::c_uint; + + seconds_as_millis.saturating_add(nanos_as_millis) +} + +/// Sleeps safely even with waitqueue interruptions. +/// +/// This function forwards the call to the C side `msleep` function. As a result, +/// `duration` will be rounded up to the nearest millisecond if granularity less +/// than a millisecond is provided. Any [`Duration`] that exceeds +/// [`c_uint::MAX`][core::ffi::c_uint::MAX] in milliseconds is saturated. +/// +/// # Examples +/// +// Keep these in sync with `test_coarse_sleep_examples`. +/// ``` +/// # use core::time::Duration; +/// # use kernel::delay::coarse_sleep; +/// coarse_sleep(Duration::ZERO); // Equivalent to `msleep(0)`. +/// coarse_sleep(Duration::from_nanos(1)); // Equivalent to `msleep(1)`. +/// +/// coarse_sleep(Duration::from_nanos(1_000_000)); // Equivalent to `msleep(1)`. +/// coarse_sleep(Duration::from_nanos(1_000_001)); // Equivalent to `msleep(2)`. +/// coarse_sleep(Duration::from_nanos(1_999_999)); // Equivalent to `msleep(2)`. +/// +/// coarse_sleep(Duration::from_millis(1)); // Equivalent to `msleep(1)`. +/// coarse_sleep(Duration::from_millis(2)); // Equivalent to `msleep(2)`. +/// +/// coarse_sleep(Duration::from_secs(1)); // Equivalent to `msleep(1000)`. +/// coarse_sleep(Duration::new(1, 1)); // Equivalent to `msleep(1001)`. +/// coarse_sleep(Duration::new(1, 2)); // Equivalent to `msleep(1001)`. +/// ``` +pub fn coarse_sleep(duration: Duration) { + // SAFETY: `msleep` is safe for all values of its argument. + unsafe { bindings::msleep(coarse_sleep_conversion(duration)) } +} + +#[cfg(test)] +mod tests { + use super::{coarse_sleep_conversion, MILLIS_PER_SEC}; + use core::time::Duration; + + #[test] + fn test_coarse_sleep_examples() { + // Keep these in sync with `coarse_sleep`'s `# Examples` section. + + assert_eq!(coarse_sleep_conversion(Duration::ZERO), 0); + assert_eq!(coarse_sleep_conversion(Duration::from_nanos(1)), 1); + + assert_eq!(coarse_sleep_conversion(Duration::from_nanos(1_000_000)), 1); + assert_eq!(coarse_sleep_conversion(Duration::from_nanos(1_000_001)), 2); + assert_eq!(coarse_sleep_conversion(Duration::from_nanos(1_999_999)), 2); + + assert_eq!(coarse_sleep_conversion(Duration::from_millis(1)), 1); + assert_eq!(coarse_sleep_conversion(Duration::from_millis(2)), 2); + + assert_eq!(coarse_sleep_conversion(Duration::from_secs(1)), 1000); + assert_eq!(coarse_sleep_conversion(Duration::new(1, 1)), 1001); + assert_eq!(coarse_sleep_conversion(Duration::new(1, 2)), 1001); + } + + #[test] + fn test_coarse_sleep_saturation() { + assert!( + coarse_sleep_conversion(Duration::new( + core::ffi::c_uint::MAX as u64 / MILLIS_PER_SEC, + 0 + )) < core::ffi::c_uint::MAX + ); + assert_eq!( + coarse_sleep_conversion(Duration::new( + core::ffi::c_uint::MAX as u64 / MILLIS_PER_SEC, + 999_999_999 + )), + core::ffi::c_uint::MAX + ); + + assert_eq!( + coarse_sleep_conversion(Duration::MAX), + core::ffi::c_uint::MAX + ); + } +} diff --git a/rust/kernel/device.rs b/rust/kernel/device.rs new file mode 100644 index 000000000000..c8b7d69b1426 --- /dev/null +++ b/rust/kernel/device.rs @@ -0,0 +1,536 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Generic devices that are part of the kernel's driver model. +//! +//! 
C header: [`include/linux/device.h`](../../../../include/linux/device.h) + +use crate::{ + bindings, + error::Result, + of, + revocable::{Revocable, RevocableGuard}, + str::CStr, + sync::{LockClassKey, NeedsLockClass, RevocableMutex, RevocableMutexGuard, UniqueArc}, +}; +use core::{ + fmt, + ops::{Deref, DerefMut}, + pin::Pin, +}; + +#[cfg(CONFIG_PRINTK)] +use crate::c_str; + +/// A raw device. +/// +/// # Safety +/// +/// Implementers must ensure that the `*mut device` returned by [`RawDevice::raw_device`] is +/// related to `self`, that is, actions on it will affect `self`. For example, if one calls +/// `get_device`, then the refcount on the device represented by `self` will be incremented. +/// +/// Additionally, implementers must ensure that the device is never renamed. Commit a5462516aa99 +/// ("driver-core: document restrictions on device_rename()") has details on why `device_rename` +/// should not be used. +pub unsafe trait RawDevice { + /// Returns the raw `struct device` related to `self`. + fn raw_device(&self) -> *mut bindings::device; + + /// Returns the name of the device. + fn name(&self) -> &CStr { + let ptr = self.raw_device(); + + // SAFETY: `ptr` is valid because `self` keeps it alive. + let name = unsafe { bindings::dev_name(ptr) }; + + // SAFETY: The name of the device remains valid while it is alive (because the device is + // never renamed, per the safety requirement of this trait). This is guaranteed to be the + // case because the reference to `self` outlives the one of the returned `CStr` (enforced + // by the compiler because of their lifetimes). + unsafe { CStr::from_char_ptr(name) } + } + + /// Gets the OpenFirmware node attached to this device + fn of_node(&self) -> Option<of::Node> { + let ptr = self.raw_device(); + + unsafe { of::Node::get_from_raw((*ptr).of_node) } + } + + /// Prints an emergency-level message (level 0) prefixed with device information. + /// + /// More details are available from [`dev_emerg`]. + /// + /// [`dev_emerg`]: crate::dev_emerg + fn pr_emerg(&self, args: fmt::Arguments<'_>) { + // SAFETY: `klevel` is null-terminated, uses one of the kernel constants. + unsafe { self.printk(bindings::KERN_EMERG, args) }; + } + + /// Prints an alert-level message (level 1) prefixed with device information. + /// + /// More details are available from [`dev_alert`]. + /// + /// [`dev_alert`]: crate::dev_alert + fn pr_alert(&self, args: fmt::Arguments<'_>) { + // SAFETY: `klevel` is null-terminated, uses one of the kernel constants. + unsafe { self.printk(bindings::KERN_ALERT, args) }; + } + + /// Prints a critical-level message (level 2) prefixed with device information. + /// + /// More details are available from [`dev_crit`]. + /// + /// [`dev_crit`]: crate::dev_crit + fn pr_crit(&self, args: fmt::Arguments<'_>) { + // SAFETY: `klevel` is null-terminated, uses one of the kernel constants. + unsafe { self.printk(bindings::KERN_CRIT, args) }; + } + + /// Prints an error-level message (level 3) prefixed with device information. + /// + /// More details are available from [`dev_err`]. + /// + /// [`dev_err`]: crate::dev_err + fn pr_err(&self, args: fmt::Arguments<'_>) { + // SAFETY: `klevel` is null-terminated, uses one of the kernel constants. + unsafe { self.printk(bindings::KERN_ERR, args) }; + } + + /// Prints a warning-level message (level 4) prefixed with device information. + /// + /// More details are available from [`dev_warn`]. 
+ /// + /// [`dev_warn`]: crate::dev_warn + fn pr_warn(&self, args: fmt::Arguments<'_>) { + // SAFETY: `klevel` is null-terminated, uses one of the kernel constants. + unsafe { self.printk(bindings::KERN_WARNING, args) }; + } + + /// Prints a notice-level message (level 5) prefixed with device information. + /// + /// More details are available from [`dev_notice`]. + /// + /// [`dev_notice`]: crate::dev_notice + fn pr_notice(&self, args: fmt::Arguments<'_>) { + // SAFETY: `klevel` is null-terminated, uses one of the kernel constants. + unsafe { self.printk(bindings::KERN_NOTICE, args) }; + } + + /// Prints an info-level message (level 6) prefixed with device information. + /// + /// More details are available from [`dev_info`]. + /// + /// [`dev_info`]: crate::dev_info + fn pr_info(&self, args: fmt::Arguments<'_>) { + // SAFETY: `klevel` is null-terminated, uses one of the kernel constants. + unsafe { self.printk(bindings::KERN_INFO, args) }; + } + + /// Prints a debug-level message (level 7) prefixed with device information. + /// + /// More details are available from [`dev_dbg`]. + /// + /// [`dev_dbg`]: crate::dev_dbg + fn pr_dbg(&self, args: fmt::Arguments<'_>) { + if cfg!(debug_assertions) { + // SAFETY: `klevel` is null-terminated, uses one of the kernel constants. + unsafe { self.printk(bindings::KERN_DEBUG, args) }; + } + } + + /// Prints the provided message to the console. + /// + /// # Safety + /// + /// Callers must ensure that `klevel` is null-terminated; in particular, one of the + /// `KERN_*`constants, for example, `KERN_CRIT`, `KERN_ALERT`, etc. + #[cfg_attr(not(CONFIG_PRINTK), allow(unused_variables))] + unsafe fn printk(&self, klevel: &[u8], msg: fmt::Arguments<'_>) { + // SAFETY: `klevel` is null-terminated and one of the kernel constants. `self.raw_device` + // is valid because `self` is valid. The "%pA" format string expects a pointer to + // `fmt::Arguments`, which is what we're passing as the last argument. + #[cfg(CONFIG_PRINTK)] + unsafe { + bindings::_dev_printk( + klevel as *const _ as *const core::ffi::c_char, + self.raw_device(), + c_str!("%pA").as_char_ptr(), + &msg as *const _ as *const core::ffi::c_void, + ) + }; + } +} + +/// A ref-counted device. +/// +/// # Invariants +/// +/// `ptr` is valid, non-null, and has a non-zero reference count. One of the references is owned by +/// `self`, and will be decremented when `self` is dropped. +pub struct Device { + pub(crate) ptr: *mut bindings::device, +} + +// SAFETY: `Device` only holds a pointer to a C device, which is safe to be used from any thread. +unsafe impl Send for Device {} + +// SAFETY: `Device` only holds a pointer to a C device, references to which are safe to be used +// from any thread. +unsafe impl Sync for Device {} + +impl Device { + /// Creates a new device instance. + /// + /// # Safety + /// + /// Callers must ensure that `ptr` is valid, non-null, and has a non-zero reference count. + pub unsafe fn new(ptr: *mut bindings::device) -> Self { + // SAFETY: By the safety requirements, ptr is valid and its refcounted will be incremented. + unsafe { bindings::get_device(ptr) }; + // INVARIANT: The safety requirements satisfy all but one invariant, which is that `self` + // owns a reference. This is satisfied by the call to `get_device` above. + Self { ptr } + } + + /// Creates a new device instance from an existing [`RawDevice`] instance. + pub fn from_dev(dev: &dyn RawDevice) -> Self { + // SAFETY: The requirements are satisfied by the existence of `RawDevice` and its safety + // requirements. 
+ unsafe { Self::new(dev.raw_device()) } + } +} + +// SAFETY: The device returned by `raw_device` is the one for which we hold a reference. +unsafe impl RawDevice for Device { + fn raw_device(&self) -> *mut bindings::device { + self.ptr + } +} + +impl Drop for Device { + fn drop(&mut self) { + // SAFETY: By the type invariants, we know that `self` owns a reference, so it is safe to + // relinquish it now. + unsafe { bindings::put_device(self.ptr) }; + } +} + +impl Clone for Device { + fn clone(&self) -> Self { + Device::from_dev(self) + } +} + +/// Device data. +/// +/// When a device is removed (for whatever reason, for example, because the device was unplugged or +/// because the user decided to unbind the driver), the driver is given a chance to clean its state +/// up, and all io resources should ideally not be used anymore. +/// +/// However, the device data is reference-counted because other subsystems hold pointers to it. So +/// some device state must be freed and not used anymore, while others must remain accessible. +/// +/// This struct separates the device data into three categories: +/// 1. Registrations: are destroyed when the device is removed, but before the io resources +/// become inaccessible. +/// 2. Io resources: are available until the device is removed. +/// 3. General data: remain available as long as the ref count is nonzero. +/// +/// This struct implements the `DeviceRemoval` trait so that it can clean resources up even if not +/// explicitly called by the device drivers. +pub struct Data<T, U, V> { + registrations: RevocableMutex<T>, + resources: Revocable<U>, + general: V, +} + +/// Safely creates an new reference-counted instance of [`Data`]. +#[doc(hidden)] +#[macro_export] +macro_rules! new_device_data { + ($reg:expr, $res:expr, $gen:expr, $name:literal) => {{ + static CLASS1: $crate::sync::LockClassKey = $crate::sync::LockClassKey::new(); + static CLASS2: $crate::sync::LockClassKey = $crate::sync::LockClassKey::new(); + let regs = $reg; + let res = $res; + let gen = $gen; + let name = $crate::c_str!($name); + $crate::device::Data::try_new(regs, res, gen, name, &CLASS1, &CLASS2) + }}; +} + +impl<T, U, V> Data<T, U, V> { + /// Creates a new instance of `Data`. + /// + /// It is recommended that the [`new_device_data`] macro be used as it automatically creates + /// the lock classes. + pub fn try_new( + registrations: T, + resources: U, + general: V, + name: &'static CStr, + key1: &'static LockClassKey, + key2: &'static LockClassKey, + ) -> Result<Pin<UniqueArc<Self>>> { + let mut ret = Pin::from(UniqueArc::try_new(Self { + // SAFETY: We call `RevocableMutex::init` below. + registrations: unsafe { RevocableMutex::new(registrations) }, + resources: Revocable::new(resources), + general, + })?); + + // SAFETY: `Data::registrations` is pinned when `Data` is. + let pinned = unsafe { ret.as_mut().map_unchecked_mut(|d| &mut d.registrations) }; + pinned.init(name, key1, key2); + Ok(ret) + } + + /// Returns the resources if they're still available. + pub fn resources(&self) -> Option<RevocableGuard<'_, U>> { + self.resources.try_access() + } + + /// Returns the locked registrations if they're still available. + pub fn registrations(&self) -> Option<RevocableMutexGuard<'_, T>> { + self.registrations.try_write() + } +} + +impl<T, U, V> crate::driver::DeviceRemoval for Data<T, U, V> { + fn device_remove(&self) { + // We revoke the registrations first so that resources are still available to them during + // unregistration. 
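A hedged sketch of how a driver might group its state into the three categories documented above via `new_device_data!`; the `Registrations`/`Resources`/`General` types and the `"demo"` lock-class name are invented for illustration:

```rust
use kernel::prelude::*;

struct Registrations; // torn down first, while I/O resources are still usable
struct Resources;     // revoked once the device is gone
struct General;       // stays alive for as long as the refcount is held

fn bind_example() -> Result {
    // Expands to device::Data::try_new() with two statically allocated lock classes.
    let data = kernel::new_device_data!(Registrations, Resources, General, "demo")?;

    // Both categories remain accessible only while the device has not been removed.
    assert!(data.resources().is_some());
    assert!(data.registrations().is_some());
    Ok(())
}
```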
+ self.registrations.revoke(); + + // Release resources now. General data remains available. + self.resources.revoke(); + } +} + +impl<T, U, V> Deref for Data<T, U, V> { + type Target = V; + + fn deref(&self) -> &V { + &self.general + } +} + +impl<T, U, V> DerefMut for Data<T, U, V> { + fn deref_mut(&mut self) -> &mut V { + &mut self.general + } +} + +#[doc(hidden)] +#[macro_export] +macro_rules! dev_printk { + ($method:ident, $dev:expr, $($f:tt)*) => { + { + // We have an explicity `use` statement here so that callers of this macro are not + // required to explicitly use the `RawDevice` trait to use its functions. + use $crate::device::RawDevice; + ($dev).$method(core::format_args!($($f)*)); + } + } +} + +/// Prints an emergency-level message (level 0) prefixed with device information. +/// +/// This level should be used if the system is unusable. +/// +/// Equivalent to the kernel's `dev_emerg` macro. +/// +/// Mimics the interface of [`std::print!`]. More information about the syntax is available from +/// [`core::fmt`] and [`alloc::format!`]. +/// +/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html +/// +/// # Examples +/// +/// ``` +/// # use kernel::device::Device; +/// +/// fn example(dev: &Device) { +/// dev_emerg!(dev, "hello {}\n", "there"); +/// } +/// ``` +#[macro_export] +macro_rules! dev_emerg { + ($($f:tt)*) => { $crate::dev_printk!(pr_emerg, $($f)*); } +} + +/// Prints an alert-level message (level 1) prefixed with device information. +/// +/// This level should be used if action must be taken immediately. +/// +/// Equivalent to the kernel's `dev_alert` macro. +/// +/// Mimics the interface of [`std::print!`]. More information about the syntax is available from +/// [`core::fmt`] and [`alloc::format!`]. +/// +/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html +/// +/// # Examples +/// +/// ``` +/// # use kernel::device::Device; +/// +/// fn example(dev: &Device) { +/// dev_alert!(dev, "hello {}\n", "there"); +/// } +/// ``` +#[macro_export] +macro_rules! dev_alert { + ($($f:tt)*) => { $crate::dev_printk!(pr_alert, $($f)*); } +} + +/// Prints a critical-level message (level 2) prefixed with device information. +/// +/// This level should be used in critical conditions. +/// +/// Equivalent to the kernel's `dev_crit` macro. +/// +/// Mimics the interface of [`std::print!`]. More information about the syntax is available from +/// [`core::fmt`] and [`alloc::format!`]. +/// +/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html +/// +/// # Examples +/// +/// ``` +/// # use kernel::device::Device; +/// +/// fn example(dev: &Device) { +/// dev_crit!(dev, "hello {}\n", "there"); +/// } +/// ``` +#[macro_export] +macro_rules! dev_crit { + ($($f:tt)*) => { $crate::dev_printk!(pr_crit, $($f)*); } +} + +/// Prints an error-level message (level 3) prefixed with device information. +/// +/// This level should be used in error conditions. +/// +/// Equivalent to the kernel's `dev_err` macro. +/// +/// Mimics the interface of [`std::print!`]. More information about the syntax is available from +/// [`core::fmt`] and [`alloc::format!`]. +/// +/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html +/// +/// # Examples +/// +/// ``` +/// # use kernel::device::Device; +/// +/// fn example(dev: &Device) { +/// dev_err!(dev, "hello {}\n", "there"); +/// } +/// ``` +#[macro_export] +macro_rules! 
dev_err { + ($($f:tt)*) => { $crate::dev_printk!(pr_err, $($f)*); } +} + +/// Prints a warning-level message (level 4) prefixed with device information. +/// +/// This level should be used in warning conditions. +/// +/// Equivalent to the kernel's `dev_warn` macro. +/// +/// Mimics the interface of [`std::print!`]. More information about the syntax is available from +/// [`core::fmt`] and [`alloc::format!`]. +/// +/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html +/// +/// # Examples +/// +/// ``` +/// # use kernel::device::Device; +/// +/// fn example(dev: &Device) { +/// dev_warn!(dev, "hello {}\n", "there"); +/// } +/// ``` +#[macro_export] +macro_rules! dev_warn { + ($($f:tt)*) => { $crate::dev_printk!(pr_warn, $($f)*); } +} + +/// Prints a notice-level message (level 5) prefixed with device information. +/// +/// This level should be used in normal but significant conditions. +/// +/// Equivalent to the kernel's `dev_notice` macro. +/// +/// Mimics the interface of [`std::print!`]. More information about the syntax is available from +/// [`core::fmt`] and [`alloc::format!`]. +/// +/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html +/// +/// # Examples +/// +/// ``` +/// # use kernel::device::Device; +/// +/// fn example(dev: &Device) { +/// dev_notice!(dev, "hello {}\n", "there"); +/// } +/// ``` +#[macro_export] +macro_rules! dev_notice { + ($($f:tt)*) => { $crate::dev_printk!(pr_notice, $($f)*); } +} + +/// Prints an info-level message (level 6) prefixed with device information. +/// +/// This level should be used for informational messages. +/// +/// Equivalent to the kernel's `dev_info` macro. +/// +/// Mimics the interface of [`std::print!`]. More information about the syntax is available from +/// [`core::fmt`] and [`alloc::format!`]. +/// +/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html +/// +/// # Examples +/// +/// ``` +/// # use kernel::device::Device; +/// +/// fn example(dev: &Device) { +/// dev_info!(dev, "hello {}\n", "there"); +/// } +/// ``` +#[macro_export] +macro_rules! dev_info { + ($($f:tt)*) => { $crate::dev_printk!(pr_info, $($f)*); } +} + +/// Prints a debug-level message (level 7) prefixed with device information. +/// +/// This level should be used for debug messages. +/// +/// Equivalent to the kernel's `dev_dbg` macro, except that it doesn't support dynamic debug yet. +/// +/// Mimics the interface of [`std::print!`]. More information about the syntax is available from +/// [`core::fmt`] and [`alloc::format!`]. +/// +/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html +/// +/// # Examples +/// +/// ``` +/// # use kernel::device::Device; +/// +/// fn example(dev: &Device) { +/// dev_dbg!(dev, "hello {}\n", "there"); +/// } +/// ``` +#[macro_export] +macro_rules! dev_dbg { + ($($f:tt)*) => { $crate::dev_printk!(pr_dbg, $($f)*); } +} diff --git a/rust/kernel/dma_fence.rs b/rust/kernel/dma_fence.rs new file mode 100644 index 000000000000..ca93380d9da2 --- /dev/null +++ b/rust/kernel/dma_fence.rs @@ -0,0 +1,532 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! DMA fence abstraction. +//! +//! 
C header: [`include/linux/dma_fence.h`](../../include/linux/dma_fence.h) + +use crate::{ + bindings, + error::{to_result, Result}, + prelude::*, + sync::LockClassKey, + types::Opaque, +}; +use core::fmt::Write; +use core::ops::{Deref, DerefMut}; +use core::ptr::addr_of_mut; +use core::sync::atomic::{AtomicU64, Ordering}; + +/// Any kind of DMA Fence Object +/// +/// # Invariants +/// raw() returns a valid pointer to a dma_fence and we own a reference to it. +pub trait RawDmaFence: crate::private::Sealed { + /// Returns the raw `struct dma_fence` pointer. + fn raw(&self) -> *mut bindings::dma_fence; + + /// Returns the raw `struct dma_fence` pointer and consumes the object. + /// + /// The caller is responsible for dropping the reference. + fn into_raw(self) -> *mut bindings::dma_fence + where + Self: Sized, + { + let ptr = self.raw(); + core::mem::forget(self); + ptr + } + + /// Advances this fence to the chain node which will signal this sequence number. + /// If no sequence number is provided, this returns `self` again. + fn chain_find_seqno(self, seqno: u64) -> Result<Fence> + where + Self: Sized, + { + let mut ptr = self.into_raw(); + + // SAFETY: This will safely fail if this DmaFence is not a chain. + // `ptr` is valid per the type invariant. + let ret = unsafe { bindings::dma_fence_chain_find_seqno(&mut ptr, seqno) }; + + if ret != 0 { + // SAFETY: This is either an owned reference or NULL, dma_fence_put can handle both. + unsafe { bindings::dma_fence_put(ptr) }; + Err(Error::from_kernel_errno(ret)) + } else if ptr.is_null() { + Err(EINVAL) // When can this happen? + } else { + // SAFETY: ptr is valid and non-NULL as checked above. + Ok(unsafe { Fence::from_raw(ptr) }) + } + } + + /// Signal completion of this fence + fn signal(&self) -> Result { + to_result(unsafe { bindings::dma_fence_signal(self.raw()) }) + } + + /// Set the error flag on this fence + fn set_error(&self, err: Error) { + unsafe { bindings::dma_fence_set_error(self.raw(), err.to_kernel_errno()) }; + } +} + +/// A generic DMA Fence Object +/// +/// # Invariants +/// ptr is a valid pointer to a dma_fence and we own a reference to it. +pub struct Fence { + ptr: *mut bindings::dma_fence, +} + +impl Fence { + /// Create a new Fence object from a raw pointer to a dma_fence. + /// + /// # Safety + /// The caller must own a reference to the dma_fence, which is transferred to the new object. + pub(crate) unsafe fn from_raw(ptr: *mut bindings::dma_fence) -> Fence { + Fence { ptr } + } + + /// Create a new Fence object from a raw pointer to a dma_fence. + /// + /// # Safety + /// Takes a borrowed reference to the dma_fence, and increments the reference count. + pub(crate) unsafe fn get_raw(ptr: *mut bindings::dma_fence) -> Fence { + // SAFETY: Pointer is valid per the safety contract + unsafe { bindings::dma_fence_get(ptr) }; + Fence { ptr } + } + + /// Create a new Fence object from a RawDmaFence. + pub fn from_fence(fence: &dyn RawDmaFence) -> Fence { + // SAFETY: Pointer is valid per the RawDmaFence contract + unsafe { Self::get_raw(fence.raw()) } + } +} + +impl crate::private::Sealed for Fence {} + +impl RawDmaFence for Fence { + fn raw(&self) -> *mut bindings::dma_fence { + self.ptr + } +} + +impl Drop for Fence { + fn drop(&mut self) { + // SAFETY: We own a reference to this syncobj. + unsafe { bindings::dma_fence_put(self.ptr) }; + } +} + +impl Clone for Fence { + fn clone(&self) -> Self { + // SAFETY: `ptr` is valid per the type invariant and we own a reference to it. 
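+        // Taking the extra reference first keeps the refcount balanced: `dma_fence_get()`
+        // acquires a reference for the new object, and `from_raw()` then assumes ownership of
+        // exactly that reference.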
+ unsafe { + bindings::dma_fence_get(self.ptr); + Self::from_raw(self.ptr) + } + } +} + +unsafe impl Sync for Fence {} +unsafe impl Send for Fence {} + +/// Trait which must be implemented by driver-specific fence objects. +#[vtable] +pub trait FenceOps: Sized + Send + Sync { + /// True if this dma_fence implementation uses 64bit seqno, false otherwise. + const USE_64BIT_SEQNO: bool; + + /// Returns the driver name. This is a callback to allow drivers to compute the name at + /// runtime, without having it to store permanently for each fence, or build a cache of + /// some sort. + fn get_driver_name<'a>(self: &'a FenceObject<Self>) -> &'a CStr; + + /// Return the name of the context this fence belongs to. This is a callback to allow drivers + /// to compute the name at runtime, without having it to store permanently for each fence, or + /// build a cache of some sort. + fn get_timeline_name<'a>(self: &'a FenceObject<Self>) -> &'a CStr; + + /// Enable software signaling of fence. + fn enable_signaling(self: &FenceObject<Self>) -> bool { + false + } + + /// Peek whether the fence is signaled, as a fastpath optimization for e.g. dma_fence_wait() or + /// dma_fence_add_callback(). + fn signaled(self: &FenceObject<Self>) -> bool { + false + } + + /// Callback to fill in free-form debug info specific to this fence, like the sequence number. + fn fence_value_str(self: &FenceObject<Self>, _output: &mut dyn Write) {} + + /// Fills in the current value of the timeline as a string, like the sequence number. Note that + /// the specific fence passed to this function should not matter, drivers should only use it to + /// look up the corresponding timeline structures. + fn timeline_value_str(self: &FenceObject<Self>, _output: &mut dyn Write) {} +} + +unsafe extern "C" fn get_driver_name_cb<T: FenceOps>( + fence: *mut bindings::dma_fence, +) -> *const core::ffi::c_char { + // SAFETY: All of our fences are FenceObject<T>. + let p = crate::container_of!(fence, FenceObject<T>, fence) as *mut FenceObject<T>; + + // SAFETY: The caller is responsible for passing a valid dma_fence subtype + T::get_driver_name(unsafe { &mut *p }).as_char_ptr() +} + +unsafe extern "C" fn get_timeline_name_cb<T: FenceOps>( + fence: *mut bindings::dma_fence, +) -> *const core::ffi::c_char { + // SAFETY: All of our fences are FenceObject<T>. + let p = crate::container_of!(fence, FenceObject<T>, fence) as *mut FenceObject<T>; + + // SAFETY: The caller is responsible for passing a valid dma_fence subtype + T::get_timeline_name(unsafe { &mut *p }).as_char_ptr() +} + +unsafe extern "C" fn enable_signaling_cb<T: FenceOps>(fence: *mut bindings::dma_fence) -> bool { + // SAFETY: All of our fences are FenceObject<T>. + let p = crate::container_of!(fence, FenceObject<T>, fence) as *mut FenceObject<T>; + + // SAFETY: The caller is responsible for passing a valid dma_fence subtype + T::enable_signaling(unsafe { &mut *p }) +} + +unsafe extern "C" fn signaled_cb<T: FenceOps>(fence: *mut bindings::dma_fence) -> bool { + // SAFETY: All of our fences are FenceObject<T>. + let p = crate::container_of!(fence, FenceObject<T>, fence) as *mut FenceObject<T>; + + // SAFETY: The caller is responsible for passing a valid dma_fence subtype + T::signaled(unsafe { &mut *p }) +} + +unsafe extern "C" fn release_cb<T: FenceOps>(fence: *mut bindings::dma_fence) { + // SAFETY: All of our fences are FenceObject<T>. 
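+    // `container_of!` recovers the `FenceObject<T>` wrapper from its embedded `fence` field, so
+    // the driver-private `inner` value can be dropped before the C side frees the allocation.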
+ let p = crate::container_of!(fence, FenceObject<T>, fence) as *mut FenceObject<T>; + + // SAFETY: p is never used after this + unsafe { + core::ptr::drop_in_place(&mut (*p).inner); + } + + // SAFETY: All of our fences are allocated using kmalloc, so this is safe. + unsafe { bindings::dma_fence_free(fence) }; +} + +unsafe extern "C" fn fence_value_str_cb<T: FenceOps>( + fence: *mut bindings::dma_fence, + string: *mut core::ffi::c_char, + size: core::ffi::c_int, +) { + let size: usize = size.try_into().unwrap_or(0); + + if size == 0 { + return; + } + + // SAFETY: All of our fences are FenceObject<T>. + let p = crate::container_of!(fence, FenceObject<T>, fence) as *mut FenceObject<T>; + + // SAFETY: The caller is responsible for the validity of string/size + let mut f = unsafe { crate::str::Formatter::from_buffer(string as *mut _, size) }; + + // SAFETY: The caller is responsible for passing a valid dma_fence subtype + T::fence_value_str(unsafe { &mut *p }, &mut f); + let _ = f.write_str("\0"); + + // SAFETY: `size` is at least 1 per the check above + unsafe { *string.add(size - 1) = 0 }; +} + +unsafe extern "C" fn timeline_value_str_cb<T: FenceOps>( + fence: *mut bindings::dma_fence, + string: *mut core::ffi::c_char, + size: core::ffi::c_int, +) { + let size: usize = size.try_into().unwrap_or(0); + + if size == 0 { + return; + } + + // SAFETY: All of our fences are FenceObject<T>. + let p = crate::container_of!(fence, FenceObject<T>, fence) as *mut FenceObject<T>; + + // SAFETY: The caller is responsible for the validity of string/size + let mut f = unsafe { crate::str::Formatter::from_buffer(string as *mut _, size) }; + + // SAFETY: The caller is responsible for passing a valid dma_fence subtype + T::timeline_value_str(unsafe { &mut *p }, &mut f); + let _ = f.write_str("\0"); + + // SAFETY: `size` is at least 1 per the check above + unsafe { *string.add(size - 1) = 0 }; +} + +// Allow FenceObject<Self> to be used as a self argument, for ergonomics +impl<T: FenceOps> core::ops::Receiver for FenceObject<T> {} + +/// A driver-specific DMA Fence Object +/// +/// # Invariants +/// ptr is a valid pointer to a dma_fence and we own a reference to it. 
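+///
+/// # Examples
+///
+/// A minimal sketch of a driver-side fence type (illustrative only; `MyFence` is a placeholder
+/// and the driver/timeline names are arbitrary):
+///
+/// ```ignore
+/// use kernel::{c_str, prelude::*};
+/// use kernel::dma_fence::{FenceObject, FenceOps};
+///
+/// struct MyFence {
+///     value: u64,
+/// }
+///
+/// #[vtable]
+/// impl FenceOps for MyFence {
+///     const USE_64BIT_SEQNO: bool = true;
+///
+///     fn get_driver_name<'a>(self: &'a FenceObject<Self>) -> &'a CStr {
+///         c_str!("my_driver")
+///     }
+///
+///     fn get_timeline_name<'a>(self: &'a FenceObject<Self>) -> &'a CStr {
+///         c_str!("my_timeline")
+///     }
+/// }
+/// ```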
+#[repr(C)] +pub struct FenceObject<T: FenceOps> { + fence: bindings::dma_fence, + lock: Opaque<bindings::spinlock>, + inner: T, +} + +impl<T: FenceOps> FenceObject<T> { + const SIZE: usize = core::mem::size_of::<Self>(); + + const VTABLE: bindings::dma_fence_ops = bindings::dma_fence_ops { + use_64bit_seqno: T::USE_64BIT_SEQNO, + get_driver_name: Some(get_driver_name_cb::<T>), + get_timeline_name: Some(get_timeline_name_cb::<T>), + enable_signaling: if T::HAS_ENABLE_SIGNALING { + Some(enable_signaling_cb::<T>) + } else { + None + }, + signaled: if T::HAS_SIGNALED { + Some(signaled_cb::<T>) + } else { + None + }, + wait: None, // Deprecated + release: Some(release_cb::<T>), + fence_value_str: if T::HAS_FENCE_VALUE_STR { + Some(fence_value_str_cb::<T>) + } else { + None + }, + timeline_value_str: if T::HAS_TIMELINE_VALUE_STR { + Some(timeline_value_str_cb::<T>) + } else { + None + }, + }; +} + +impl<T: FenceOps> Deref for FenceObject<T> { + type Target = T; + + fn deref(&self) -> &T { + &self.inner + } +} + +impl<T: FenceOps> DerefMut for FenceObject<T> { + fn deref_mut(&mut self) -> &mut T { + &mut self.inner + } +} + +impl<T: FenceOps> crate::private::Sealed for FenceObject<T> {} +impl<T: FenceOps> RawDmaFence for FenceObject<T> { + fn raw(&self) -> *mut bindings::dma_fence { + &self.fence as *const _ as *mut _ + } +} + +/// A unique reference to a driver-specific fence object +pub struct UniqueFence<T: FenceOps>(*mut FenceObject<T>); + +impl<T: FenceOps> Deref for UniqueFence<T> { + type Target = FenceObject<T>; + + fn deref(&self) -> &FenceObject<T> { + unsafe { &*self.0 } + } +} + +impl<T: FenceOps> DerefMut for UniqueFence<T> { + fn deref_mut(&mut self) -> &mut FenceObject<T> { + unsafe { &mut *self.0 } + } +} + +impl<T: FenceOps> crate::private::Sealed for UniqueFence<T> {} +impl<T: FenceOps> RawDmaFence for UniqueFence<T> { + fn raw(&self) -> *mut bindings::dma_fence { + unsafe { addr_of_mut!((*self.0).fence) } + } +} + +impl<T: FenceOps> From<UniqueFence<T>> for UserFence<T> { + fn from(value: UniqueFence<T>) -> Self { + let ptr = value.0; + core::mem::forget(value); + + UserFence(ptr) + } +} + +impl<T: FenceOps> Drop for UniqueFence<T> { + fn drop(&mut self) { + // SAFETY: We own a reference to this fence. + unsafe { bindings::dma_fence_put(self.raw()) }; + } +} + +unsafe impl<T: FenceOps> Sync for UniqueFence<T> {} +unsafe impl<T: FenceOps> Send for UniqueFence<T> {} + +/// A shared reference to a driver-specific fence object +pub struct UserFence<T: FenceOps>(*mut FenceObject<T>); + +impl<T: FenceOps> Deref for UserFence<T> { + type Target = FenceObject<T>; + + fn deref(&self) -> &FenceObject<T> { + unsafe { &*self.0 } + } +} + +impl<T: FenceOps> Clone for UserFence<T> { + fn clone(&self) -> Self { + // SAFETY: `ptr` is valid per the type invariant and we own a reference to it. + unsafe { + bindings::dma_fence_get(self.raw()); + Self(self.0) + } + } +} + +impl<T: FenceOps> crate::private::Sealed for UserFence<T> {} +impl<T: FenceOps> RawDmaFence for UserFence<T> { + fn raw(&self) -> *mut bindings::dma_fence { + unsafe { addr_of_mut!((*self.0).fence) } + } +} + +impl<T: FenceOps> Drop for UserFence<T> { + fn drop(&mut self) { + // SAFETY: We own a reference to this fence. + unsafe { bindings::dma_fence_put(self.raw()) }; + } +} + +unsafe impl<T: FenceOps> Sync for UserFence<T> {} +unsafe impl<T: FenceOps> Send for UserFence<T> {} + +/// An array of fence contexts, out of which fences can be created. 
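+///
+/// # Examples
+///
+/// A minimal usage sketch (illustrative only; `MyFence` and `MY_FENCE_LOCK_KEY` are placeholders
+/// assumed to be defined by the driver):
+///
+/// ```ignore
+/// let contexts = FenceContexts::new(1, c_str!("my_driver_fences"), &MY_FENCE_LOCK_KEY)?;
+/// let fence = contexts.new_fence(0, MyFence { value: 0 })?;
+/// // The producer later completes it with `fence.signal()?`.
+/// ```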
+pub struct FenceContexts { + start: u64, + count: u32, + seqnos: Vec<AtomicU64>, + lock_name: &'static CStr, + lock_key: &'static LockClassKey, +} + +impl FenceContexts { + /// Create a new set of fence contexts. + pub fn new( + count: u32, + name: &'static CStr, + key: &'static LockClassKey, + ) -> Result<FenceContexts> { + let mut seqnos: Vec<AtomicU64> = Vec::new(); + + seqnos.try_reserve(count as usize)?; + + for _ in 0..count { + seqnos.try_push(Default::default())?; + } + + let start = unsafe { bindings::dma_fence_context_alloc(count as core::ffi::c_uint) }; + + Ok(FenceContexts { + start, + count, + seqnos, + lock_name: name, + lock_key: key, + }) + } + + /// Create a new fence in a given context index. + pub fn new_fence<T: FenceOps>(&self, context: u32, inner: T) -> Result<UniqueFence<T>> { + if context > self.count { + return Err(EINVAL); + } + + let p = unsafe { + bindings::krealloc( + core::ptr::null_mut(), + FenceObject::<T>::SIZE, + bindings::GFP_KERNEL | bindings::__GFP_ZERO, + ) as *mut FenceObject<T> + }; + + if p.is_null() { + return Err(ENOMEM); + } + + let seqno = self.seqnos[context as usize].fetch_add(1, Ordering::Relaxed); + + // SAFETY: The pointer is valid, so pointers to members are too. + // After this, all fields are initialized. + unsafe { + addr_of_mut!((*p).inner).write(inner); + bindings::__spin_lock_init( + addr_of_mut!((*p).lock) as *mut _, + self.lock_name.as_char_ptr(), + self.lock_key.get(), + ); + bindings::dma_fence_init( + addr_of_mut!((*p).fence), + &FenceObject::<T>::VTABLE, + addr_of_mut!((*p).lock) as *mut _, + self.start + context as u64, + seqno, + ); + }; + + Ok(UniqueFence(p)) + } +} + +/// A DMA Fence Chain Object +/// +/// # Invariants +/// ptr is a valid pointer to a dma_fence_chain which we own. +pub struct FenceChain { + ptr: *mut bindings::dma_fence_chain, +} + +impl FenceChain { + /// Create a new DmaFenceChain object. + pub fn new() -> Result<Self> { + // SAFETY: This function is safe to call and takes no arguments. + let ptr = unsafe { bindings::dma_fence_chain_alloc() }; + + if ptr.is_null() { + Err(ENOMEM) + } else { + Ok(FenceChain { ptr }) + } + } + + /// Convert the DmaFenceChain into the underlying raw pointer. + /// + /// This assumes the caller will take ownership of the object. + pub(crate) fn into_raw(self) -> *mut bindings::dma_fence_chain { + let ptr = self.ptr; + core::mem::forget(self); + ptr + } +} + +impl Drop for FenceChain { + fn drop(&mut self) { + // SAFETY: We own this dma_fence_chain. + unsafe { bindings::dma_fence_chain_free(self.ptr) }; + } +} diff --git a/rust/kernel/driver.rs b/rust/kernel/driver.rs new file mode 100644 index 000000000000..aa1441ae809b --- /dev/null +++ b/rust/kernel/driver.rs @@ -0,0 +1,475 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Generic support for drivers of different buses (e.g., PCI, Platform, Amba, etc.). +//! +//! Each bus/subsystem is expected to implement [`DriverOps`], which allows drivers to register +//! using the [`Registration`] class. + +use crate::{error::code::*, error::Result, str::CStr, sync::Arc, ThisModule}; +use alloc::boxed::Box; +use core::{cell::UnsafeCell, marker::PhantomData, ops::Deref, pin::Pin}; + +/// A subsystem (e.g., PCI, Platform, Amba, etc.) that allows drivers to be written for it. +pub trait DriverOps { + /// The type that holds information about the registration. This is typically a struct defined + /// by the C portion of the kernel. + type RegType: Default; + + /// Registers a driver. 
+ /// + /// # Safety + /// + /// `reg` must point to valid, initialised, and writable memory. It may be modified by this + /// function to hold registration state. + /// + /// On success, `reg` must remain pinned and valid until the matching call to + /// [`DriverOps::unregister`]. + unsafe fn register( + reg: *mut Self::RegType, + name: &'static CStr, + module: &'static ThisModule, + ) -> Result; + + /// Unregisters a driver previously registered with [`DriverOps::register`]. + /// + /// # Safety + /// + /// `reg` must point to valid writable memory, initialised by a previous successful call to + /// [`DriverOps::register`]. + unsafe fn unregister(reg: *mut Self::RegType); +} + +/// The registration of a driver. +pub struct Registration<T: DriverOps> { + is_registered: bool, + concrete_reg: UnsafeCell<T::RegType>, +} + +// SAFETY: `Registration` has no fields or methods accessible via `&Registration`, so it is safe to +// share references to it with multiple threads as nothing can be done. +unsafe impl<T: DriverOps> Sync for Registration<T> {} + +impl<T: DriverOps> Registration<T> { + /// Creates a new instance of the registration object. + pub fn new() -> Self { + Self { + is_registered: false, + concrete_reg: UnsafeCell::new(T::RegType::default()), + } + } + + /// Allocates a pinned registration object and registers it. + /// + /// Returns a pinned heap-allocated representation of the registration. + pub fn new_pinned(name: &'static CStr, module: &'static ThisModule) -> Result<Pin<Box<Self>>> { + let mut reg = Pin::from(Box::try_new(Self::new())?); + reg.as_mut().register(name, module)?; + Ok(reg) + } + + /// Registers a driver with its subsystem. + /// + /// It must be pinned because the memory block that represents the registration is potentially + /// self-referential. + pub fn register( + self: Pin<&mut Self>, + name: &'static CStr, + module: &'static ThisModule, + ) -> Result { + // SAFETY: We never move out of `this`. + let this = unsafe { self.get_unchecked_mut() }; + if this.is_registered { + // Already registered. + return Err(EINVAL); + } + + // SAFETY: `concrete_reg` was initialised via its default constructor. It is only freed + // after `Self::drop` is called, which first calls `T::unregister`. + unsafe { T::register(this.concrete_reg.get(), name, module) }?; + + this.is_registered = true; + Ok(()) + } +} + +impl<T: DriverOps> Default for Registration<T> { + fn default() -> Self { + Self::new() + } +} + +impl<T: DriverOps> Drop for Registration<T> { + fn drop(&mut self) { + if self.is_registered { + // SAFETY: This path only runs if a previous call to `T::register` completed + // successfully. + unsafe { T::unregister(self.concrete_reg.get()) }; + } + } +} + +/// Conversion from a device id to a raw device id. +/// +/// This is meant to be implemented by buses/subsystems so that they can use [`IdTable`] to +/// guarantee (at compile-time) zero-termination of device id tables provided by drivers. +/// +/// # Safety +/// +/// Implementers must ensure that: +/// - [`RawDeviceId::ZERO`] is actually a zeroed-out version of the raw device id. +/// - [`RawDeviceId::to_rawid`] stores `offset` in the context/data field of the raw device id so +/// that buses can recover the pointer to the data. +#[const_trait] +pub unsafe trait RawDeviceId { + /// The raw type that holds the device id. + /// + /// Id tables created from [`Self`] are going to hold this type in its zero-terminated array. + type RawType: Copy; + + /// A zeroed-out representation of the raw device id. 
+ /// + /// Id tables created from [`Self`] use [`Self::ZERO`] as the sentinel to indicate the end of + /// the table. + const ZERO: Self::RawType; + + /// Converts an id into a raw id. + /// + /// `offset` is the offset from the memory location where the raw device id is stored to the + /// location where its associated context information is stored. Implementations must store + /// this in the appropriate context/data field of the raw type. + fn to_rawid(&self, offset: isize) -> Self::RawType; +} + +/// A zero-terminated device id array. +#[derive(Copy, Clone)] +#[repr(C)] +pub struct IdArrayIds<T: RawDeviceId, const N: usize> { + ids: [T::RawType; N], + sentinel: T::RawType, +} + +unsafe impl<T: RawDeviceId, const N: usize> Sync for IdArrayIds<T, N> {} + +/// A zero-terminated device id array, followed by context data. +#[repr(C)] +pub struct IdArray<T: RawDeviceId, U, const N: usize> { + ids: IdArrayIds<T, N>, + id_infos: [Option<U>; N], +} + +impl<T: RawDeviceId, U, const N: usize> IdArray<T, U, N> { + /// Creates a new instance of the array. + /// + /// The contents are derived from the given identifiers and context information. + pub const fn new(ids: [T; N], infos: [Option<U>; N]) -> Self + where + T: ~const RawDeviceId + Copy, + T::RawType: Copy + Clone, + { + let mut array = Self { + ids: IdArrayIds { + ids: [T::ZERO; N], + sentinel: T::ZERO, + }, + id_infos: infos, + }; + let mut i = 0usize; + while i < N { + // SAFETY: Both pointers are within `array` (or one byte beyond), consequently they are + // derived from the same allocated object. We are using a `u8` pointer, whose size 1, + // so the pointers are necessarily 1-byte aligned. + let offset = unsafe { + (&array.id_infos[i] as *const _ as *const u8) + .offset_from(&array.ids.ids[i] as *const _ as _) + }; + array.ids.ids[i] = ids[i].to_rawid(offset); + i += 1; + } + array + } + + /// Returns an `IdTable` backed by `self`. + /// + /// This is used to essentially erase the array size. + pub const fn as_table(&self) -> IdTable<'_, T, U> { + IdTable { + first: &self.ids.ids[0], + _p: PhantomData, + } + } + + /// Returns the number of items in the ID table. + pub const fn count(&self) -> usize { + self.ids.ids.len() + } + + /// Returns the inner IdArrayIds array, without the context data. + pub const fn as_ids(&self) -> IdArrayIds<T, N> + where + T: ~const RawDeviceId + Copy, + { + self.ids + } +} + +/// A device id table. +/// +/// The table is guaranteed to be zero-terminated and to be followed by an array of context data of +/// type `Option<U>`. +pub struct IdTable<'a, T: RawDeviceId, U> { + first: &'a T::RawType, + _p: PhantomData<&'a U>, +} + +impl<T: RawDeviceId, U> const AsRef<T::RawType> for IdTable<'_, T, U> { + fn as_ref(&self) -> &T::RawType { + self.first + } +} + +/// Counts the number of parenthesis-delimited, comma-separated items. +/// +/// # Examples +/// +/// ``` +/// # use kernel::count_paren_items; +/// +/// assert_eq!(0, count_paren_items!()); +/// assert_eq!(1, count_paren_items!((A))); +/// assert_eq!(1, count_paren_items!((A),)); +/// assert_eq!(2, count_paren_items!((A), (B))); +/// assert_eq!(2, count_paren_items!((A), (B),)); +/// assert_eq!(3, count_paren_items!((A), (B), (C))); +/// assert_eq!(3, count_paren_items!((A), (B), (C),)); +/// ``` +#[macro_export] +macro_rules! 
count_paren_items {
+    (($($item:tt)*), $($remaining:tt)*) => { 1 + $crate::count_paren_items!($($remaining)*) };
+    (($($item:tt)*)) => { 1 };
+    () => { 0 };
+}
+
+/// Converts a comma-separated list of pairs into an array with the first element. That is, it
+/// discards the second element of the pair.
+///
+/// Additionally, it automatically introduces a type if the first element is wrapped in curly
+/// braces, for example, if it's `{v: 10}`, it becomes `X { v: 10 }`; this is to avoid repeating
+/// the type.
+///
+/// # Examples
+///
+/// ```
+/// # use kernel::first_item;
+///
+/// #[derive(PartialEq, Debug)]
+/// struct X {
+///     v: u32,
+/// }
+///
+/// assert_eq!([] as [X; 0], first_item!(X, ));
+/// assert_eq!([X { v: 10 }], first_item!(X, ({ v: 10 }, Y)));
+/// assert_eq!([X { v: 10 }], first_item!(X, ({ v: 10 }, Y),));
+/// assert_eq!([X { v: 10 }], first_item!(X, (X { v: 10 }, Y)));
+/// assert_eq!([X { v: 10 }], first_item!(X, (X { v: 10 }, Y),));
+/// assert_eq!([X { v: 10 }, X { v: 20 }], first_item!(X, ({ v: 10 }, Y), ({ v: 20 }, Y)));
+/// assert_eq!([X { v: 10 }, X { v: 20 }], first_item!(X, ({ v: 10 }, Y), ({ v: 20 }, Y),));
+/// assert_eq!([X { v: 10 }, X { v: 20 }], first_item!(X, (X { v: 10 }, Y), (X { v: 20 }, Y)));
+/// assert_eq!([X { v: 10 }, X { v: 20 }], first_item!(X, (X { v: 10 }, Y), (X { v: 20 }, Y),));
+/// assert_eq!([X { v: 10 }, X { v: 20 }, X { v: 30 }],
+///     first_item!(X, ({ v: 10 }, Y), ({ v: 20 }, Y), ({v: 30}, Y)));
+/// assert_eq!([X { v: 10 }, X { v: 20 }, X { v: 30 }],
+///     first_item!(X, ({ v: 10 }, Y), ({ v: 20 }, Y), ({v: 30}, Y),));
+/// assert_eq!([X { v: 10 }, X { v: 20 }, X { v: 30 }],
+///     first_item!(X, (X { v: 10 }, Y), (X { v: 20 }, Y), (X {v: 30}, Y)));
+/// assert_eq!([X { v: 10 }, X { v: 20 }, X { v: 30 }],
+///     first_item!(X, (X { v: 10 }, Y), (X { v: 20 }, Y), (X {v: 30}, Y),));
+/// ```
+#[macro_export]
+macro_rules! first_item {
+    ($id_type:ty, $(({$($first:tt)*}, $second:expr)),* $(,)?) => {
+        {
+            type IdType = $id_type;
+            [$(IdType{$($first)*},)*]
+        }
+    };
+    ($id_type:ty, $(($first:expr, $second:expr)),* $(,)?) => { [$($first,)*] };
+}
+
+/// Converts a comma-separated list of pairs into an array with the second element. That is, it
+/// discards the first element of the pair.
+///
+/// # Examples
+///
+/// ```
+/// # use kernel::second_item;
+///
+/// assert_eq!([] as [u32; 0], second_item!());
+/// assert_eq!([10u32], second_item!((X, 10u32)));
+/// assert_eq!([10u32], second_item!((X, 10u32),));
+/// assert_eq!([10u32], second_item!(({ X }, 10u32)));
+/// assert_eq!([10u32], second_item!(({ X }, 10u32),));
+/// assert_eq!([10u32, 20], second_item!((X, 10u32), (X, 20)));
+/// assert_eq!([10u32, 20], second_item!((X, 10u32), (X, 20),));
+/// assert_eq!([10u32, 20], second_item!(({ X }, 10u32), ({ X }, 20)));
+/// assert_eq!([10u32, 20], second_item!(({ X }, 10u32), ({ X }, 20),));
+/// assert_eq!([10u32, 20, 30], second_item!((X, 10u32), (X, 20), (X, 30)));
+/// assert_eq!([10u32, 20, 30], second_item!((X, 10u32), (X, 20), (X, 30),));
+/// assert_eq!([10u32, 20, 30], second_item!(({ X }, 10u32), ({ X }, 20), ({ X }, 30)));
+/// assert_eq!([10u32, 20, 30], second_item!(({ X }, 10u32), ({ X }, 20), ({ X }, 30),));
+/// ```
+#[macro_export]
+macro_rules! second_item {
+    ($(({$($first:tt)*}, $second:expr)),* $(,)?) => { [$($second,)*] };
+    ($(($first:expr, $second:expr)),* $(,)?) => { [$($second,)*] };
+}
+
+/// Defines a new constant [`IdArray`] with a concise syntax.
+///
+/// It is meant to be used by buses and subsystems to create a similar macro with their device id
+/// type already specified, i.e., with fewer parameters to the end user.
+///
+/// # Examples
+///
+// TODO: Exported but not usable by kernel modules (requires `const_trait_impl`).
+/// ```ignore
+/// #![feature(const_trait_impl)]
+/// # use kernel::{define_id_array, driver::RawDeviceId};
+///
+/// #[derive(Copy, Clone)]
+/// struct Id(u32);
+///
+/// // SAFETY: `ZERO` is all zeroes and `to_rawid` stores `offset` as the second element of the raw
+/// // device id pair.
+/// unsafe impl const RawDeviceId for Id {
+///     type RawType = (u64, isize);
+///     const ZERO: Self::RawType = (0, 0);
+///     fn to_rawid(&self, offset: isize) -> Self::RawType {
+///         (self.0 as u64 + 1, offset)
+///     }
+/// }
+///
+/// define_id_array!(A1, Id, (), []);
+/// define_id_array!(A2, Id, &'static [u8], [(Id(10), None)]);
+/// define_id_array!(A3, Id, &'static [u8], [(Id(10), Some(b"id1")), ]);
+/// define_id_array!(A4, Id, &'static [u8], [(Id(10), Some(b"id1")), (Id(20), Some(b"id2"))]);
+/// define_id_array!(A5, Id, &'static [u8], [(Id(10), Some(b"id1")), (Id(20), Some(b"id2")), ]);
+/// define_id_array!(A6, Id, &'static [u8], [(Id(10), None), (Id(20), Some(b"id2")), ]);
+/// define_id_array!(A7, Id, &'static [u8], [(Id(10), Some(b"id1")), (Id(20), None), ]);
+/// define_id_array!(A8, Id, &'static [u8], [(Id(10), None), (Id(20), None), ]);
+///
+/// // Within a bus driver:
+/// driver_id_table!(BUS_ID_TABLE, Id, &'static [u8], A1);
+/// // At the top level:
+/// module_id_table!(MODULE_ID_TABLE, "mybus", Id, A1);
+/// ```
+#[macro_export]
+macro_rules! define_id_array {
+    ($table_name:ident, $id_type:ty, $data_type:ty, [ $($t:tt)* ]) => {
+        const $table_name:
+            $crate::driver::IdArray<$id_type, $data_type, { $crate::count_paren_items!($($t)*) }> =
+                $crate::driver::IdArray::new(
+                    $crate::first_item!($id_type, $($t)*), $crate::second_item!($($t)*));
+    };
+}
+
+/// Declares an [`IdArray`] as an [`IdTable`] for a bus driver with a concise syntax.
+///
+/// It is meant to be used by buses and subsystems to create a similar macro with their device id
+/// type already specified, i.e., with fewer parameters to the end user.
+///
+/// # Examples
+///
+// TODO: Exported but not usable by kernel modules (requires `const_trait_impl`).
+/// ```ignore
+/// #![feature(const_trait_impl)]
+/// # use kernel::{driver_id_table};
+///
+/// driver_id_table!(BUS_ID_TABLE, Id, &'static [u8], MY_ID_ARRAY);
+/// ```
+#[macro_export]
+macro_rules! driver_id_table {
+    ($table_name:ident, $id_type:ty, $data_type:ty, $target:expr) => {
+        const $table_name: Option<$crate::driver::IdTable<'static, $id_type, $data_type>> =
+            Some($target.as_table());
+    };
+}
+
+/// Declares an [`IdArray`] as a module-level ID table with a concise syntax.
+///
+/// It is meant to be used by buses and subsystems to create a similar macro with their device id
+/// type already specified, i.e., with fewer parameters to the end user.
+///
+/// # Examples
+///
+// TODO: Exported but not usable by kernel modules (requires `const_trait_impl`).
+/// ```ignore
+/// #![feature(const_trait_impl)]
+/// # use kernel::{module_id_table};
+///
+/// module_id_table!(MODULE_ID_TABLE, "mybus", Id, MY_ID_ARRAY);
+/// ```
+#[macro_export]
+macro_rules!
module_id_table { + ($item_name:ident, $table_type:literal, $id_type:ty, $table_name:ident) => { + #[export_name = concat!("__mod_", $table_type, "__", stringify!($table_name), "_device_table")] + static $item_name: $crate::driver::IdArrayIds<$id_type, { $table_name.count() }> = + $table_name.as_ids(); + }; +} + +/// Custom code within device removal. +pub trait DeviceRemoval { + /// Cleans resources up when the device is removed. + /// + /// This is called when a device is removed and offers implementers the chance to run some code + /// that cleans state up. + fn device_remove(&self); +} + +impl DeviceRemoval for () { + fn device_remove(&self) {} +} + +impl<T: DeviceRemoval> DeviceRemoval for Arc<T> { + fn device_remove(&self) { + self.deref().device_remove(); + } +} + +impl<T: DeviceRemoval> DeviceRemoval for Box<T> { + fn device_remove(&self) { + self.deref().device_remove(); + } +} + +/// A kernel module that only registers the given driver on init. +/// +/// This is a helper struct to make it easier to define single-functionality modules, in this case, +/// modules that offer a single driver. +pub struct Module<T: DriverOps> { + _driver: Pin<Box<Registration<T>>>, +} + +impl<T: DriverOps> crate::Module for Module<T> { + fn init(name: &'static CStr, module: &'static ThisModule) -> Result<Self> { + Ok(Self { + _driver: Registration::new_pinned(name, module)?, + }) + } +} + +/// Declares a kernel module that exposes a single driver. +/// +/// It is meant to be used as a helper by other subsystems so they can more easily expose their own +/// macros. +#[macro_export] +macro_rules! module_driver { + (<$gen_type:ident>, $driver_ops:ty, { type: $type:ty, $($f:tt)* }) => { + type Ops<$gen_type> = $driver_ops; + type ModuleType = $crate::driver::Module<Ops<$type>>; + $crate::prelude::module! { + type: ModuleType, + $($f)* + } + } +} diff --git a/rust/kernel/drm/device.rs b/rust/kernel/drm/device.rs new file mode 100644 index 000000000000..6007f941137a --- /dev/null +++ b/rust/kernel/drm/device.rs @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT + +//! DRM device. +//! +//! C header: [`include/linux/drm/drm_device.h`](../../../../include/linux/drm/drm_device.h) + +use crate::{bindings, device, drm, types::ForeignOwnable}; +use core::marker::PhantomData; + +/// Represents a reference to a DRM device. The device is reference-counted and is guaranteed to +/// not be dropped while this object is alive. +pub struct Device<T: drm::drv::Driver> { + // Type invariant: ptr must be a valid and initialized drm_device, + // and this value must either own a reference to it or the caller + // must ensure that it is never dropped if the reference is borrowed. + pub(super) ptr: *mut bindings::drm_device, + _p: PhantomData<T>, +} + +impl<T: drm::drv::Driver> Device<T> { + // Not intended to be called externally, except via declare_drm_ioctls!() + #[doc(hidden)] + pub unsafe fn from_raw(raw: *mut bindings::drm_device) -> Device<T> { + Device { + ptr: raw, + _p: PhantomData, + } + } + + #[allow(dead_code)] + pub(crate) fn raw(&self) -> *const bindings::drm_device { + self.ptr + } + + pub(crate) fn raw_mut(&mut self) -> *mut bindings::drm_device { + self.ptr + } + + /// Returns a borrowed reference to the user data associated with this Device. 
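+    ///
+    /// A minimal sketch (illustrative only; `MyDriver` is a placeholder for a type implementing
+    /// `drm::drv::Driver`):
+    ///
+    /// ```ignore
+    /// fn inspect(dev: &kernel::drm::device::Device<MyDriver>) {
+    ///     // `data` is `<MyDriver::Data as ForeignOwnable>::Borrowed<'_>`, valid for as long as
+    ///     // the borrow of `dev`.
+    ///     let data = dev.data();
+    /// }
+    /// ```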
+ pub fn data(&self) -> <T::Data as ForeignOwnable>::Borrowed<'_> { + unsafe { T::Data::borrow((*self.ptr).dev_private) } + } +} + +impl<T: drm::drv::Driver> Drop for Device<T> { + fn drop(&mut self) { + // SAFETY: By the type invariants, we know that `self` owns a reference, so it is safe to + // relinquish it now. + unsafe { bindings::drm_dev_put(self.ptr) }; + } +} + +impl<T: drm::drv::Driver> Clone for Device<T> { + fn clone(&self) -> Self { + // SAFETY: We get a new reference and then create a new owning object from the raw pointer + unsafe { + bindings::drm_dev_get(self.ptr); + Device::from_raw(self.ptr) + } + } +} + +// SAFETY: `Device` only holds a pointer to a C device, which is safe to be used from any thread. +unsafe impl<T: drm::drv::Driver> Send for Device<T> {} + +// SAFETY: `Device` only holds a pointer to a C device, references to which are safe to be used +// from any thread. +unsafe impl<T: drm::drv::Driver> Sync for Device<T> {} + +// Make drm::Device work for dev_info!() and friends +unsafe impl<T: drm::drv::Driver> device::RawDevice for Device<T> { + fn raw_device(&self) -> *mut bindings::device { + // SAFETY: ptr must be valid per the type invariant + unsafe { (*self.ptr).dev } + } +} diff --git a/rust/kernel/drm/drv.rs b/rust/kernel/drm/drv.rs new file mode 100644 index 000000000000..c138352cb489 --- /dev/null +++ b/rust/kernel/drm/drv.rs @@ -0,0 +1,342 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT + +//! DRM driver core. +//! +//! C header: [`include/linux/drm/drm_drv.h`](../../../../include/linux/drm/drm_drv.h) + +use crate::{ + bindings, device, drm, + error::code::*, + error::from_kernel_err_ptr, + error::{Error, Result}, + prelude::*, + private::Sealed, + str::CStr, + types::ForeignOwnable, + ThisModule, +}; +use core::{ + marker::{PhantomData, PhantomPinned}, + pin::Pin, +}; +use macros::vtable; + +/// Driver use the GEM memory manager. This should be set for all modern drivers. +pub const FEAT_GEM: u32 = bindings::drm_driver_feature_DRIVER_GEM; +/// Driver supports mode setting interfaces (KMS). +pub const FEAT_MODESET: u32 = bindings::drm_driver_feature_DRIVER_MODESET; +/// Driver supports dedicated render nodes. +pub const FEAT_RENDER: u32 = bindings::drm_driver_feature_DRIVER_RENDER; +/// Driver supports the full atomic modesetting userspace API. +/// +/// Drivers which only use atomic internally, but do not support the full userspace API (e.g. not +/// all properties converted to atomic, or multi-plane updates are not guaranteed to be tear-free) +/// should not set this flag. +pub const FEAT_ATOMIC: u32 = bindings::drm_driver_feature_DRIVER_ATOMIC; +/// Driver supports DRM sync objects for explicit synchronization of command submission. +pub const FEAT_SYNCOBJ: u32 = bindings::drm_driver_feature_DRIVER_SYNCOBJ; +/// Driver supports the timeline flavor of DRM sync objects for explicit synchronization of command +/// submission. +pub const FEAT_SYNCOBJ_TIMELINE: u32 = bindings::drm_driver_feature_DRIVER_SYNCOBJ_TIMELINE; + +/// Information data for a DRM Driver. +pub struct DriverInfo { + /// Driver major version. + pub major: i32, + /// Driver minor version. + pub minor: i32, + /// Driver patchlevel version. + pub patchlevel: i32, + /// Driver name. + pub name: &'static CStr, + /// Driver description. + pub desc: &'static CStr, + /// Driver date. + pub date: &'static CStr, +} + +/// Internal memory management operation set, normally created by memory managers (e.g. GEM). +/// +/// See `kernel::drm::gem` and `kernel::drm::gem::shmem`. 
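+///
+/// Drivers normally do not populate this by hand; the chosen memory manager supplies it through
+/// [`AllocImpl::ALLOC_OPS`].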
+pub struct AllocOps { + pub(crate) gem_create_object: Option< + unsafe extern "C" fn( + dev: *mut bindings::drm_device, + size: usize, + ) -> *mut bindings::drm_gem_object, + >, + pub(crate) prime_handle_to_fd: Option< + unsafe extern "C" fn( + dev: *mut bindings::drm_device, + file_priv: *mut bindings::drm_file, + handle: u32, + flags: u32, + prime_fd: *mut core::ffi::c_int, + ) -> core::ffi::c_int, + >, + pub(crate) prime_fd_to_handle: Option< + unsafe extern "C" fn( + dev: *mut bindings::drm_device, + file_priv: *mut bindings::drm_file, + prime_fd: core::ffi::c_int, + handle: *mut u32, + ) -> core::ffi::c_int, + >, + pub(crate) gem_prime_import: Option< + unsafe extern "C" fn( + dev: *mut bindings::drm_device, + dma_buf: *mut bindings::dma_buf, + ) -> *mut bindings::drm_gem_object, + >, + pub(crate) gem_prime_import_sg_table: Option< + unsafe extern "C" fn( + dev: *mut bindings::drm_device, + attach: *mut bindings::dma_buf_attachment, + sgt: *mut bindings::sg_table, + ) -> *mut bindings::drm_gem_object, + >, + pub(crate) gem_prime_mmap: Option< + unsafe extern "C" fn( + obj: *mut bindings::drm_gem_object, + vma: *mut bindings::vm_area_struct, + ) -> core::ffi::c_int, + >, + pub(crate) dumb_create: Option< + unsafe extern "C" fn( + file_priv: *mut bindings::drm_file, + dev: *mut bindings::drm_device, + args: *mut bindings::drm_mode_create_dumb, + ) -> core::ffi::c_int, + >, + pub(crate) dumb_map_offset: Option< + unsafe extern "C" fn( + file_priv: *mut bindings::drm_file, + dev: *mut bindings::drm_device, + handle: u32, + offset: *mut u64, + ) -> core::ffi::c_int, + >, + pub(crate) dumb_destroy: Option< + unsafe extern "C" fn( + file_priv: *mut bindings::drm_file, + dev: *mut bindings::drm_device, + handle: u32, + ) -> core::ffi::c_int, + >, +} + +/// Trait for memory manager implementations. Implemented internally. +pub trait AllocImpl: Sealed + drm::gem::IntoGEMObject { + /// The C callback operations for this memory manager. + const ALLOC_OPS: AllocOps; +} + +/// A DRM driver implementation. +#[vtable] +pub trait Driver { + /// Context data associated with the DRM driver + /// + /// Determines the type of the context data passed to each of the methods of the trait. + type Data: ForeignOwnable + Sync + Send; + + /// The type used to manage memory for this driver. + /// + /// Should be either `drm::gem::Object<T>` or `drm::gem::shmem::Object<T>`. + type Object: AllocImpl; + + /// The type used to represent a DRM File (client) + type File: drm::file::DriverFile; + + /// Driver metadata + const INFO: DriverInfo; + + /// Feature flags + const FEATURES: u32; + + /// IOCTL list. See `kernel::drm::ioctl::declare_drm_ioctls!{}`. + const IOCTLS: &'static [drm::ioctl::DrmIoctlDescriptor]; +} + +/// A registration of a DRM device +/// +/// # Invariants: +/// +/// drm is always a valid pointer to an allocated drm_device +pub struct Registration<T: Driver> { + drm: drm::device::Device<T>, + registered: bool, + fops: bindings::file_operations, + vtable: Pin<Box<bindings::drm_driver>>, + _p: PhantomData<T>, + _pin: PhantomPinned, +} + +#[cfg(CONFIG_DRM_LEGACY)] +macro_rules! drm_legacy_fields { + ( $($field:ident: $val:expr),* $(,)? 
) => { + bindings::drm_driver { + $( $field: $val ),*, + firstopen: None, + preclose: None, + dma_ioctl: None, + dma_quiescent: None, + context_dtor: None, + irq_handler: None, + irq_preinstall: None, + irq_postinstall: None, + irq_uninstall: None, + get_vblank_counter: None, + enable_vblank: None, + disable_vblank: None, + dev_priv_size: 0, + } + } +} + +#[cfg(not(CONFIG_DRM_LEGACY))] +macro_rules! drm_legacy_fields { + ( $($field:ident: $val:expr),* $(,)? ) => { + bindings::drm_driver { + $( $field: $val ),* + } + } +} + +/// Registers a DRM device with the rest of the kernel. +/// +/// It automatically picks up THIS_MODULE. +#[allow(clippy::crate_in_macro_def)] +#[macro_export] +macro_rules! drm_device_register { + ($reg:expr, $data:expr, $flags:expr $(,)?) => {{ + $crate::drm::drv::Registration::register($reg, $data, $flags, &crate::THIS_MODULE) + }}; +} + +impl<T: Driver> Registration<T> { + const VTABLE: bindings::drm_driver = drm_legacy_fields! { + load: None, + open: Some(drm::file::open_callback::<T::File>), + postclose: Some(drm::file::postclose_callback::<T::File>), + lastclose: None, + unload: None, + release: None, + master_set: None, + master_drop: None, + debugfs_init: None, + gem_create_object: T::Object::ALLOC_OPS.gem_create_object, + prime_handle_to_fd: T::Object::ALLOC_OPS.prime_handle_to_fd, + prime_fd_to_handle: T::Object::ALLOC_OPS.prime_fd_to_handle, + gem_prime_import: T::Object::ALLOC_OPS.gem_prime_import, + gem_prime_import_sg_table: T::Object::ALLOC_OPS.gem_prime_import_sg_table, + gem_prime_mmap: T::Object::ALLOC_OPS.gem_prime_mmap, + dumb_create: T::Object::ALLOC_OPS.dumb_create, + dumb_map_offset: T::Object::ALLOC_OPS.dumb_map_offset, + dumb_destroy: T::Object::ALLOC_OPS.dumb_destroy, + + major: T::INFO.major, + minor: T::INFO.minor, + patchlevel: T::INFO.patchlevel, + name: T::INFO.name.as_char_ptr() as *mut _, + desc: T::INFO.desc.as_char_ptr() as *mut _, + date: T::INFO.date.as_char_ptr() as *mut _, + + driver_features: T::FEATURES, + ioctls: T::IOCTLS.as_ptr(), + num_ioctls: T::IOCTLS.len() as i32, + fops: core::ptr::null_mut(), + }; + + /// Creates a new [`Registration`] but does not register it yet. + /// + /// It is allowed to move. + pub fn new(parent: &dyn device::RawDevice) -> Result<Self> { + let vtable = Pin::new(Box::try_new(Self::VTABLE)?); + let raw_drm = unsafe { bindings::drm_dev_alloc(&*vtable, parent.raw_device()) }; + let raw_drm = from_kernel_err_ptr(raw_drm)?; + + // The reference count is one, and now we take ownership of that reference as a + // drm::device::Device. + let drm = unsafe { drm::device::Device::from_raw(raw_drm) }; + + Ok(Self { + drm, + registered: false, + vtable, + fops: drm::gem::create_fops(), + _pin: PhantomPinned, + _p: PhantomData, + }) + } + + /// Registers a DRM device with the rest of the kernel. + /// + /// Users are encouraged to use the [`drm_device_register!()`] macro because it automatically + /// picks up the current module. + pub fn register( + self: Pin<&mut Self>, + data: T::Data, + flags: usize, + module: &'static ThisModule, + ) -> Result { + if self.registered { + // Already registered. + return Err(EINVAL); + } + + // SAFETY: We never move out of `this`. 
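+        // Pinning matters here: `this.vtable.fops` is pointed at `this.fops` below, which makes
+        // the registration self-referential.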
+ let this = unsafe { self.get_unchecked_mut() }; + let data_pointer = <T::Data as ForeignOwnable>::into_foreign(data); + // SAFETY: `drm` is valid per the type invariant + unsafe { + (*this.drm.raw_mut()).dev_private = data_pointer as *mut _; + } + + this.fops.owner = module.0; + this.vtable.fops = &this.fops; + + // SAFETY: The device is now initialized and ready to be registered. + let ret = unsafe { bindings::drm_dev_register(this.drm.raw_mut(), flags as u64) }; + if ret < 0 { + // SAFETY: `data_pointer` was returned by `into_foreign` above. + unsafe { T::Data::from_foreign(data_pointer) }; + return Err(Error::from_kernel_errno(ret)); + } + + this.registered = true; + Ok(()) + } + + /// Returns a reference to the `Device` instance for this registration. + pub fn device(&self) -> &drm::device::Device<T> { + &self.drm + } +} + +// SAFETY: `Registration` doesn't offer any methods or access to fields when shared between threads +// or CPUs, so it is safe to share it. +unsafe impl<T: Driver> Sync for Registration<T> {} + +// SAFETY: Registration with and unregistration from the drm subsystem can happen from any thread. +// Additionally, `T::Data` (which is dropped during unregistration) is `Send`, so it is ok to move +// `Registration` to different threads. +#[allow(clippy::non_send_fields_in_send_ty)] +unsafe impl<T: Driver> Send for Registration<T> {} + +impl<T: Driver> Drop for Registration<T> { + /// Removes the registration from the kernel if it has completed successfully before. + fn drop(&mut self) { + if self.registered { + // Get a pointer to the data stored in device before destroying it. + // SAFETY: `drm` is valid per the type invariant + let data_pointer = unsafe { (*self.drm.raw_mut()).dev_private }; + + // SAFETY: Since `registered` is true, `self.drm` is both valid and registered. + unsafe { bindings::drm_dev_unregister(self.drm.raw_mut()) }; + + // Free data as well. + // SAFETY: `data_pointer` was returned by `into_foreign` during registration. + unsafe { <T::Data as ForeignOwnable>::from_foreign(data_pointer) }; + } + } +} diff --git a/rust/kernel/drm/file.rs b/rust/kernel/drm/file.rs new file mode 100644 index 000000000000..48751e93c38a --- /dev/null +++ b/rust/kernel/drm/file.rs @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT + +//! DRM File objects. +//! +//! C header: [`include/linux/drm/drm_file.h`](../../../../include/linux/drm/drm_file.h) + +use crate::{bindings, drm, error::Result}; +use alloc::boxed::Box; +use core::marker::PhantomData; +use core::ops::Deref; + +/// Trait that must be implemented by DRM drivers to represent a DRM File (a client instance). +pub trait DriverFile { + /// The parent `Driver` implementation for this `DriverFile`. + type Driver: drm::drv::Driver; + + /// Open a new file (called when a client opens the DRM device). + fn open(device: &drm::device::Device<Self::Driver>) -> Result<Box<Self>>; +} + +/// An open DRM File. +/// +/// # Invariants +/// `raw` is a valid pointer to a `drm_file` struct. 
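+///
+/// # Examples
+///
+/// A minimal sketch of a driver-side file type (illustrative only; `MyDriver` is a placeholder
+/// for a type implementing `drm::drv::Driver`):
+///
+/// ```ignore
+/// struct MyFile;
+///
+/// impl kernel::drm::file::DriverFile for MyFile {
+///     type Driver = MyDriver;
+///
+///     fn open(_device: &kernel::drm::device::Device<MyDriver>) -> Result<Box<MyFile>> {
+///         Ok(Box::try_new(MyFile)?)
+///     }
+/// }
+/// ```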
+#[repr(transparent)] +pub struct File<T: DriverFile> { + raw: *mut bindings::drm_file, + _p: PhantomData<T>, +} + +pub(super) unsafe extern "C" fn open_callback<T: DriverFile>( + raw_dev: *mut bindings::drm_device, + raw_file: *mut bindings::drm_file, +) -> core::ffi::c_int { + let drm = core::mem::ManuallyDrop::new(unsafe { drm::device::Device::from_raw(raw_dev) }); + // SAFETY: This reference won't escape this function + let file = unsafe { &mut *raw_file }; + + let inner = match T::open(&drm) { + Err(e) => { + return e.to_kernel_errno(); + } + Ok(i) => i, + }; + + file.driver_priv = Box::into_raw(inner) as *mut _; + + 0 +} + +pub(super) unsafe extern "C" fn postclose_callback<T: DriverFile>( + _dev: *mut bindings::drm_device, + raw_file: *mut bindings::drm_file, +) { + // SAFETY: This reference won't escape this function + let file = unsafe { &*raw_file }; + + // Drop the DriverFile + unsafe { Box::from_raw(file.driver_priv as *mut T) }; +} + +impl<T: DriverFile> File<T> { + // Not intended to be called externally, except via declare_drm_ioctls!() + #[doc(hidden)] + pub unsafe fn from_raw(raw_file: *mut bindings::drm_file) -> File<T> { + File { + raw: raw_file, + _p: PhantomData, + } + } + + #[allow(dead_code)] + /// Return the raw pointer to the underlying `drm_file`. + pub(super) fn raw(&self) -> *const bindings::drm_file { + self.raw + } + + /// Return an immutable reference to the raw `drm_file` structure. + pub(super) fn file(&self) -> &bindings::drm_file { + unsafe { &*self.raw } + } +} + +impl<T: DriverFile> Deref for File<T> { + type Target = T; + + fn deref(&self) -> &T { + unsafe { &*(self.file().driver_priv as *const T) } + } +} + +impl<T: DriverFile> crate::private::Sealed for File<T> {} + +/// Generic trait to allow users that don't care about driver specifics to accept any File<T>. +/// +/// # Safety +/// Must only be implemented for File<T> and return the pointer, following the normal invariants +/// of that type. +pub unsafe trait GenericFile: crate::private::Sealed { + /// Returns the raw const pointer to the `struct drm_file` + fn raw(&self) -> *const bindings::drm_file; + /// Returns the raw mut pointer to the `struct drm_file` + fn raw_mut(&mut self) -> *mut bindings::drm_file; +} + +unsafe impl<T: DriverFile> GenericFile for File<T> { + fn raw(&self) -> *const bindings::drm_file { + self.raw + } + fn raw_mut(&mut self) -> *mut bindings::drm_file { + self.raw + } +} diff --git a/rust/kernel/drm/gem/mod.rs b/rust/kernel/drm/gem/mod.rs new file mode 100644 index 000000000000..c4a42bb2f718 --- /dev/null +++ b/rust/kernel/drm/gem/mod.rs @@ -0,0 +1,387 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT + +//! DRM GEM API +//! +//! C header: [`include/linux/drm/drm_gem.h`](../../../../include/linux/drm/drm_gem.h) + +#[cfg(CONFIG_DRM_GEM_SHMEM_HELPER = "y")] +pub mod shmem; + +use alloc::boxed::Box; + +use crate::{ + bindings, + drm::{device, drv, file}, + error::{to_result, Result}, + prelude::*, +}; +use core::{mem, mem::ManuallyDrop, ops::Deref, ops::DerefMut}; + +/// GEM object functions, which must be implemented by drivers. +pub trait BaseDriverObject<T: BaseObject>: Sync + Send + Sized { + /// Create a new driver data object for a GEM object of a given size. + fn new(dev: &device::Device<T::Driver>, size: usize) -> Result<Self>; + + /// Open a new handle to an existing object, associated with a File. 
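+    ///
+    /// The provided default implementation does nothing and returns `Ok(())`.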
+ fn open( + _obj: &<<T as IntoGEMObject>::Driver as drv::Driver>::Object, + _file: &file::File<<<T as IntoGEMObject>::Driver as drv::Driver>::File>, + ) -> Result { + Ok(()) + } + + /// Close a handle to an existing object, associated with a File. + fn close( + _obj: &<<T as IntoGEMObject>::Driver as drv::Driver>::Object, + _file: &file::File<<<T as IntoGEMObject>::Driver as drv::Driver>::File>, + ) { + } +} + +/// Trait that represents a GEM object subtype +pub trait IntoGEMObject: Sized + crate::private::Sealed { + /// Owning driver for this type + type Driver: drv::Driver; + + /// Returns a reference to the raw `drm_gem_object` structure, which must be valid as long as + /// this owning object is valid. + fn gem_obj(&self) -> &bindings::drm_gem_object; + + /// Returns a reference to the raw `drm_gem_object` structure, which must be valid as long as + /// this owning object is valid. + fn mut_gem_obj(&mut self) -> &mut bindings::drm_gem_object; + + /// Converts a pointer to a `drm_gem_object` into a pointer to this type. + fn from_gem_obj(obj: *mut bindings::drm_gem_object) -> *mut Self; +} + +/// Trait which must be implemented by drivers using base GEM objects. +pub trait DriverObject: BaseDriverObject<Object<Self>> { + /// Parent `Driver` for this object. + type Driver: drv::Driver; +} + +unsafe extern "C" fn free_callback<T: DriverObject>(obj: *mut bindings::drm_gem_object) { + // SAFETY: All of our objects are Object<T>. + let this = crate::container_of!(obj, Object<T>, obj) as *mut Object<T>; + + // SAFETY: The pointer we got has to be valid + unsafe { bindings::drm_gem_object_release(obj) }; + + // SAFETY: All of our objects are allocated via Box<>, and we're in the + // free callback which guarantees this object has zero remaining references, + // so we can drop it + unsafe { Box::from_raw(this) }; +} + +unsafe extern "C" fn open_callback<T: BaseDriverObject<U>, U: BaseObject>( + raw_obj: *mut bindings::drm_gem_object, + raw_file: *mut bindings::drm_file, +) -> core::ffi::c_int { + // SAFETY: The pointer we got has to be valid. + let file = unsafe { + file::File::<<<U as IntoGEMObject>::Driver as drv::Driver>::File>::from_raw(raw_file) + }; + let obj = + <<<U as IntoGEMObject>::Driver as drv::Driver>::Object as IntoGEMObject>::from_gem_obj( + raw_obj, + ); + + // SAFETY: from_gem_obj() returns a valid pointer as long as the type is + // correct and the raw_obj we got is valid. + match T::open(unsafe { &*obj }, &file) { + Err(e) => e.to_kernel_errno(), + Ok(()) => 0, + } +} + +unsafe extern "C" fn close_callback<T: BaseDriverObject<U>, U: BaseObject>( + raw_obj: *mut bindings::drm_gem_object, + raw_file: *mut bindings::drm_file, +) { + // SAFETY: The pointer we got has to be valid. + let file = unsafe { + file::File::<<<U as IntoGEMObject>::Driver as drv::Driver>::File>::from_raw(raw_file) + }; + let obj = + <<<U as IntoGEMObject>::Driver as drv::Driver>::Object as IntoGEMObject>::from_gem_obj( + raw_obj, + ); + + // SAFETY: from_gem_obj() returns a valid pointer as long as the type is + // correct and the raw_obj we got is valid. 
+ T::close(unsafe { &*obj }, &file); +} + +impl<T: DriverObject> IntoGEMObject for Object<T> { + type Driver = T::Driver; + + fn gem_obj(&self) -> &bindings::drm_gem_object { + &self.obj + } + + fn mut_gem_obj(&mut self) -> &mut bindings::drm_gem_object { + &mut self.obj + } + + fn from_gem_obj(obj: *mut bindings::drm_gem_object) -> *mut Object<T> { + crate::container_of!(obj, Object<T>, obj) as *mut Object<T> + } +} + +/// Base operations shared by all GEM object classes +pub trait BaseObject: IntoGEMObject { + /// Returns the size of the object in bytes. + fn size(&self) -> usize { + self.gem_obj().size + } + + /// Sets the exportable flag, which controls whether the object can be exported via PRIME. + fn set_exportable(&mut self, exportable: bool) { + self.mut_gem_obj().exportable = exportable; + } + + /// Creates a new reference to the object. + fn reference(&self) -> ObjectRef<Self> { + // SAFETY: Having a reference to an Object implies holding a GEM reference + unsafe { + bindings::drm_gem_object_get(self.gem_obj() as *const _ as *mut _); + } + ObjectRef { + ptr: self as *const _, + } + } + + /// Creates a new handle for the object associated with a given `File` + /// (or returns an existing one). + fn create_handle( + &self, + file: &file::File<<<Self as IntoGEMObject>::Driver as drv::Driver>::File>, + ) -> Result<u32> { + let mut handle: u32 = 0; + // SAFETY: The arguments are all valid per the type invariants. + to_result(unsafe { + bindings::drm_gem_handle_create( + file.raw() as *mut _, + self.gem_obj() as *const _ as *mut _, + &mut handle, + ) + })?; + Ok(handle) + } + + /// Looks up an object by its handle for a given `File`. + fn lookup_handle( + file: &file::File<<<Self as IntoGEMObject>::Driver as drv::Driver>::File>, + handle: u32, + ) -> Result<ObjectRef<Self>> { + // SAFETY: The arguments are all valid per the type invariants. + let ptr = unsafe { bindings::drm_gem_object_lookup(file.raw() as *mut _, handle) }; + + if ptr.is_null() { + Err(ENOENT) + } else { + Ok(ObjectRef { + ptr: ptr as *const _, + }) + } + } + + /// Creates an mmap offset to map the object from userspace. + fn create_mmap_offset(&self) -> Result<u64> { + // SAFETY: The arguments are valid per the type invariant. + to_result(unsafe { + // TODO: is this threadsafe? + bindings::drm_gem_create_mmap_offset(self.gem_obj() as *const _ as *mut _) + })?; + Ok(unsafe { + bindings::drm_vma_node_offset_addr(&self.gem_obj().vma_node as *const _ as *mut _) + }) + } +} + +impl<T: IntoGEMObject> BaseObject for T {} + +/// A base GEM object. +#[repr(C)] +pub struct Object<T: DriverObject> { + obj: bindings::drm_gem_object, + // The DRM core ensures the Device exists as long as its objects exist, so we don't need to + // manage the reference count here. + dev: ManuallyDrop<device::Device<T::Driver>>, + inner: T, +} + +impl<T: DriverObject> Object<T> { + /// The size of this object's structure. + pub const SIZE: usize = mem::size_of::<Self>(); + + const OBJECT_FUNCS: bindings::drm_gem_object_funcs = bindings::drm_gem_object_funcs { + free: Some(free_callback::<T>), + open: Some(open_callback::<T, Object<T>>), + close: Some(close_callback::<T, Object<T>>), + print_info: None, + export: None, + pin: None, + unpin: None, + get_sg_table: None, + vmap: None, + vunmap: None, + mmap: None, + vm_ops: core::ptr::null_mut(), + }; + + /// Create a new GEM object. 
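+    ///
+    /// A minimal sketch (illustrative only; `MyDriverObject` is a placeholder implementing
+    /// `gem::DriverObject`, and `file` an open `drm::file::File` of the matching driver):
+    ///
+    /// ```ignore
+    /// let obj = gem::Object::<MyDriverObject>::new(&dev, 4096)?;
+    /// let handle = obj.create_handle(&file)?;
+    /// ```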
+ pub fn new(dev: &device::Device<T::Driver>, size: usize) -> Result<UniqueObjectRef<Self>> { + let mut obj: Box<Self> = Box::try_new(Self { + // SAFETY: This struct is expected to be zero-initialized + obj: unsafe { mem::zeroed() }, + // SAFETY: The drm subsystem guarantees that the drm_device will live as long as + // the GEM object lives, so we can conjure a reference out of thin air. + dev: ManuallyDrop::new(unsafe { device::Device::from_raw(dev.ptr) }), + inner: T::new(dev, size)?, + })?; + + obj.obj.funcs = &Self::OBJECT_FUNCS; + to_result(unsafe { + bindings::drm_gem_object_init(dev.raw() as *mut _, &mut obj.obj, size) + })?; + + let obj_ref = UniqueObjectRef { + ptr: Box::leak(obj), + }; + + Ok(obj_ref) + } + + /// Returns the `Device` that owns this GEM object. + pub fn dev(&self) -> &device::Device<T::Driver> { + &self.dev + } +} + +impl<T: DriverObject> crate::private::Sealed for Object<T> {} + +impl<T: DriverObject> Deref for Object<T> { + type Target = T; + + fn deref(&self) -> &Self::Target { + &self.inner + } +} + +impl<T: DriverObject> DerefMut for Object<T> { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.inner + } +} + +impl<T: DriverObject> drv::AllocImpl for Object<T> { + const ALLOC_OPS: drv::AllocOps = drv::AllocOps { + gem_create_object: None, + prime_handle_to_fd: Some(bindings::drm_gem_prime_handle_to_fd), + prime_fd_to_handle: Some(bindings::drm_gem_prime_fd_to_handle), + gem_prime_import: None, + gem_prime_import_sg_table: None, + gem_prime_mmap: Some(bindings::drm_gem_prime_mmap), + dumb_create: None, + dumb_map_offset: None, + dumb_destroy: None, + }; +} + +/// A reference-counted shared reference to a base GEM object. +pub struct ObjectRef<T: IntoGEMObject> { + // Invariant: the pointer is valid and initialized, and this ObjectRef owns a reference to it. + ptr: *const T, +} + +/// SAFETY: GEM object references are safe to share between threads. +unsafe impl<T: IntoGEMObject> Send for ObjectRef<T> {} +unsafe impl<T: IntoGEMObject> Sync for ObjectRef<T> {} + +impl<T: IntoGEMObject> Clone for ObjectRef<T> { + fn clone(&self) -> Self { + self.reference() + } +} + +impl<T: IntoGEMObject> Drop for ObjectRef<T> { + fn drop(&mut self) { + // SAFETY: Having an ObjectRef implies holding a GEM reference. + // The free callback will take care of deallocation. + unsafe { + bindings::drm_gem_object_put((*self.ptr).gem_obj() as *const _ as *mut _); + } + } +} + +impl<T: IntoGEMObject> Deref for ObjectRef<T> { + type Target = T; + + fn deref(&self) -> &Self::Target { + // SAFETY: The pointer is valid per the invariant + unsafe { &*self.ptr } + } +} + +/// A unique reference to a base GEM object. +pub struct UniqueObjectRef<T: IntoGEMObject> { + // Invariant: the pointer is valid and initialized, and this ObjectRef owns the only reference + // to it. + ptr: *mut T, +} + +impl<T: IntoGEMObject> UniqueObjectRef<T> { + /// Downgrade this reference to a shared reference. + pub fn into_ref(self) -> ObjectRef<T> { + let ptr = self.ptr as *const _; + core::mem::forget(self); + + ObjectRef { ptr } + } +} + +impl<T: IntoGEMObject> Drop for UniqueObjectRef<T> { + fn drop(&mut self) { + // SAFETY: Having a UniqueObjectRef implies holding a GEM + // reference. The free callback will take care of deallocation. 
+ unsafe { + bindings::drm_gem_object_put((*self.ptr).gem_obj() as *const _ as *mut _); + } + } +} + +impl<T: IntoGEMObject> Deref for UniqueObjectRef<T> { + type Target = T; + + fn deref(&self) -> &Self::Target { + // SAFETY: The pointer is valid per the invariant + unsafe { &*self.ptr } + } +} + +impl<T: IntoGEMObject> DerefMut for UniqueObjectRef<T> { + fn deref_mut(&mut self) -> &mut Self::Target { + // SAFETY: The pointer is valid per the invariant + unsafe { &mut *self.ptr } + } +} + +pub(super) fn create_fops() -> bindings::file_operations { + bindings::file_operations { + owner: core::ptr::null_mut(), + open: Some(bindings::drm_open), + release: Some(bindings::drm_release), + unlocked_ioctl: Some(bindings::drm_ioctl), + #[cfg(CONFIG_COMPAT)] + compat_ioctl: Some(bindings::drm_compat_ioctl), + #[cfg(not(CONFIG_COMPAT))] + compat_ioctl: None, + poll: Some(bindings::drm_poll), + read: Some(bindings::drm_read), + llseek: Some(bindings::noop_llseek), + mmap: Some(bindings::drm_gem_mmap), + ..Default::default() + } +} diff --git a/rust/kernel/drm/gem/shmem.rs b/rust/kernel/drm/gem/shmem.rs new file mode 100644 index 000000000000..8f17eba0be99 --- /dev/null +++ b/rust/kernel/drm/gem/shmem.rs @@ -0,0 +1,385 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! DRM GEM shmem helper objects +//! +//! C header: [`include/linux/drm/drm_gem_shmem_helper.h`](../../../../include/linux/drm/drm_gem_shmem_helper.h) + +use crate::drm::{device, drv, gem}; +use crate::{ + error::{from_kernel_err_ptr, to_result}, + prelude::*, +}; +use core::{ + marker::PhantomData, + mem, + mem::{ManuallyDrop, MaybeUninit}, + ops::{Deref, DerefMut}, + ptr::addr_of_mut, + slice, +}; + +use gem::BaseObject; + +/// Trait which must be implemented by drivers using shmem-backed GEM objects. +pub trait DriverObject: gem::BaseDriverObject<Object<Self>> { + /// Parent `Driver` for this object. + type Driver: drv::Driver; +} + +// FIXME: This is terrible and I don't know how to avoid it +#[cfg(CONFIG_NUMA)] +macro_rules! vm_numa_fields { + ( $($field:ident: $val:expr),* $(,)? ) => { + bindings::vm_operations_struct { + $( $field: $val ),*, + set_policy: None, + get_policy: None, + } + } +} + +#[cfg(not(CONFIG_NUMA))] +macro_rules! vm_numa_fields { + ( $($field:ident: $val:expr),* $(,)? ) => { + bindings::vm_operations_struct { + $( $field: $val ),* + } + } +} + +const SHMEM_VM_OPS: bindings::vm_operations_struct = vm_numa_fields! { + open: Some(bindings::drm_gem_shmem_vm_open), + close: Some(bindings::drm_gem_shmem_vm_close), + may_split: None, + mremap: None, + mprotect: None, + fault: Some(bindings::drm_gem_shmem_fault), + huge_fault: None, + map_pages: None, + pagesize: None, + page_mkwrite: None, + pfn_mkwrite: None, + access: None, + name: None, + find_special_page: None, +}; + +/// A shmem-backed GEM object. +#[repr(C)] +pub struct Object<T: DriverObject> { + obj: bindings::drm_gem_shmem_object, + // The DRM core ensures the Device exists as long as its objects exist, so we don't need to + // manage the reference count here. + dev: ManuallyDrop<device::Device<T::Driver>>, + inner: T, +} + +unsafe extern "C" fn gem_create_object<T: DriverObject>( + raw_dev: *mut bindings::drm_device, + size: usize, +) -> *mut bindings::drm_gem_object { + // SAFETY: GEM ensures the device lives as long as its objects live, + // so we can conjure up a reference from thin air and never drop it. 
+ let dev = ManuallyDrop::new(unsafe { device::Device::from_raw(raw_dev) }); + + let inner = match T::new(&*dev, size) { + Ok(v) => v, + Err(e) => return e.to_ptr(), + }; + + let p = unsafe { + bindings::krealloc( + core::ptr::null(), + Object::<T>::SIZE, + bindings::GFP_KERNEL | bindings::__GFP_ZERO, + ) as *mut Object<T> + }; + + if p.is_null() { + return ENOMEM.to_ptr(); + } + + // SAFETY: p is valid as long as the alloc succeeded + unsafe { + addr_of_mut!((*p).dev).write(dev); + addr_of_mut!((*p).inner).write(inner); + } + + // SAFETY: drm_gem_shmem_object is safe to zero-init, and + // the rest of Object has been initialized + let new: &mut Object<T> = unsafe { &mut *(p as *mut _) }; + + new.obj.base.funcs = &Object::<T>::VTABLE; + &mut new.obj.base +} + +unsafe extern "C" fn free_callback<T: DriverObject>(obj: *mut bindings::drm_gem_object) { + // SAFETY: All of our objects are Object<T>. + let p = crate::container_of!(obj, Object<T>, obj) as *mut Object<T>; + + // SAFETY: p is never used after this + unsafe { + core::ptr::drop_in_place(&mut (*p).inner); + } + + // SAFETY: This pointer has to be valid, since p is valid + unsafe { + bindings::drm_gem_shmem_free(&mut (*p).obj); + } +} + +impl<T: DriverObject> Object<T> { + /// The size of this object's structure. + const SIZE: usize = mem::size_of::<Self>(); + + /// `drm_gem_object_funcs` vtable suitable for GEM shmem objects. + const VTABLE: bindings::drm_gem_object_funcs = bindings::drm_gem_object_funcs { + free: Some(free_callback::<T>), + open: Some(super::open_callback::<T, Object<T>>), + close: Some(super::close_callback::<T, Object<T>>), + print_info: Some(bindings::drm_gem_shmem_object_print_info), + export: None, + pin: Some(bindings::drm_gem_shmem_object_pin), + unpin: Some(bindings::drm_gem_shmem_object_unpin), + get_sg_table: Some(bindings::drm_gem_shmem_object_get_sg_table), + vmap: Some(bindings::drm_gem_shmem_object_vmap), + vunmap: Some(bindings::drm_gem_shmem_object_vunmap), + mmap: Some(bindings::drm_gem_shmem_object_mmap), + vm_ops: &SHMEM_VM_OPS, + }; + + // SAFETY: Must only be used with DRM functions that are thread-safe + unsafe fn mut_shmem(&self) -> *mut bindings::drm_gem_shmem_object { + &self.obj as *const _ as *mut _ + } + + /// Create a new shmem-backed DRM object of the given size. + pub fn new(dev: &device::Device<T::Driver>, size: usize) -> Result<gem::UniqueObjectRef<Self>> { + // SAFETY: This function can be called as long as the ALLOC_OPS are set properly + // for this driver, and the gem_create_object is called. + let p = unsafe { bindings::drm_gem_shmem_create(dev.raw() as *mut _, size) }; + let p = crate::container_of!(p, Object<T>, obj) as *mut _; + + // SAFETY: The gem_create_object callback ensures this is a valid Object<T>, + // so we can take a unique reference to it. + let obj_ref = gem::UniqueObjectRef { ptr: p }; + + Ok(obj_ref) + } + + /// Returns the `Device` that owns this GEM object. + pub fn dev(&self) -> &device::Device<T::Driver> { + &self.dev + } + + /// Creates (if necessary) and returns a scatter-gather table of DMA pages for this object. + /// + /// This will pin the object in memory. + pub fn sg_table(&self) -> Result<SGTable<T>> { + // SAFETY: drm_gem_shmem_get_pages_sgt is thread-safe. + let sgt = from_kernel_err_ptr(unsafe { + bindings::drm_gem_shmem_get_pages_sgt(self.mut_shmem()) + })?; + + Ok(SGTable { + sgt, + _owner: self.reference(), + }) + } + + /// Creates and returns a virtual kernel memory mapping for this object. 
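+// Illustrative sketch (hypothetical driver code, not part of this series):
+// pinning a shmem object `bo` and walking its DMA segments with the `SGTable`
+// iterator defined further down in this file:
+//
+//     let sgt = bo.sg_table()?;
+//     for entry in &sgt {
+//         pr_info!("DMA span: {:#x} + {:#x}\n", entry.dma_address(), entry.dma_len());
+//     }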
+ pub fn vmap(&self) -> Result<VMap<T>> { + let mut map: MaybeUninit<bindings::iosys_map> = MaybeUninit::uninit(); + + // SAFETY: drm_gem_shmem_vmap is thread-safe + to_result(unsafe { bindings::drm_gem_shmem_vmap(self.mut_shmem(), map.as_mut_ptr()) })?; + + // SAFETY: if drm_gem_shmem_vmap did not fail, map is initialized now + let map = unsafe { map.assume_init() }; + + Ok(VMap { + map, + owner: self.reference(), + }) + } + + /// Set the write-combine flag for this object. + /// + /// Should be called before any mappings are made. + pub fn set_wc(&mut self, map_wc: bool) { + unsafe { (*self.mut_shmem()).map_wc = map_wc }; + } +} + +impl<T: DriverObject> Deref for Object<T> { + type Target = T; + + fn deref(&self) -> &Self::Target { + &self.inner + } +} + +impl<T: DriverObject> DerefMut for Object<T> { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.inner + } +} + +impl<T: DriverObject> crate::private::Sealed for Object<T> {} + +impl<T: DriverObject> gem::IntoGEMObject for Object<T> { + type Driver = T::Driver; + + fn gem_obj(&self) -> &bindings::drm_gem_object { + &self.obj.base + } + + fn mut_gem_obj(&mut self) -> &mut bindings::drm_gem_object { + &mut self.obj.base + } + + fn from_gem_obj(obj: *mut bindings::drm_gem_object) -> *mut Object<T> { + crate::container_of!(obj, Object<T>, obj) as *mut Object<T> + } +} + +impl<T: DriverObject> drv::AllocImpl for Object<T> { + const ALLOC_OPS: drv::AllocOps = drv::AllocOps { + gem_create_object: Some(gem_create_object::<T>), + prime_handle_to_fd: Some(bindings::drm_gem_prime_handle_to_fd), + prime_fd_to_handle: Some(bindings::drm_gem_prime_fd_to_handle), + gem_prime_import: None, + gem_prime_import_sg_table: Some(bindings::drm_gem_shmem_prime_import_sg_table), + gem_prime_mmap: Some(bindings::drm_gem_prime_mmap), + dumb_create: Some(bindings::drm_gem_shmem_dumb_create), + dumb_map_offset: None, + dumb_destroy: None, + }; +} + +/// A virtual mapping for a shmem-backed GEM object in kernel address space. +pub struct VMap<T: DriverObject> { + map: bindings::iosys_map, + owner: gem::ObjectRef<Object<T>>, +} + +impl<T: DriverObject> VMap<T> { + /// Returns a const raw pointer to the start of the mapping. + pub fn as_ptr(&self) -> *const core::ffi::c_void { + // SAFETY: The shmem helpers always return non-iomem maps + unsafe { self.map.__bindgen_anon_1.vaddr } + } + + /// Returns a mutable raw pointer to the start of the mapping. + pub fn as_mut_ptr(&mut self) -> *mut core::ffi::c_void { + // SAFETY: The shmem helpers always return non-iomem maps + unsafe { self.map.__bindgen_anon_1.vaddr } + } + + /// Returns a byte slice view of the mapping. + pub fn as_slice(&self) -> &[u8] { + // SAFETY: The vmap maps valid memory up to the owner size + unsafe { slice::from_raw_parts(self.as_ptr() as *const u8, self.owner.size()) } + } + + /// Returns mutable a byte slice view of the mapping. + pub fn as_mut_slice(&mut self) -> &mut [u8] { + // SAFETY: The vmap maps valid memory up to the owner size + unsafe { slice::from_raw_parts_mut(self.as_mut_ptr() as *mut u8, self.owner.size()) } + } + + /// Borrows a reference to the object that owns this virtual mapping. + pub fn owner(&self) -> &gem::ObjectRef<Object<T>> { + &self.owner + } +} + +impl<T: DriverObject> Drop for VMap<T> { + fn drop(&mut self) { + // SAFETY: This function is thread-safe + unsafe { + bindings::drm_gem_shmem_vunmap(self.owner.mut_shmem(), &mut self.map); + } + } +} + +/// SAFETY: `iosys_map` objects are safe to send across threads. 
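+// Illustrative sketch (hypothetical driver code, not part of this series): CPU
+// access to a shmem object through a temporary kernel mapping, assuming `bo` is
+// still uniquely owned so `set_wc()` can be called before any mapping exists:
+//
+//     bo.set_wc(true);
+//     let mut map = bo.vmap()?;
+//     map.as_mut_slice().fill(0);
+//     drop(map); // unmaps via drm_gem_shmem_vunmap()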
+unsafe impl<T: DriverObject> Send for VMap<T> {} +unsafe impl<T: DriverObject> Sync for VMap<T> {} + +/// A single scatter-gather entry, representing a span of pages in the device's DMA address space. +/// +/// For devices not behind a standalone IOMMU, this corresponds to physical addresses. +#[repr(transparent)] +pub struct SGEntry(bindings::scatterlist); + +impl SGEntry { + /// Returns the starting DMA address of this span + pub fn dma_address(&self) -> usize { + (unsafe { bindings::sg_dma_address(&self.0) }) as usize + } + + /// Returns the length of this span in bytes + pub fn dma_len(&self) -> usize { + (unsafe { bindings::sg_dma_len(&self.0) }) as usize + } +} + +/// A scatter-gather table of DMA address spans for a GEM shmem object. +/// +/// # Invariants +/// `sgt` must be a valid pointer to the `sg_table`, which must correspond to the owned +/// object in `_owner` (which ensures it remains valid). +pub struct SGTable<T: DriverObject> { + sgt: *const bindings::sg_table, + _owner: gem::ObjectRef<Object<T>>, +} + +impl<T: DriverObject> SGTable<T> { + /// Returns an iterator through the SGTable's entries + pub fn iter(&'_ self) -> SGTableIter<'_> { + SGTableIter { + left: unsafe { (*self.sgt).nents } as usize, + sg: unsafe { (*self.sgt).sgl }, + _p: PhantomData, + } + } +} + +impl<'a, T: DriverObject> IntoIterator for &'a SGTable<T> { + type Item = &'a SGEntry; + type IntoIter = SGTableIter<'a>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +/// SAFETY: `sg_table` objects are safe to send across threads. +unsafe impl<T: DriverObject> Send for SGTable<T> {} +unsafe impl<T: DriverObject> Sync for SGTable<T> {} + +/// An iterator through `SGTable` entries. +/// +/// # Invariants +/// `sg` must be a valid pointer to the scatterlist, which must outlive our lifetime. +pub struct SGTableIter<'a> { + sg: *mut bindings::scatterlist, + left: usize, + _p: PhantomData<&'a ()>, +} + +impl<'a> Iterator for SGTableIter<'a> { + type Item = &'a SGEntry; + + fn next(&mut self) -> Option<Self::Item> { + if self.left == 0 { + None + } else { + let sg = self.sg; + self.sg = unsafe { bindings::sg_next(self.sg) }; + self.left -= 1; + Some(unsafe { &(*(sg as *const SGEntry)) }) + } + } +} diff --git a/rust/kernel/drm/ioctl.rs b/rust/kernel/drm/ioctl.rs new file mode 100644 index 000000000000..10304efbd5f1 --- /dev/null +++ b/rust/kernel/drm/ioctl.rs @@ -0,0 +1,147 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +#![allow(non_snake_case)] + +//! DRM IOCTL definitions. +//! +//! C header: [`include/linux/drm/drm_ioctl.h`](../../../../include/linux/drm/drm_ioctl.h) + +use crate::ioctl; + +const BASE: u32 = bindings::DRM_IOCTL_BASE as u32; + +/// Construct a DRM ioctl number with no argument. +pub const fn IO(nr: u32) -> u32 { + ioctl::_IO(BASE, nr) +} + +/// Construct a DRM ioctl number with a read-only argument. +pub const fn IOR<T>(nr: u32) -> u32 { + ioctl::_IOR::<T>(BASE, nr) +} + +/// Construct a DRM ioctl number with a write-only argument. +pub const fn IOW<T>(nr: u32) -> u32 { + ioctl::_IOW::<T>(BASE, nr) +} + +/// Construct a DRM ioctl number with a read-write argument. +pub const fn IOWR<T>(nr: u32) -> u32 { + ioctl::_IOWR::<T>(BASE, nr) +} + +/// Descriptor type for DRM ioctls. Use the `declare_drm_ioctls!{}` macro to construct them. +pub type DrmIoctlDescriptor = bindings::drm_ioctl_desc; + +/// This is for ioctl which are used for rendering, and require that the file descriptor is either +/// for a render node, or if it’s a legacy/primary node, then it must be authenticated. 
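+// Illustrative sketch (hypothetical, not part of this series): a driver-private
+// ioctl number built with the helpers above. In practice the full DRM_IOCTL_*
+// values usually come from the UAPI header via `bindings`; `drm_mydrv_submit`
+// is a made-up argument struct:
+//
+//     const DRM_IOCTL_MYDRV_SUBMIT: u32 =
+//         IOWR::<bindings::drm_mydrv_submit>(bindings::DRM_COMMAND_BASE);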
+pub const AUTH: u32 = bindings::drm_ioctl_flags_DRM_AUTH; + +/// This must be set for any ioctl which can change the modeset or display state. Userspace must +/// call the ioctl through a primary node, while it is the active master. +/// +/// Note that read-only modeset ioctl can also be called by unauthenticated clients, or when a +/// master is not the currently active one. +pub const MASTER: u32 = bindings::drm_ioctl_flags_DRM_MASTER; + +/// Anything that could potentially wreak a master file descriptor needs to have this flag set. +/// +/// Current that’s only for the SETMASTER and DROPMASTER ioctl, which e.g. logind can call to force +/// a non-behaving master (display compositor) into compliance. +/// +/// This is equivalent to callers with the SYSADMIN capability. +pub const ROOT_ONLY: u32 = bindings::drm_ioctl_flags_DRM_ROOT_ONLY; + +/// Whether drm_ioctl_desc.func should be called with the DRM BKL held or not. Enforced as the +/// default for all modern drivers, hence there should never be a need to set this flag. +/// +/// Do not use anywhere else than for the VBLANK_WAIT IOCTL, which is the only legacy IOCTL which +/// needs this. +pub const UNLOCKED: u32 = bindings::drm_ioctl_flags_DRM_UNLOCKED; + +/// This is used for all ioctl needed for rendering only, for drivers which support render nodes. +/// This should be all new render drivers, and hence it should be always set for any ioctl with +/// `AUTH` set. Note though that read-only query ioctl might have this set, but have not set +/// DRM_AUTH because they do not require authentication. +pub const RENDER_ALLOW: u32 = bindings::drm_ioctl_flags_DRM_RENDER_ALLOW; + +/// Declare the DRM ioctls for a driver. +/// +/// Each entry in the list should have the form: +/// +/// `(ioctl_number, argument_type, flags, user_callback),` +/// +/// `argument_type` is the type name within the `bindings` crate. +/// `user_callback` should have the following prototype: +/// +/// ``` +/// fn foo(device: &kernel::drm::device::Device<Self>, +/// data: &mut bindings::argument_type, +/// file: &kernel::drm::file::File<Self::File>, +/// ) +/// ``` +/// where `Self` is the drm::drv::Driver implementation these ioctls are being declared within. +/// +/// # Examples +/// +/// ``` +/// kernel::declare_drm_ioctls! { +/// (FOO_GET_PARAM, drm_foo_get_param, ioctl::RENDER_ALLOW, my_get_param_handler), +/// } +/// ``` +/// +#[macro_export] +macro_rules! declare_drm_ioctls { + ( $(($cmd:ident, $struct:ident, $flags:expr, $func:expr)),* $(,)? ) => { + const IOCTLS: &'static [$crate::drm::ioctl::DrmIoctlDescriptor] = { + const _:() = { + let i: u32 = $crate::bindings::DRM_COMMAND_BASE; + // Assert that all the IOCTLs are in the right order and there are no gaps, + // and that the sizeof of the specified type is correct. 
+ $( + let cmd: u32 = $crate::macros::concat_idents!($crate::bindings::DRM_IOCTL_, $cmd); + ::core::assert!(i == $crate::ioctl::_IOC_NR(cmd)); + ::core::assert!(core::mem::size_of::<$crate::bindings::$struct>() == $crate::ioctl::_IOC_SIZE(cmd)); + let i: u32 = i + 1; + )* + }; + + let ioctls = &[$( + $crate::bindings::drm_ioctl_desc { + cmd: $crate::macros::concat_idents!($crate::bindings::DRM_IOCTL_, $cmd) as u32, + func: { + #[allow(non_snake_case)] + unsafe extern "C" fn $cmd( + raw_dev: *mut $crate::bindings::drm_device, + raw_data: *mut ::core::ffi::c_void, + raw_file_priv: *mut $crate::bindings::drm_file, + ) -> core::ffi::c_int { + // SAFETY: We never drop this, and the DRM core ensures the device lives + // while callbacks are being called. + // + // FIXME: Currently there is nothing enforcing that the types of the + // dev/file match the current driver these ioctls are being declared + // for, and it's not clear how to enforce this within the type system. + let dev = ::core::mem::ManuallyDrop::new(unsafe { + $crate::drm::device::Device::from_raw(raw_dev) + }); + // SAFETY: This is just the ioctl argument, which hopefully has the right type + // (we've done our best checking the size). + let data = unsafe { &mut *(raw_data as *mut $crate::bindings::$struct) }; + // SAFETY: This is just the DRM file structure + let file = unsafe { $crate::drm::file::File::from_raw(raw_file_priv) }; + + match $func(&*dev, data, &file) { + Err(e) => e.to_kernel_errno(), + Ok(i) => i.try_into().unwrap_or(ERANGE.to_kernel_errno()), + } + } + Some($cmd) + }, + flags: $flags, + name: $crate::c_str!(::core::stringify!($cmd)).as_char_ptr(), + } + ),*]; + ioctls + }; + }; +} diff --git a/rust/kernel/drm/mm.rs b/rust/kernel/drm/mm.rs new file mode 100644 index 000000000000..83e27a7dcc7e --- /dev/null +++ b/rust/kernel/drm/mm.rs @@ -0,0 +1,309 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT + +//! DRM MM range allocator +//! +//! C header: [`include/linux/drm/drm_mm.h`](../../../../include/linux/drm/drm_mm.h) + +use crate::{ + bindings, + error::{to_result, Result}, + str::CStr, + sync::{Arc, LockClassKey, LockIniter, Mutex, UniqueArc}, + types::Opaque, +}; + +use alloc::boxed::Box; + +use core::{ + marker::{PhantomData, PhantomPinned}, + ops::Deref, + pin::Pin, +}; + +/// Type alias representing a DRM MM node. +pub type Node<A, T> = Pin<Box<NodeData<A, T>>>; + +/// Trait which must be implemented by the inner allocator state type provided by the user. +pub trait AllocInner<T> { + /// Notification that a node was dropped from the allocator. + fn drop_object(&mut self, _start: u64, _size: u64, _color: usize, _object: &mut T) {} +} + +impl<T> AllocInner<T> for () {} + +/// Wrapper type for a `struct drm_mm` plus user AllocInner object. +/// +/// # Invariants +/// The `drm_mm` struct is valid and initialized. +struct MmInner<A: AllocInner<T>, T>(Opaque<bindings::drm_mm>, A, PhantomData<T>); + +/// Represents a single allocated node in the MM allocator +pub struct NodeData<A: AllocInner<T>, T> { + node: bindings::drm_mm_node, + mm: Arc<Mutex<MmInner<A, T>>>, + valid: bool, + /// A drm_mm_node needs to be pinned because nodes reference each other in a linked list. + _pin: PhantomPinned, + inner: T, +} + +// SAFETY: Allocator ops take the mutex, and there are no mutable actions on the node. 
+unsafe impl<A: Send + AllocInner<T>, T: Send> Send for NodeData<A, T> {} +unsafe impl<A: Send + AllocInner<T>, T: Sync> Sync for NodeData<A, T> {} + +/// Available MM node insertion modes +#[repr(u32)] +pub enum InsertMode { + /// Search for the smallest hole (within the search range) that fits the desired node. + /// + /// Allocates the node from the bottom of the found hole. + Best = bindings::drm_mm_insert_mode_DRM_MM_INSERT_BEST, + + /// Search for the lowest hole (address closest to 0, within the search range) that fits the + /// desired node. + /// + /// Allocates the node from the bottom of the found hole. + Low = bindings::drm_mm_insert_mode_DRM_MM_INSERT_LOW, + + /// Search for the highest hole (address closest to U64_MAX, within the search range) that fits + /// the desired node. + /// + /// Allocates the node from the top of the found hole. The specified alignment for the node is + /// applied to the base of the node (`Node.start()`). + High = bindings::drm_mm_insert_mode_DRM_MM_INSERT_HIGH, + + /// Search for the most recently evicted hole (within the search range) that fits the desired + /// node. This is appropriate for use immediately after performing an eviction scan and removing + /// the selected nodes to form a hole. + /// + /// Allocates the node from the bottom of the found hole. + Evict = bindings::drm_mm_insert_mode_DRM_MM_INSERT_EVICT, +} + +/// A clonable, interlocked reference to the allocator state. +/// +/// This is useful to perform actions on the user-supplied `AllocInner<T>` type given just a Node, +/// without immediately taking the lock. +#[derive(Clone)] +pub struct InnerRef<A: AllocInner<T>, T>(Arc<Mutex<MmInner<A, T>>>); + +impl<A: AllocInner<T>, T> InnerRef<A, T> { + /// Operate on the user `AllocInner<T>` implementation, taking the lock. + pub fn with<RetVal>(&self, cb: impl FnOnce(&mut A) -> RetVal) -> RetVal { + let mut l = self.0.lock(); + cb(&mut l.1) + } +} + +impl<A: AllocInner<T>, T> NodeData<A, T> { + /// Returns the color of the node (an opaque value) + pub fn color(&self) -> usize { + self.node.color as usize + } + + /// Returns the start address of the node + pub fn start(&self) -> u64 { + self.node.start + } + + /// Returns the size of the node in bytes + pub fn size(&self) -> u64 { + self.node.size + } + + /// Operate on the user `AllocInner<T>` implementation associated with this node's allocator. + pub fn with_inner<RetVal>(&self, cb: impl FnOnce(&mut A) -> RetVal) -> RetVal { + let mut l = self.mm.lock(); + cb(&mut l.1) + } + + /// Return a clonable, detached reference to the allocator inner data. + pub fn alloc_ref(&self) -> InnerRef<A, T> { + InnerRef(self.mm.clone()) + } + + /// Return a mutable reference to the inner data. + pub fn inner_mut(self: Pin<&mut Self>) -> &mut T { + // SAFETY: This is okay because inner is not structural + unsafe { &mut self.get_unchecked_mut().inner } + } +} + +impl<A: AllocInner<T>, T> Deref for NodeData<A, T> { + type Target = T; + + fn deref(&self) -> &Self::Target { + &self.inner + } +} + +impl<A: AllocInner<T>, T> Drop for NodeData<A, T> { + fn drop(&mut self) { + if self.valid { + let mut guard = self.mm.lock(); + + // Inform the user allocator that a node is being dropped. + guard + .1 + .drop_object(self.start(), self.size(), self.color(), &mut self.inner); + // SAFETY: The MM lock is still taken, so we can safely remove the node. + unsafe { bindings::drm_mm_remove_node(&mut self.node) }; + } + } +} + +/// An instance of a DRM MM range allocator. 
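+// Illustrative sketch (hypothetical driver code, not part of this series):
+// carving GPU VA ranges out of an `Allocator`, using `()` for both the inner
+// state and the per-node data; `key`, `alloc` and `size` are placeholders
+// provided by the caller:
+//
+//     fn new_va_allocator(key: &'static LockClassKey) -> Result<Allocator<(), ()>> {
+//         // 1 GiB of VA space starting at 4 KiB, no inner allocator state.
+//         Allocator::new(0x1000, 0x4000_0000, (), c_str!("my_gpu_va"), key)
+//     }
+//
+//     // Later, per buffer: take the lowest 4 KiB-aligned hole that fits.
+//     let node = alloc.insert_node_generic((), size, 0x1000, 0, InsertMode::Low)?;
+//     pr_info!("VA at {:#x}..{:#x}\n", node.start(), node.start() + node.size());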
+pub struct Allocator<A: AllocInner<T>, T> { + mm: Arc<Mutex<MmInner<A, T>>>, + _p: PhantomData<T>, +} + +impl<A: AllocInner<T>, T> Allocator<A, T> { + /// Create a new range allocator for the given start and size range of addresses. + /// + /// The user may optionally provide an inner object representing allocator state, which will + /// be protected by the same lock. If not required, `()` can be used. + pub fn new( + start: u64, + size: u64, + inner: A, + name: &'static CStr, + lock_key: &'static LockClassKey, + ) -> Result<Allocator<A, T>> { + // SAFETY: We call `Mutex::init_lock` below. + let mut mm: Pin<UniqueArc<Mutex<MmInner<A, T>>>> = UniqueArc::try_new(unsafe { + Mutex::new(MmInner(Opaque::uninit(), inner, PhantomData)) + })? + .into(); + + mm.as_mut().init_lock(name, lock_key); + + unsafe { + // SAFETY: The Opaque instance provides a valid pointer, and it is initialized after + // this call. + bindings::drm_mm_init(mm.lock().0.get(), start, size); + } + + Ok(Allocator { + mm: mm.into(), + _p: PhantomData, + }) + } + + /// Insert a new node into the allocator of a given size. + /// + /// `node` is the user `T` type data to store into the node. + pub fn insert_node(&mut self, node: T, size: u64) -> Result<Node<A, T>> { + self.insert_node_generic(node, size, 0, 0, InsertMode::Best) + } + + /// Insert a new node into the allocator of a given size, with configurable alignment, + /// color, and insertion mode. + /// + /// `node` is the user `T` type data to store into the node. + pub fn insert_node_generic( + &mut self, + node: T, + size: u64, + alignment: u64, + color: usize, + mode: InsertMode, + ) -> Result<Node<A, T>> { + self.insert_node_in_range(node, size, alignment, color, 0, u64::MAX, mode) + } + + /// Insert a new node into the allocator of a given size, with configurable alignment, + /// color, insertion mode, and sub-range to allocate from. + /// + /// `node` is the user `T` type data to store into the node. + #[allow(clippy::too_many_arguments)] + pub fn insert_node_in_range( + &mut self, + node: T, + size: u64, + alignment: u64, + color: usize, + start: u64, + end: u64, + mode: InsertMode, + ) -> Result<Node<A, T>> { + let mut mm_node = Box::try_new(NodeData { + // SAFETY: This C struct should be zero-initialized. + node: unsafe { core::mem::zeroed() }, + valid: false, + inner: node, + mm: self.mm.clone(), + _pin: PhantomPinned, + })?; + + let guard = self.mm.lock(); + // SAFETY: We hold the lock and all pointers are valid. + to_result(unsafe { + bindings::drm_mm_insert_node_in_range( + guard.0.get(), + &mut mm_node.node, + size, + alignment, + color as core::ffi::c_ulong, + start, + end, + mode as u32, + ) + })?; + + mm_node.valid = true; + + Ok(Pin::from(mm_node)) + } + + /// Insert a node into the allocator at a fixed start address. + /// + /// `node` is the user `T` type data to store into the node. + pub fn reserve_node( + &mut self, + node: T, + start: u64, + size: u64, + color: usize, + ) -> Result<Node<A, T>> { + let mut mm_node = Box::try_new(NodeData { + // SAFETY: This C struct should be zero-initialized. + node: unsafe { core::mem::zeroed() }, + valid: false, + inner: node, + mm: self.mm.clone(), + _pin: PhantomPinned, + })?; + + mm_node.node.start = start; + mm_node.node.size = size; + mm_node.node.color = color as core::ffi::c_ulong; + + let guard = self.mm.lock(); + // SAFETY: We hold the lock and all pointers are valid. 
+ to_result(unsafe { bindings::drm_mm_reserve_node(guard.0.get(), &mut mm_node.node) })?; + + mm_node.valid = true; + + Ok(Pin::from(mm_node)) + } + + /// Operate on the inner user type `A`, taking the allocator lock + pub fn with_inner<RetVal>(&self, cb: impl FnOnce(&mut A) -> RetVal) -> RetVal { + let mut guard = self.mm.lock(); + cb(&mut guard.1) + } +} + +impl<A: AllocInner<T>, T> Drop for MmInner<A, T> { + fn drop(&mut self) { + // SAFETY: If the MmInner is dropped then all nodes are gone (since they hold references), + // so it is safe to tear down the allocator. + unsafe { + bindings::drm_mm_takedown(self.0.get()); + } + } +} + +// MmInner is safely Send if the AllocInner user type is Send. +unsafe impl<A: Send + AllocInner<T>, T> Send for MmInner<A, T> {} diff --git a/rust/kernel/drm/mod.rs b/rust/kernel/drm/mod.rs new file mode 100644 index 000000000000..b1f182453ec1 --- /dev/null +++ b/rust/kernel/drm/mod.rs @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT + +//! DRM subsystem abstractions. + +pub mod device; +pub mod drv; +pub mod file; +pub mod gem; +pub mod ioctl; +pub mod mm; +pub mod sched; +pub mod syncobj; diff --git a/rust/kernel/drm/sched.rs b/rust/kernel/drm/sched.rs new file mode 100644 index 000000000000..a5275cc16179 --- /dev/null +++ b/rust/kernel/drm/sched.rs @@ -0,0 +1,358 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT + +//! DRM Scheduler +//! +//! C header: [`include/linux/drm/gpu_scheduler.h`](../../../../include/linux/drm/gpu_scheduler.h) + +use crate::{ + bindings, device, + dma_fence::*, + error::{to_result, Result}, + prelude::*, + sync::{Arc, UniqueArc}, +}; +use alloc::boxed::Box; +use core::marker::PhantomData; +use core::mem::MaybeUninit; +use core::ops::{Deref, DerefMut}; +use core::ptr::addr_of_mut; + +/// Scheduler status after timeout recovery +#[repr(u32)] +pub enum Status { + /// Device recovered from the timeout and can execute jobs again + Nominal = bindings::drm_gpu_sched_stat_DRM_GPU_SCHED_STAT_NOMINAL, + /// Device is no longer available + NoDevice = bindings::drm_gpu_sched_stat_DRM_GPU_SCHED_STAT_ENODEV, +} + +/// Scheduler priorities +#[repr(i32)] +pub enum Priority { + /// Low userspace priority + Min = bindings::drm_sched_priority_DRM_SCHED_PRIORITY_MIN, + /// Normal userspace priority + Normal = bindings::drm_sched_priority_DRM_SCHED_PRIORITY_NORMAL, + /// High userspace priority + High = bindings::drm_sched_priority_DRM_SCHED_PRIORITY_HIGH, + /// Kernel priority (highest) + Kernel = bindings::drm_sched_priority_DRM_SCHED_PRIORITY_KERNEL, +} + +/// Trait to be implemented by driver job objects. +pub trait JobImpl: Sized { + /// Called when the scheduler is considering scheduling this job next, to get another Fence + /// for this job to block on. Once it returns None, run() may be called. + fn prepare(_job: &mut Job<Self>) -> Option<Fence> { + None // Equivalent to NULL function pointer + } + + /// Called before job execution to check whether the hardware is free enough to run the job. + /// This can be used to implement more complex hardware resource policies than the hw_submission + /// limit. + fn can_run(_job: &mut Job<Self>) -> bool { + true + } + + /// Called to execute the job once all of the dependencies have been resolved. This may be + /// called multiple times, if timed_out() has happened and drm_sched_job_recovery() decides + /// to try it again. + fn run(job: &mut Job<Self>) -> Result<Option<Fence>>; + + /// Called when a job has taken too long to execute, to trigger GPU recovery. 
+ /// + /// This method is called in a workqueue context. + fn timed_out(job: &mut Job<Self>) -> Status; +} + +unsafe extern "C" fn prepare_job_cb<T: JobImpl>( + sched_job: *mut bindings::drm_sched_job, + _s_entity: *mut bindings::drm_sched_entity, +) -> *mut bindings::dma_fence { + // SAFETY: All of our jobs are Job<T>. + let p = crate::container_of!(sched_job, Job<T>, job) as *mut Job<T>; + + match T::prepare(unsafe { &mut *p }) { + None => core::ptr::null_mut(), + Some(fence) => fence.into_raw(), + } +} + +unsafe extern "C" fn run_job_cb<T: JobImpl>( + sched_job: *mut bindings::drm_sched_job, +) -> *mut bindings::dma_fence { + // SAFETY: All of our jobs are Job<T>. + let p = crate::container_of!(sched_job, Job<T>, job) as *mut Job<T>; + + match T::run(unsafe { &mut *p }) { + Err(e) => e.to_ptr(), + Ok(None) => core::ptr::null_mut(), + Ok(Some(fence)) => fence.into_raw(), + } +} + +unsafe extern "C" fn can_run_job_cb<T: JobImpl>(sched_job: *mut bindings::drm_sched_job) -> bool { + // SAFETY: All of our jobs are Job<T>. + let p = crate::container_of!(sched_job, Job<T>, job) as *mut Job<T>; + + T::can_run(unsafe { &mut *p }) +} + +unsafe extern "C" fn timedout_job_cb<T: JobImpl>( + sched_job: *mut bindings::drm_sched_job, +) -> bindings::drm_gpu_sched_stat { + // SAFETY: All of our jobs are Job<T>. + let p = crate::container_of!(sched_job, Job<T>, job) as *mut Job<T>; + + T::timed_out(unsafe { &mut *p }) as bindings::drm_gpu_sched_stat +} + +unsafe extern "C" fn free_job_cb<T: JobImpl>(sched_job: *mut bindings::drm_sched_job) { + // SAFETY: All of our jobs are Job<T>. + let p = crate::container_of!(sched_job, Job<T>, job) as *mut Job<T>; + + // Convert the job back to a Box and drop it + // SAFETY: All of our Job<T>s are created inside a box. + unsafe { Box::from_raw(p) }; +} + +/// A DRM scheduler job. +pub struct Job<T: JobImpl> { + job: bindings::drm_sched_job, + inner: T, +} + +impl<T: JobImpl> Deref for Job<T> { + type Target = T; + + fn deref(&self) -> &Self::Target { + &self.inner + } +} + +impl<T: JobImpl> DerefMut for Job<T> { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.inner + } +} + +impl<T: JobImpl> Drop for Job<T> { + fn drop(&mut self) { + // SAFETY: At this point the job has either been submitted and this is being called from + // `free_job_cb` above, or it hasn't and it is safe to call `drm_sched_job_cleanup`. 
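+// Illustrative sketch (hypothetical driver code, not part of this series): a
+// minimal `JobImpl`, where `hw_submit()` stands in for whatever actually queues
+// work on the hardware and returns its completion fence:
+//
+//     struct MyJob { cmdbuf_gpu_va: u64 }
+//
+//     impl sched::JobImpl for MyJob {
+//         fn run(job: &mut sched::Job<Self>) -> Result<Option<Fence>> {
+//             Ok(Some(hw_submit(job.cmdbuf_gpu_va)?))
+//         }
+//
+//         fn timed_out(_job: &mut sched::Job<Self>) -> sched::Status {
+//             // A real driver would reset the engine here before returning.
+//             sched::Status::Nominal
+//         }
+//     }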
+ unsafe { bindings::drm_sched_job_cleanup(&mut self.job) }; + } +} + +/// A pending DRM scheduler job (not yet armed) +pub struct PendingJob<'a, T: JobImpl>(Box<Job<T>>, PhantomData<&'a T>); + +impl<'a, T: JobImpl> PendingJob<'a, T> { + /// Add a fence as a dependency to the job + pub fn add_dependency(&mut self, fence: Fence) -> Result { + to_result(unsafe { + bindings::drm_sched_job_add_dependency(&mut self.0.job, fence.into_raw()) + }) + } + + /// Arm the job to make it ready for execution + pub fn arm(mut self) -> ArmedJob<'a, T> { + unsafe { bindings::drm_sched_job_arm(&mut self.0.job) }; + ArmedJob(self.0, PhantomData) + } +} + +impl<'a, T: JobImpl> Deref for PendingJob<'a, T> { + type Target = Job<T>; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl<'a, T: JobImpl> DerefMut for PendingJob<'a, T> { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +/// An armed DRM scheduler job (not yet submitted) +pub struct ArmedJob<'a, T: JobImpl>(Box<Job<T>>, PhantomData<&'a T>); + +impl<'a, T: JobImpl> ArmedJob<'a, T> { + /// Returns the job fences + pub fn fences(&self) -> JobFences<'_> { + JobFences(unsafe { &mut *self.0.job.s_fence }) + } + + /// Push the job for execution into the scheduler + pub fn push(self) { + // After this point, the job is submitted and owned by the scheduler + let ptr = match self { + ArmedJob(job, _) => Box::<Job<T>>::into_raw(job), + }; + + // SAFETY: We are passing in ownership of a valid Box raw pointer. + unsafe { bindings::drm_sched_entity_push_job(addr_of_mut!((*ptr).job)) }; + } +} +impl<'a, T: JobImpl> Deref for ArmedJob<'a, T> { + type Target = Job<T>; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl<'a, T: JobImpl> DerefMut for ArmedJob<'a, T> { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +/// Reference to the bundle of fences attached to a DRM scheduler job +pub struct JobFences<'a>(&'a mut bindings::drm_sched_fence); + +impl<'a> JobFences<'a> { + /// Returns a new reference to the job scheduled fence. + pub fn scheduled(&mut self) -> Fence { + unsafe { Fence::get_raw(&mut self.0.scheduled) } + } + + /// Returns a new reference to the job finished fence. + pub fn finished(&mut self) -> Fence { + unsafe { Fence::get_raw(&mut self.0.finished) } + } +} + +struct EntityInner<T: JobImpl> { + entity: bindings::drm_sched_entity, + // TODO: Allow users to share guilty flag between entities + sched: Arc<SchedulerInner<T>>, + guilty: bindings::atomic_t, + _p: PhantomData<T>, +} + +impl<T: JobImpl> Drop for EntityInner<T> { + fn drop(&mut self) { + // SAFETY: The EntityInner is initialized. This will cancel/free all jobs. + unsafe { bindings::drm_sched_entity_destroy(&mut self.entity) }; + } +} + +// SAFETY: TODO +unsafe impl<T: JobImpl> Sync for EntityInner<T> {} +unsafe impl<T: JobImpl> Send for EntityInner<T> {} + +/// A DRM scheduler entity. +pub struct Entity<T: JobImpl>(Pin<Box<EntityInner<T>>>); + +impl<T: JobImpl> Entity<T> { + /// Create a new scheduler entity. + pub fn new(sched: &Scheduler<T>, priority: Priority) -> Result<Self> { + let mut entity: Box<MaybeUninit<EntityInner<T>>> = Box::try_new_zeroed()?; + + let mut sched_ptr = &sched.0.sched as *const _ as *mut _; + + // SAFETY: The Box is allocated above and valid. + unsafe { + bindings::drm_sched_entity_init( + addr_of_mut!((*entity.as_mut_ptr()).entity), + priority as _, + &mut sched_ptr, + 1, + addr_of_mut!((*entity.as_mut_ptr()).guilty), + ) + }; + + // SAFETY: The Box is allocated above and valid. 
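+// Illustrative sketch (hypothetical driver code, not part of this series): the
+// expected submission flow, using `Entity::new_job()` defined below; `in_fence`
+// and `va` are placeholders obtained elsewhere (e.g. from a syncobj):
+//
+//     let mut pending = entity.new_job(MyJob { cmdbuf_gpu_va: va })?;
+//     pending.add_dependency(in_fence)?;          // block until this signals
+//     let armed = pending.arm();
+//     let out_fence = armed.fences().finished();  // completion fence to hand out
+//     armed.push();                               // the scheduler now owns the job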
+ unsafe { addr_of_mut!((*entity.as_mut_ptr()).sched).write(sched.0.clone()) }; + + // SAFETY: entity is now initialized. + Ok(Self(Pin::from(unsafe { entity.assume_init() }))) + } + + /// Create a new job on this entity. + /// + /// The entity must outlive the pending job until it transitions into the submitted state, + /// after which the scheduler owns it. + pub fn new_job(&self, inner: T) -> Result<PendingJob<'_, T>> { + let mut job: Box<MaybeUninit<Job<T>>> = Box::try_new_zeroed()?; + + // SAFETY: We hold a reference to the entity (which is a valid pointer), + // and the job object was just allocated above. + to_result(unsafe { + bindings::drm_sched_job_init( + addr_of_mut!((*job.as_mut_ptr()).job), + &self.0.as_ref().get_ref().entity as *const _ as *mut _, + core::ptr::null_mut(), + ) + })?; + + // SAFETY: The Box pointer is valid, and this initializes the inner member. + unsafe { addr_of_mut!((*job.as_mut_ptr()).inner).write(inner) }; + + // SAFETY: All fields of the Job<T> are now initialized. + Ok(PendingJob(unsafe { job.assume_init() }, PhantomData)) + } +} + +/// DRM scheduler inner data +pub struct SchedulerInner<T: JobImpl> { + sched: bindings::drm_gpu_scheduler, + _p: PhantomData<T>, +} + +impl<T: JobImpl> Drop for SchedulerInner<T> { + fn drop(&mut self) { + // SAFETY: The scheduler is valid. This assumes drm_sched_fini() will take care of + // freeing all in-progress jobs. + unsafe { bindings::drm_sched_fini(&mut self.sched) }; + } +} + +// SAFETY: TODO +unsafe impl<T: JobImpl> Sync for SchedulerInner<T> {} +unsafe impl<T: JobImpl> Send for SchedulerInner<T> {} + +/// A DRM Scheduler +pub struct Scheduler<T: JobImpl>(Arc<SchedulerInner<T>>); + +impl<T: JobImpl> Scheduler<T> { + const OPS: bindings::drm_sched_backend_ops = bindings::drm_sched_backend_ops { + prepare_job: Some(prepare_job_cb::<T>), + can_run_job: Some(can_run_job_cb::<T>), + run_job: Some(run_job_cb::<T>), + timedout_job: Some(timedout_job_cb::<T>), + free_job: Some(free_job_cb::<T>), + }; + /// Creates a new DRM Scheduler object + // TODO: Shared timeout workqueues & scores + pub fn new( + device: &impl device::RawDevice, + hw_submission: u32, + hang_limit: u32, + timeout_ms: usize, + name: &'static CStr, + ) -> Result<Scheduler<T>> { + let mut sched: UniqueArc<MaybeUninit<SchedulerInner<T>>> = UniqueArc::try_new_uninit()?; + + // SAFETY: The drm_sched pointer is valid and pinned as it was just allocated above. + to_result(unsafe { + bindings::drm_sched_init( + addr_of_mut!((*sched.as_mut_ptr()).sched), + &Self::OPS, + hw_submission, + hang_limit, + bindings::msecs_to_jiffies(timeout_ms.try_into()?).try_into()?, + core::ptr::null_mut(), + core::ptr::null_mut(), + name.as_char_ptr(), + device.raw_device(), + ) + })?; + + // SAFETY: All fields of SchedulerInner are now initialized. + Ok(Scheduler(unsafe { sched.assume_init() }.into())) + } +} diff --git a/rust/kernel/drm/syncobj.rs b/rust/kernel/drm/syncobj.rs new file mode 100644 index 000000000000..10eed05eb27a --- /dev/null +++ b/rust/kernel/drm/syncobj.rs @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT + +//! DRM Sync Objects +//! +//! C header: [`include/linux/drm/drm_syncobj.h`](../../../../include/linux/drm/drm_syncobj.h) + +use crate::{bindings, dma_fence::*, drm, error::Result, prelude::*}; + +/// A DRM Sync Object +/// +/// # Invariants +/// ptr is a valid pointer to a drm_syncobj and we own a reference to it. 
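+// Illustrative sketch (hypothetical driver code, not part of this series):
+// resolving syncobj handles in a submit ioctl, assuming `file` implements
+// `drm::file::GenericFile` and that `args`, `pending` and `out_fence` come from
+// a made-up submission path:
+//
+//     let in_obj = SyncObj::lookup_handle(file, args.in_sync_handle)?;
+//     if let Some(fence) = in_obj.fence_get() {
+//         pending.add_dependency(fence)?;
+//     }
+//     let out_obj = SyncObj::lookup_handle(file, args.out_sync_handle)?;
+//     out_obj.replace_fence(Some(&out_fence));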
+pub struct SyncObj { + ptr: *mut bindings::drm_syncobj, +} + +impl SyncObj { + /// Looks up a sync object by its handle for a given `File`. + pub fn lookup_handle(file: &impl drm::file::GenericFile, handle: u32) -> Result<SyncObj> { + // SAFETY: The arguments are all valid per the type invariants. + let ptr = unsafe { bindings::drm_syncobj_find(file.raw() as *mut _, handle) }; + + if ptr.is_null() { + Err(ENOENT) + } else { + Ok(SyncObj { ptr }) + } + } + + /// Returns the DMA fence associated with this sync object, if any. + pub fn fence_get(&self) -> Option<Fence> { + let fence = unsafe { bindings::drm_syncobj_fence_get(self.ptr) }; + if fence.is_null() { + None + } else { + // SAFETY: The pointer is non-NULL and drm_syncobj_fence_get acquired an + // additional reference. + Some(unsafe { Fence::from_raw(fence) }) + } + } + + /// Replaces the DMA fence with a new one, or removes it if fence is None. + pub fn replace_fence(&self, fence: Option<&Fence>) { + unsafe { + bindings::drm_syncobj_replace_fence( + self.ptr, + fence.map_or(core::ptr::null_mut(), |a| a.raw()), + ) + }; + } + + /// Adds a new timeline point to the syncobj. + pub fn add_point(&self, chain: FenceChain, fence: &Fence, point: u64) { + // SAFETY: All arguments should be valid per the respective type invariants. + // This takes over the FenceChain ownership. + unsafe { bindings::drm_syncobj_add_point(self.ptr, chain.into_raw(), fence.raw(), point) }; + } +} + +impl Drop for SyncObj { + fn drop(&mut self) { + // SAFETY: We own a reference to this syncobj. + unsafe { bindings::drm_syncobj_put(self.ptr) }; + } +} + +impl Clone for SyncObj { + fn clone(&self) -> Self { + // SAFETY: `ptr` is valid per the type invariant and we own a reference to it. + unsafe { bindings::drm_syncobj_get(self.ptr) }; + SyncObj { ptr: self.ptr } + } +} + +// SAFETY: drm_syncobj operations are internally locked. +unsafe impl Sync for SyncObj {} +unsafe impl Send for SyncObj {} diff --git a/rust/kernel/error.rs b/rust/kernel/error.rs index 5b9751d7ff1d..9b9f5e479207 100644 --- a/rust/kernel/error.rs +++ b/rust/kernel/error.rs @@ -4,12 +4,15 @@ //! //! C header: [`include/uapi/asm-generic/errno-base.h`](../../../include/uapi/asm-generic/errno-base.h) +use crate::str::CStr; + use alloc::{ alloc::{AllocError, LayoutError}, collections::TryReserveError, }; use core::convert::From; +use core::fmt; use core::num::TryFromIntError; use core::str::Utf8Error; @@ -58,6 +61,117 @@ pub mod code { declare_err!(EPIPE, "Broken pipe."); declare_err!(EDOM, "Math argument out of domain of func."); declare_err!(ERANGE, "Math result not representable."); + declare_err!(EDEADLK, "Resource deadlock would occur"); + declare_err!(ENAMETOOLONG, "File name too long"); + declare_err!(ENOLCK, "No record locks available"); + declare_err!( + ENOSYS, + "Invalid system call number.", + "", + "This error code is special: arch syscall entry code will return", + "[`ENOSYS`] if users try to call a syscall that doesn't exist.", + "To keep failures of syscalls that really do exist distinguishable from", + "failures due to attempts to use a nonexistent syscall, syscall", + "implementations should refrain from returning [`ENOSYS`]." 
+ ); + declare_err!(ENOTEMPTY, "Directory not empty."); + declare_err!(ELOOP, "Too many symbolic links encountered."); + declare_err!(EWOULDBLOCK, "Operation would block."); + declare_err!(ENOMSG, "No message of desired type."); + declare_err!(EIDRM, "Identifier removed."); + declare_err!(ECHRNG, "Channel number out of range."); + declare_err!(EL2NSYNC, "Level 2 not synchronized."); + declare_err!(EL3HLT, "Level 3 halted."); + declare_err!(EL3RST, "Level 3 reset."); + declare_err!(ELNRNG, "Link number out of range."); + declare_err!(EUNATCH, "Protocol driver not attached."); + declare_err!(ENOCSI, "No CSI structure available."); + declare_err!(EL2HLT, "Level 2 halted."); + declare_err!(EBADE, "Invalid exchange."); + declare_err!(EBADR, "Invalid request descriptor."); + declare_err!(EXFULL, "Exchange full."); + declare_err!(ENOANO, "No anode."); + declare_err!(EBADRQC, "Invalid request code."); + declare_err!(EBADSLT, "Invalid slot."); + declare_err!(EDEADLOCK, "Resource deadlock would occur."); + declare_err!(EBFONT, "Bad font file format."); + declare_err!(ENOSTR, "Device not a stream."); + declare_err!(ENODATA, "No data available."); + declare_err!(ETIME, "Timer expired."); + declare_err!(ENOSR, "Out of streams resources."); + declare_err!(ENONET, "Machine is not on the network."); + declare_err!(ENOPKG, "Package not installed."); + declare_err!(EREMOTE, "Object is remote."); + declare_err!(ENOLINK, "Link has been severed."); + declare_err!(EADV, "Advertise error."); + declare_err!(ESRMNT, "Srmount error."); + declare_err!(ECOMM, "Communication error on send."); + declare_err!(EPROTO, "Protocol error."); + declare_err!(EMULTIHOP, "Multihop attempted."); + declare_err!(EDOTDOT, "RFS specific error."); + declare_err!(EBADMSG, "Not a data message."); + declare_err!(EOVERFLOW, "Value too large for defined data type."); + declare_err!(ENOTUNIQ, "Name not unique on network."); + declare_err!(EBADFD, "File descriptor in bad state."); + declare_err!(EREMCHG, "Remote address changed."); + declare_err!(ELIBACC, "Can not access a needed shared library."); + declare_err!(ELIBBAD, "Accessing a corrupted shared library."); + declare_err!(ELIBSCN, ".lib section in a.out corrupted."); + declare_err!(ELIBMAX, "Attempting to link in too many shared libraries."); + declare_err!(ELIBEXEC, "Cannot exec a shared library directly."); + declare_err!(EILSEQ, "Illegal byte sequence."); + declare_err!(ERESTART, "Interrupted system call should be restarted."); + declare_err!(ESTRPIPE, "Streams pipe error."); + declare_err!(EUSERS, "Too many users."); + declare_err!(ENOTSOCK, "Socket operation on non-socket."); + declare_err!(EDESTADDRREQ, "Destination address required."); + declare_err!(EMSGSIZE, "Message too long."); + declare_err!(EPROTOTYPE, "Protocol wrong type for socket."); + declare_err!(ENOPROTOOPT, "Protocol not available."); + declare_err!(EPROTONOSUPPORT, "Protocol not supported."); + declare_err!(ESOCKTNOSUPPORT, "Socket type not supported."); + declare_err!(EOPNOTSUPP, "Operation not supported on transport endpoint."); + declare_err!(EPFNOSUPPORT, "Protocol family not supported."); + declare_err!(EAFNOSUPPORT, "Address family not supported by protocol."); + declare_err!(EADDRINUSE, "Address already in use."); + declare_err!(EADDRNOTAVAIL, "Cannot assign requested address."); + declare_err!(ENETDOWN, "Network is down."); + declare_err!(ENETUNREACH, "Network is unreachable."); + declare_err!(ENETRESET, "Network dropped connection because of reset."); + declare_err!(ECONNABORTED, "Software caused 
connection abort."); + declare_err!(ECONNRESET, "Connection reset by peer."); + declare_err!(ENOBUFS, "No buffer space available."); + declare_err!(EISCONN, "Transport endpoint is already connected."); + declare_err!(ENOTCONN, "Transport endpoint is not connected."); + declare_err!(ESHUTDOWN, "Cannot send after transport endpoint shutdown."); + declare_err!(ETOOMANYREFS, "Too many references: cannot splice."); + declare_err!(ETIMEDOUT, "Connection timed out."); + declare_err!(ECONNREFUSED, "Connection refused."); + declare_err!(EHOSTDOWN, "Host is down."); + declare_err!(EHOSTUNREACH, "No route to host."); + declare_err!(EALREADY, "Operation already in progress."); + declare_err!(EINPROGRESS, "Operation now in progress."); + declare_err!(ESTALE, "Stale file handle."); + declare_err!(EUCLEAN, "Structure needs cleaning."); + declare_err!(ENOTNAM, "Not a XENIX named type file."); + declare_err!(ENAVAIL, "No XENIX semaphores available."); + declare_err!(EISNAM, "Is a named type file."); + declare_err!(EREMOTEIO, "Remote I/O error."); + declare_err!(EDQUOT, "Quota exceeded."); + declare_err!(ENOMEDIUM, "No medium found."); + declare_err!(EMEDIUMTYPE, "Wrong medium type."); + declare_err!(ECANCELED, "Operation Canceled."); + declare_err!(ENOKEY, "Required key not available."); + declare_err!(EKEYEXPIRED, "Key has expired."); + declare_err!(EKEYREVOKED, "Key has been revoked."); + declare_err!(EKEYREJECTED, "Key was rejected by service."); + declare_err!(EOWNERDEAD, "Owner died.", "", "For robust mutexes."); + declare_err!(ENOTRECOVERABLE, "State not recoverable."); + declare_err!(ERFKILL, "Operation not possible due to RF-kill."); + declare_err!(EHWPOISON, "Memory page has hardware error."); + declare_err!(ERESTARTSYS, "Restart the system call."); + declare_err!(ENOTSUPP, "Operation is not supported."); + declare_err!(ENOPARAM, "Parameter not supported."); } /// Generic integer kernel error. @@ -72,10 +186,72 @@ pub mod code { pub struct Error(core::ffi::c_int); impl Error { + /// Creates an [`Error`] from a kernel error code. + /// + /// It is a bug to pass an out-of-range `errno`. `EINVAL` would + /// be returned in such a case. + pub(crate) fn from_kernel_errno(errno: core::ffi::c_int) -> Error { + if errno < -(bindings::MAX_ERRNO as i32) || errno >= 0 { + // TODO: Make it a `WARN_ONCE` once available. + crate::pr_warn!( + "attempted to create `Error` with out of range `errno`: {}", + errno + ); + return code::EINVAL; + } + + // INVARIANT: The check above ensures the type invariant + // will hold. + Error(errno) + } + /// Returns the kernel error code. pub fn to_kernel_errno(self) -> core::ffi::c_int { self.0 } + + /// Returns the error encoded as a pointer. + #[allow(dead_code)] + pub(crate) fn to_ptr<T>(self) -> *mut T { + // SAFETY: Valid as long as self.0 is a valid error + unsafe { bindings::ERR_PTR(self.0.into()) as *mut _ } + } + + /// Returns a string representing the error, if one exists. + #[cfg(not(testlib))] + pub fn name(&self) -> Option<&'static CStr> { + // SAFETY: Just an FFI call, there are no extra safety requirements. + let ptr = unsafe { bindings::errname(-self.0) }; + if ptr.is_null() { + None + } else { + // SAFETY: The string returned by `errname` is static and `NUL`-terminated. + Some(unsafe { CStr::from_char_ptr(ptr) }) + } + } + + /// Returns a string representing the error, if one exists. 
+ /// + /// When `testlib` is configured, this always returns `None` to avoid the dependency on a + /// kernel function so that tests that use this (e.g., by calling [`Result::unwrap`]) can still + /// run in userspace. + #[cfg(testlib)] + pub fn name(&self) -> Option<&'static CStr> { + None + } +} + +impl fmt::Debug for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self.name() { + // Print out number if no name can be found. + None => f.debug_tuple("Error").field(&-self.0).finish(), + // SAFETY: These strings are ASCII-only. + Some(name) => f + .debug_tuple(unsafe { core::str::from_utf8_unchecked(name) }) + .finish(), + } + } } impl From<AllocError> for Error { @@ -141,3 +317,108 @@ impl From<core::convert::Infallible> for Error { /// it should still be modeled as returning a `Result` rather than /// just an [`Error`]. pub type Result<T = ()> = core::result::Result<T, Error>; + +/// Converts an integer as returned by a C kernel function to an error if it's negative, and +/// `Ok(())` otherwise. +pub fn to_result(err: core::ffi::c_int) -> Result { + if err < 0 { + Err(Error::from_kernel_errno(err)) + } else { + Ok(()) + } +} + +/// Transform a kernel "error pointer" to a normal pointer. +/// +/// Some kernel C API functions return an "error pointer" which optionally +/// embeds an `errno`. Callers are supposed to check the returned pointer +/// for errors. This function performs the check and converts the "error pointer" +/// to a normal pointer in an idiomatic fashion. +/// +/// # Examples +/// +/// ```ignore +/// # use kernel::from_kernel_err_ptr; +/// # use kernel::bindings; +/// fn devm_platform_ioremap_resource( +/// pdev: &mut PlatformDevice, +/// index: u32, +/// ) -> Result<*mut core::ffi::c_void> { +/// // SAFETY: FFI call. +/// unsafe { +/// from_kernel_err_ptr(bindings::devm_platform_ioremap_resource( +/// pdev.to_ptr(), +/// index, +/// )) +/// } +/// } +/// ``` +// TODO: Remove `dead_code` marker once an in-kernel client is available. +#[allow(dead_code)] +pub(crate) fn from_kernel_err_ptr<T>(ptr: *mut T) -> Result<*mut T> { + // CAST: Casting a pointer to `*const core::ffi::c_void` is always valid. + let const_ptr: *const core::ffi::c_void = ptr.cast(); + // SAFETY: The FFI function does not deref the pointer. + if unsafe { bindings::IS_ERR(const_ptr) } { + // SAFETY: The FFI function does not deref the pointer. + let err = unsafe { bindings::PTR_ERR(const_ptr) }; + // CAST: If `IS_ERR()` returns `true`, + // then `PTR_ERR()` is guaranteed to return a + // negative value greater-or-equal to `-bindings::MAX_ERRNO`, + // which always fits in an `i16`, as per the invariant above. + // And an `i16` always fits in an `i32`. So casting `err` to + // an `i32` can never overflow, and is always valid. + // + // SAFETY: `IS_ERR()` ensures `err` is a + // negative value greater-or-equal to `-bindings::MAX_ERRNO`. + #[cfg_attr(CONFIG_ARM, allow(clippy::unnecessary_cast))] + return Err(Error(err as i32)); + } + Ok(ptr) +} + +pub(crate) fn from_kernel_result_helper<T>(r: Result<T>) -> T +where + T: From<i16>, +{ + match r { + Ok(v) => v, + // NO-OVERFLOW: negative `errno`s are no smaller than `-bindings::MAX_ERRNO`, + // `-bindings::MAX_ERRNO` fits in an `i16` as per invariant above, + // therefore a negative `errno` always fits in an `i16` and will not overflow. + Err(e) => T::from(e.to_kernel_errno() as i16), + } +} + +/// Transforms a [`crate::error::Result<T>`] to a kernel C integer result. 
+/// +/// This is useful when calling Rust functions that return [`crate::error::Result<T>`] +/// from inside `extern "C"` functions that need to return an integer +/// error result. +/// +/// `T` should be convertible to an `i16` via `From<i16>`. +/// +/// # Examples +/// +/// ```ignore +/// # use kernel::from_kernel_result; +/// # use kernel::bindings; +/// unsafe extern "C" fn probe_callback( +/// pdev: *mut bindings::platform_device, +/// ) -> core::ffi::c_int { +/// from_kernel_result! { +/// let ptr = devm_alloc(pdev)?; +/// bindings::platform_set_drvdata(pdev, ptr); +/// Ok(0) +/// } +/// } +/// ``` +macro_rules! from_kernel_result { + ($($tt:tt)*) => {{ + $crate::error::from_kernel_result_helper((|| { + $($tt)* + })()) + }}; +} + +pub(crate) use from_kernel_result; diff --git a/rust/kernel/io_buffer.rs b/rust/kernel/io_buffer.rs new file mode 100644 index 000000000000..d5a258a5ff8f --- /dev/null +++ b/rust/kernel/io_buffer.rs @@ -0,0 +1,153 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Buffers used in IO. + +use crate::error::Result; +use alloc::vec::Vec; +use core::mem::{size_of, MaybeUninit}; + +/// Represents a buffer to be read from during IO. +pub trait IoBufferReader { + /// Returns the number of bytes left to be read from the io buffer. + /// + /// Note that even reading less than this number of bytes may fail. + fn len(&self) -> usize; + + /// Returns `true` if no data is available in the io buffer. + fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Reads raw data from the io buffer into a raw kernel buffer. + /// + /// # Safety + /// + /// The output buffer must be valid. + unsafe fn read_raw(&mut self, out: *mut u8, len: usize) -> Result; + + /// Reads all data remaining in the io buffer. + /// + /// Returns `EFAULT` if the address does not currently point to mapped, readable memory. + fn read_all(&mut self) -> Result<Vec<u8>> { + let mut data = Vec::<u8>::new(); + data.try_resize(self.len(), 0)?; + + // SAFETY: The output buffer is valid as we just allocated it. + unsafe { self.read_raw(data.as_mut_ptr(), data.len())? }; + Ok(data) + } + + /// Reads a byte slice from the io buffer. + /// + /// Returns `EFAULT` if the byte slice is bigger than the remaining size of the user slice or + /// if the address does not currently point to mapped, readable memory. + fn read_slice(&mut self, data: &mut [u8]) -> Result { + // SAFETY: The output buffer is valid as it's coming from a live reference. + unsafe { self.read_raw(data.as_mut_ptr(), data.len()) } + } + + /// Reads the contents of a plain old data (POD) type from the io buffer. + fn read<T: ReadableFromBytes>(&mut self) -> Result<T> { + let mut out = MaybeUninit::<T>::uninit(); + // SAFETY: The buffer is valid as it was just allocated. + unsafe { self.read_raw(out.as_mut_ptr() as _, size_of::<T>()) }?; + // SAFETY: We just initialised the data. + Ok(unsafe { out.assume_init() }) + } +} + +/// Represents a buffer to be written to during IO. +pub trait IoBufferWriter { + /// Returns the number of bytes left to be written into the io buffer. + /// + /// Note that even writing less than this number of bytes may fail. + fn len(&self) -> usize; + + /// Returns `true` if the io buffer cannot hold any additional data. + fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Writes zeroes to the io buffer. + /// + /// Differently from the other write functions, `clear` will zero as much as it can and update + /// the writer internal state to reflect this. 
It will, however, return an error if it cannot + /// clear `len` bytes. + /// + /// For example, if a caller requests that 100 bytes be cleared but a segfault happens after + /// 20 bytes, then EFAULT is returned and the writer is advanced by 20 bytes. + fn clear(&mut self, len: usize) -> Result; + + /// Writes a byte slice into the io buffer. + /// + /// Returns `EFAULT` if the byte slice is bigger than the remaining size of the io buffer or if + /// the address does not currently point to mapped, writable memory. + fn write_slice(&mut self, data: &[u8]) -> Result { + // SAFETY: The input buffer is valid as it's coming from a live reference. + unsafe { self.write_raw(data.as_ptr(), data.len()) } + } + + /// Writes raw data to the io buffer from a raw kernel buffer. + /// + /// # Safety + /// + /// The input buffer must be valid. + unsafe fn write_raw(&mut self, data: *const u8, len: usize) -> Result; + + /// Writes the contents of the given data into the io buffer. + fn write<T: WritableToBytes>(&mut self, data: &T) -> Result { + // SAFETY: The input buffer is valid as it's coming from a live + // reference to a type that implements `WritableToBytes`. + unsafe { self.write_raw(data as *const T as _, size_of::<T>()) } + } +} + +/// Specifies that a type is safely readable from byte slices. +/// +/// Not all types can be safely read from byte slices; examples from +/// <https://doc.rust-lang.org/reference/behavior-considered-undefined.html> include `bool` +/// that must be either `0` or `1`, and `char` that cannot be a surrogate or above `char::MAX`. +/// +/// # Safety +/// +/// Implementers must ensure that the type is made up only of types that can be safely read from +/// arbitrary byte sequences (e.g., `u32`, `u64`, etc.). +pub unsafe trait ReadableFromBytes {} + +// SAFETY: All bit patterns are acceptable values of the types below. +unsafe impl ReadableFromBytes for u8 {} +unsafe impl ReadableFromBytes for u16 {} +unsafe impl ReadableFromBytes for u32 {} +unsafe impl ReadableFromBytes for u64 {} +unsafe impl ReadableFromBytes for usize {} +unsafe impl ReadableFromBytes for i8 {} +unsafe impl ReadableFromBytes for i16 {} +unsafe impl ReadableFromBytes for i32 {} +unsafe impl ReadableFromBytes for i64 {} +unsafe impl ReadableFromBytes for isize {} + +/// Specifies that a type is safely writable to byte slices. +/// +/// This means that we don't read undefined values (which leads to UB) in preparation for writing +/// to the byte slice. It also ensures that no potentially sensitive information is leaked into the +/// byte slices. +/// +/// # Safety +/// +/// A type must not include padding bytes and must be fully initialised to safely implement +/// [`WritableToBytes`] (i.e., it doesn't contain [`MaybeUninit`] fields). A composition of +/// writable types in a structure is not necessarily writable because it may result in padding +/// bytes. +pub unsafe trait WritableToBytes {} + +// SAFETY: Initialised instances of the following types have no uninitialised portions. 
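Editor's sketch of how the `io_buffer` traits above could be used for a padding-free record type. The `Record` struct and the generic `echo_record` helper are invented for illustration, and it is assumed that some concrete reader/writer pair (e.g. a user slice) implements `IoBufferReader`/`IoBufferWriter`.

```
use kernel::io_buffer::{IoBufferReader, IoBufferWriter, ReadableFromBytes, WritableToBytes};
use kernel::prelude::*;

/// A fixed-layout record with no padding, exchanged through an IO buffer.
#[repr(C)]
struct Record {
    id: u32,
    flags: u32,
}

// SAFETY: `Record` only contains `u32` fields, so every bit pattern is a valid value.
unsafe impl ReadableFromBytes for Record {}
// SAFETY: `Record` has no padding and its fields are always initialised.
unsafe impl WritableToBytes for Record {}

/// Reads one record from `reader` and writes it back out through `writer`.
fn echo_record<R: IoBufferReader, W: IoBufferWriter>(reader: &mut R, writer: &mut W) -> Result {
    let record: Record = reader.read()?;
    writer.write(&record)
}
```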
+unsafe impl WritableToBytes for u8 {} +unsafe impl WritableToBytes for u16 {} +unsafe impl WritableToBytes for u32 {} +unsafe impl WritableToBytes for u64 {} +unsafe impl WritableToBytes for usize {} +unsafe impl WritableToBytes for i8 {} +unsafe impl WritableToBytes for i16 {} +unsafe impl WritableToBytes for i32 {} +unsafe impl WritableToBytes for i64 {} +unsafe impl WritableToBytes for isize {} diff --git a/rust/kernel/io_mem.rs b/rust/kernel/io_mem.rs new file mode 100644 index 000000000000..5bb8800b04f5 --- /dev/null +++ b/rust/kernel/io_mem.rs @@ -0,0 +1,292 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Memory-mapped IO. +//! +//! C header: [`include/asm-generic/io.h`](../../../../include/asm-generic/io.h) + +#![allow(dead_code)] + +use crate::{bindings, error::code::*, error::Result}; +use core::convert::TryInto; + +/// The type of `Resource`. +pub enum IoResource { + /// i/o memory + Mem = bindings::IORESOURCE_MEM as _, +} + +/// Represents a memory resource. +pub struct Resource { + offset: bindings::resource_size_t, + size: bindings::resource_size_t, + flags: core::ffi::c_ulong, +} + +impl Resource { + pub(crate) fn new( + start: bindings::resource_size_t, + end: bindings::resource_size_t, + flags: core::ffi::c_ulong, + ) -> Option<Self> { + if start == 0 { + return None; + } + Some(Self { + offset: start, + size: end.checked_sub(start)?.checked_add(1)?, + flags, + }) + } +} + +/// Represents a memory block of at least `SIZE` bytes. +/// +/// # Invariants +/// +/// `ptr` is a non-null and valid address of at least `SIZE` bytes and returned by an `ioremap` +/// variant. `ptr` is also 8-byte aligned. +/// +/// # Examples +/// +/// ``` +/// # use kernel::prelude::*; +/// use kernel::io_mem::{IoMem, Resource}; +/// +/// fn test(res: Resource) -> Result { +/// // Create an io mem block of at least 100 bytes. +/// // SAFETY: No DMA operations are initiated through `mem`. +/// let mem = unsafe { IoMem::<100>::try_new(res) }?; +/// +/// // Read one byte from offset 10. +/// let v = mem.readb(10); +/// +/// // Write value to offset 20. +/// mem.writeb(v, 20); +/// +/// Ok(()) +/// } +/// ``` +pub struct IoMem<const SIZE: usize> { + ptr: usize, +} + +macro_rules! define_read { + ($(#[$attr:meta])* $name:ident, $try_name:ident, $type_name:ty) => { + /// Reads IO data from the given offset known, at compile time. + /// + /// If the offset is not known at compile time, the build will fail. + $(#[$attr])* + #[inline] + pub fn $name(&self, offset: usize) -> $type_name { + Self::check_offset::<$type_name>(offset); + let ptr = self.ptr.wrapping_add(offset); + // SAFETY: The type invariants guarantee that `ptr` is a valid pointer. The check above + // guarantees that the code won't build if `offset` makes the read go out of bounds + // (including the type size). + unsafe { bindings::$name(ptr as _) } + } + + /// Reads IO data from the given offset. + /// + /// It fails if/when the offset (plus the type size) is out of bounds. + $(#[$attr])* + pub fn $try_name(&self, offset: usize) -> Result<$type_name> { + if !Self::offset_ok::<$type_name>(offset) { + return Err(EINVAL); + } + let ptr = self.ptr.wrapping_add(offset); + // SAFETY: The type invariants guarantee that `ptr` is a valid pointer. The check above + // returns an error if `offset` would make the read go out of bounds (including the + // type size). + Ok(unsafe { bindings::$name(ptr as _) }) + } + }; +} + +macro_rules! 
define_write { + ($(#[$attr:meta])* $name:ident, $try_name:ident, $type_name:ty) => { + /// Writes IO data to the given offset, known at compile time. + /// + /// If the offset is not known at compile time, the build will fail. + $(#[$attr])* + #[inline] + pub fn $name(&self, value: $type_name, offset: usize) { + Self::check_offset::<$type_name>(offset); + let ptr = self.ptr.wrapping_add(offset); + // SAFETY: The type invariants guarantee that `ptr` is a valid pointer. The check above + // guarantees that the code won't link if `offset` makes the write go out of bounds + // (including the type size). + unsafe { bindings::$name(value, ptr as _) } + } + + /// Writes IO data to the given offset. + /// + /// It fails if/when the offset (plus the type size) is out of bounds. + $(#[$attr])* + pub fn $try_name(&self, value: $type_name, offset: usize) -> Result { + if !Self::offset_ok::<$type_name>(offset) { + return Err(EINVAL); + } + let ptr = self.ptr.wrapping_add(offset); + // SAFETY: The type invariants guarantee that `ptr` is a valid pointer. The check above + // returns an error if `offset` would make the write go out of bounds (including the + // type size). + unsafe { bindings::$name(value, ptr as _) }; + Ok(()) + } + }; +} + +impl<const SIZE: usize> IoMem<SIZE> { + /// Tries to create a new instance of a memory block. + /// + /// The resource described by `res` is mapped into the CPU's address space so that it can be + /// accessed directly. It is also consumed by this function so that it can't be mapped again + /// to a different address. + /// + /// # Safety + /// + /// Callers must ensure that either (a) the resulting interface cannot be used to initiate DMA + /// operations, or (b) that DMA operations initiated via the returned interface use DMA handles + /// allocated through the `dma` module. + pub unsafe fn try_new(res: Resource) -> Result<Self> { + // Check that the resource has at least `SIZE` bytes in it. + if res.size < SIZE.try_into()? { + return Err(EINVAL); + } + + // To be able to check pointers at compile time based only on offsets, we need to guarantee + // that the base pointer is minimally aligned. So we conservatively expect at least 8 bytes. + if res.offset % 8 != 0 { + crate::pr_err!("Physical address is not 64-bit aligned: {:x}", res.offset); + return Err(EDOM); + } + + // Try to map the resource. + // SAFETY: Just mapping the memory range. + let addr = if res.flags & (bindings::IORESOURCE_MEM_NONPOSTED as core::ffi::c_ulong) != 0 { + unsafe { bindings::ioremap_np(res.offset, res.size as _) } + } else { + unsafe { bindings::ioremap(res.offset, res.size as _) } + }; + + if addr.is_null() { + Err(ENOMEM) + } else { + // INVARIANT: `addr` is non-null and was returned by `ioremap`, so it is valid. It is + // also 8-byte aligned because we checked it above. 
+ Ok(Self { ptr: addr as usize }) + } + } + + #[inline] + const fn offset_ok<T>(offset: usize) -> bool { + let type_size = core::mem::size_of::<T>(); + if let Some(end) = offset.checked_add(type_size) { + end <= SIZE && offset % type_size == 0 + } else { + false + } + } + + fn offset_ok_of_val<T: ?Sized>(offset: usize, value: &T) -> bool { + let value_size = core::mem::size_of_val(value); + let value_alignment = core::mem::align_of_val(value); + if let Some(end) = offset.checked_add(value_size) { + end <= SIZE && offset % value_alignment == 0 + } else { + false + } + } + + #[inline] + const fn check_offset<T>(offset: usize) { + crate::build_assert!(Self::offset_ok::<T>(offset), "IoMem offset overflow"); + } + + /// Copy memory block from an i/o memory by filling the specified buffer with it. + /// + /// # Examples + /// ``` + /// use kernel::io_mem::{self, IoMem, Resource}; + /// + /// fn test(res: Resource) -> Result { + /// // Create an i/o memory block of at least 100 bytes. + /// let mem = unsafe { IoMem::<100>::try_new(res) }?; + /// + /// let mut buffer: [u8; 32] = [0; 32]; + /// + /// // Memcpy 16 bytes from an offset 10 of i/o memory block into the buffer. + /// mem.try_memcpy_fromio(&mut buffer[..16], 10)?; + /// + /// Ok(()) + /// } + /// ``` + pub fn try_memcpy_fromio(&self, buffer: &mut [u8], offset: usize) -> Result { + if !Self::offset_ok_of_val(offset, buffer) { + return Err(EINVAL); + } + + let ptr = self.ptr.wrapping_add(offset); + + // SAFETY: + // - The type invariants guarantee that `ptr` is a valid pointer. + // - The bounds of `buffer` are checked with a call to `offset_ok_of_val()`. + unsafe { + bindings::memcpy_fromio( + buffer.as_mut_ptr() as *mut _, + ptr as *const _, + buffer.len() as _, + ) + }; + Ok(()) + } + + define_read!(readb, try_readb, u8); + define_read!(readw, try_readw, u16); + define_read!(readl, try_readl, u32); + define_read!( + #[cfg(CONFIG_64BIT)] + readq, + try_readq, + u64 + ); + + define_read!(readb_relaxed, try_readb_relaxed, u8); + define_read!(readw_relaxed, try_readw_relaxed, u16); + define_read!(readl_relaxed, try_readl_relaxed, u32); + define_read!( + #[cfg(CONFIG_64BIT)] + readq_relaxed, + try_readq_relaxed, + u64 + ); + + define_write!(writeb, try_writeb, u8); + define_write!(writew, try_writew, u16); + define_write!(writel, try_writel, u32); + define_write!( + #[cfg(CONFIG_64BIT)] + writeq, + try_writeq, + u64 + ); + + define_write!(writeb_relaxed, try_writeb_relaxed, u8); + define_write!(writew_relaxed, try_writew_relaxed, u16); + define_write!(writel_relaxed, try_writel_relaxed, u32); + define_write!( + #[cfg(CONFIG_64BIT)] + writeq_relaxed, + try_writeq_relaxed, + u64 + ); +} + +impl<const SIZE: usize> Drop for IoMem<SIZE> { + fn drop(&mut self) { + // SAFETY: By the type invariant, `self.ptr` is a value returned by a previous successful + // call to `ioremap`. + unsafe { bindings::iounmap(self.ptr as _) }; + } +} diff --git a/rust/kernel/io_pgtable.rs b/rust/kernel/io_pgtable.rs new file mode 100644 index 000000000000..4c90adefcd24 --- /dev/null +++ b/rust/kernel/io_pgtable.rs @@ -0,0 +1,353 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! IOMMU page table management +//! +//! C header: [`include/io-pgtable.h`](../../../../include/io-pgtable.h) + +use crate::{ + bindings, device, + error::{code::*, to_result, Result}, + types::{ForeignOwnable, ScopeGuard}, +}; + +use core::marker::PhantomData; +use core::mem; +use core::num::NonZeroU64; + +/// Protection flags used with IOMMU mappings. +pub mod prot { + /// Read access. 
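Editor's note on the `IoMem` accessors defined in `rust/kernel/io_mem.rs` above: a short sketch of the intended usage pattern, where register offsets are kept as constants so the compile-time bounds check in `readl`/`writel` can see them. The register layout is invented for illustration.

```
use kernel::io_mem::IoMem;

// Invented register layout for an imaginary device.
const REG_CTRL: usize = 0x00;
const REG_STATUS: usize = 0x04;

/// Enables the (hypothetical) device and reads back its status register.
///
/// `mmio` would come from an earlier `IoMem::<8>::try_new(res)` call.
fn start_device(mmio: &IoMem<8>) -> u32 {
    // Offsets are constants, so an out-of-bounds access fails at build time.
    mmio.writel(1, REG_CTRL);
    mmio.readl(REG_STATUS)
}
```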
+ pub const READ: u32 = bindings::IOMMU_READ; + /// Write access. + pub const WRITE: u32 = bindings::IOMMU_WRITE; + /// Request cache coherency. + pub const CACHE: u32 = bindings::IOMMU_CACHE; + /// Request no-execute permission. + pub const NOEXEC: u32 = bindings::IOMMU_NOEXEC; + /// MMIO peripheral mapping. + pub const MMIO: u32 = bindings::IOMMU_MMIO; + /// Privileged mapping. + pub const PRIV: u32 = bindings::IOMMU_PRIV; +} + +/// Represents a requested io_pgtable configuration. +pub struct Config { + /// Quirk bitmask (type-specific). + pub quirks: usize, + /// Valid page sizes, as a bitmask of powers of two. + pub pgsize_bitmap: usize, + /// Input address space size in bits. + pub ias: usize, + /// Output address space size in bits. + pub oas: usize, + /// IOMMU uses coherent accesses for page table walks. + pub coherent_walk: bool, +} + +/// IOMMU callbacks for TLB and page table management. +/// +/// Users must implement this trait to perform the TLB flush actions for this IOMMU, if +/// required. +pub trait FlushOps { + /// User-specified type owned by the IOPagetable that will be passed to TLB operations. + type Data: ForeignOwnable + Send + Sync; + + /// Synchronously invalidate the entire TLB context. + fn tlb_flush_all(data: <Self::Data as ForeignOwnable>::Borrowed<'_>); + + /// Synchronously invalidate all intermediate TLB state (sometimes referred to as the "walk + /// cache") for a virtual address range. + fn tlb_flush_walk( + data: <Self::Data as ForeignOwnable>::Borrowed<'_>, + iova: usize, + size: usize, + granule: usize, + ); + + /// Optional callback to queue up leaf TLB invalidation for a single page. + /// + /// IOMMUs that cannot batch TLB invalidation operations efficiently will typically issue + /// them here, but others may decide to update the iommu_iotlb_gather structure and defer + /// the invalidation until iommu_iotlb_sync() instead. + /// + /// TODO: Implement the gather argument for batching. + fn tlb_add_page( + data: <Self::Data as ForeignOwnable>::Borrowed<'_>, + iova: usize, + granule: usize, + ); +} + +/// Inner page table info shared across all table types. +/// # Invariants +/// +/// - [`self.ops`] is valid and non-null. +/// - [`self.cfg`] is valid and non-null. +#[doc(hidden)] +pub struct IoPageTableInner { + ops: *mut bindings::io_pgtable_ops, + cfg: bindings::io_pgtable_cfg, + data: *mut core::ffi::c_void, +} + +/// Helper trait to get the config type for a single page table type from the union. +pub trait GetConfig { + /// Returns the specific output configuration for this page table type. + fn cfg(iopt: &impl IoPageTable) -> &Self + where + Self: Sized; +} + +/// A generic IOMMU page table +pub trait IoPageTable: crate::private::Sealed { + #[doc(hidden)] + const FLUSH_OPS: bindings::iommu_flush_ops; + + #[doc(hidden)] + fn new_fmt<T: FlushOps>( + dev: &dyn device::RawDevice, + format: u32, + config: Config, + data: T::Data, + ) -> Result<IoPageTableInner> { + let ptr = data.into_foreign() as *mut _; + let guard = ScopeGuard::new(|| { + // SAFETY: `ptr` came from a previous call to `into_foreign`. 
+ unsafe { T::Data::from_foreign(ptr) }; + }); + + let mut raw_cfg = bindings::io_pgtable_cfg { + quirks: config.quirks.try_into()?, + pgsize_bitmap: config.pgsize_bitmap.try_into()?, + ias: config.ias.try_into()?, + oas: config.oas.try_into()?, + coherent_walk: config.coherent_walk, + tlb: &Self::FLUSH_OPS, + iommu_dev: dev.raw_device(), + __bindgen_anon_1: unsafe { mem::zeroed() }, + }; + + let ops = unsafe { + bindings::alloc_io_pgtable_ops(format as bindings::io_pgtable_fmt, &mut raw_cfg, ptr) + }; + + if ops.is_null() { + return Err(EINVAL); + } + + guard.dismiss(); + Ok(IoPageTableInner { + ops, + cfg: raw_cfg, + data: ptr, + }) + } + + /// Map a range of pages. + fn map_pages( + &mut self, + iova: usize, + paddr: usize, + pgsize: usize, + pgcount: usize, + prot: u32, + ) -> Result<usize> { + let mut mapped: usize = 0; + + to_result(unsafe { + (*self.inner_mut().ops).map_pages.unwrap()( + self.inner_mut().ops, + iova as u64, + paddr as u64, + pgsize, + pgcount, + prot as i32, + bindings::GFP_KERNEL, + &mut mapped, + ) + })?; + + Ok(mapped) + } + + /// Unmap a range of pages. + fn unmap_pages( + &mut self, + iova: usize, + pgsize: usize, + pgcount: usize, + // TODO: gather: *mut iommu_iotlb_gather, + ) -> usize { + unsafe { + (*self.inner_mut().ops).unmap_pages.unwrap()( + self.inner_mut().ops, + iova as u64, + pgsize, + pgcount, + core::ptr::null_mut(), + ) + } + } + + /// Translate an IOVA to the corresponding physical address, if mapped. + fn iova_to_phys(&mut self, iova: usize) -> Option<NonZeroU64> { + NonZeroU64::new(unsafe { + (*self.inner_mut().ops).iova_to_phys.unwrap()(self.inner_mut().ops, iova as u64) + }) + } + + #[doc(hidden)] + fn inner_mut(&mut self) -> &mut IoPageTableInner; + + #[doc(hidden)] + fn inner(&self) -> &IoPageTableInner; + + #[doc(hidden)] + fn raw_cfg(&self) -> &bindings::io_pgtable_cfg { + &self.inner().cfg + } +} + +unsafe impl Send for IoPageTableInner {} +unsafe impl Sync for IoPageTableInner {} + +unsafe extern "C" fn tlb_flush_all_callback<T: FlushOps>(cookie: *mut core::ffi::c_void) { + T::tlb_flush_all(unsafe { T::Data::borrow(cookie) }); +} + +unsafe extern "C" fn tlb_flush_walk_callback<T: FlushOps>( + iova: core::ffi::c_ulong, + size: usize, + granule: usize, + cookie: *mut core::ffi::c_void, +) { + T::tlb_flush_walk( + unsafe { T::Data::borrow(cookie) }, + iova as usize, + size, + granule, + ); +} + +unsafe extern "C" fn tlb_add_page_callback<T: FlushOps>( + _gather: *mut bindings::iommu_iotlb_gather, + iova: core::ffi::c_ulong, + granule: usize, + cookie: *mut core::ffi::c_void, +) { + T::tlb_add_page(unsafe { T::Data::borrow(cookie) }, iova as usize, granule); +} + +macro_rules! iopt_cfg { + ($name:ident, $field:ident, $type:ident) => { + /// An IOMMU page table configuration for a specific kind of pagetable. + pub type $name = bindings::$type; + + impl GetConfig for $name { + fn cfg(iopt: &impl IoPageTable) -> &$name { + unsafe { &iopt.raw_cfg().__bindgen_anon_1.$field } + } + } + }; +} + +impl GetConfig for () { + fn cfg(_iopt: &impl IoPageTable) -> &() { + &() + } +} + +macro_rules! iopt_type { + ($type:ident, $cfg:ty, $fmt:ident) => { + /// Represents an IOPagetable of this type. + pub struct $type<T: FlushOps>(IoPageTableInner, PhantomData<T>); + + impl<T: FlushOps> $type<T> { + /// Creates a new IOPagetable implementation of this type. 
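Editor's sketch of how a driver might tie the `io_pgtable` pieces together, using `ARM64LPAES1`, one of the concrete table types instantiated further down in this file. It assumes `()` implements `ForeignOwnable` with `Borrowed<'_> = ()` (as it must for the default driver data type), and the configuration values are placeholders, not recommendations.

```
use kernel::io_pgtable::{prot, Config, FlushOps, IoPageTable, ARM64LPAES1};
use kernel::{device, prelude::*};

/// Flush callbacks for an IOMMU that needs no per-table context in this sketch.
struct NoTlb;

impl FlushOps for NoTlb {
    type Data = ();

    // A real IOMMU driver would issue TLB maintenance here.
    fn tlb_flush_all(_data: ()) {}
    fn tlb_flush_walk(_data: (), _iova: usize, _size: usize, _granule: usize) {}
    fn tlb_add_page(_data: (), _iova: usize, _granule: usize) {}
}

/// Builds a stage-1 LPAE table for `dev` and maps a single 16 KiB page.
fn build_table(dev: &dyn device::RawDevice) -> Result<ARM64LPAES1<NoTlb>> {
    let config = Config {
        quirks: 0,
        pgsize_bitmap: 0x4000, // 16 KiB pages only (placeholder).
        ias: 32,
        oas: 36,
        coherent_walk: true,
    };
    let mut table = ARM64LPAES1::<NoTlb>::new(dev, config, ())?;
    table.map_pages(0, 0x8000_0000, 0x4000, 1, prot::READ | prot::WRITE)?;
    Ok(table)
}
```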
+ pub fn new(dev: &dyn device::RawDevice, config: Config, data: T::Data) -> Result<Self> { + Ok(Self( + <Self as IoPageTable>::new_fmt::<T>(dev, bindings::$fmt, config, data)?, + PhantomData, + )) + } + + /// Get the configuration for this IOPagetable. + pub fn cfg(&self) -> &$cfg { + <$cfg as GetConfig>::cfg(self) + } + } + + impl<T: FlushOps> crate::private::Sealed for $type<T> {} + + impl<T: FlushOps> IoPageTable for $type<T> { + const FLUSH_OPS: bindings::iommu_flush_ops = bindings::iommu_flush_ops { + tlb_flush_all: Some(tlb_flush_all_callback::<T>), + tlb_flush_walk: Some(tlb_flush_walk_callback::<T>), + tlb_add_page: Some(tlb_add_page_callback::<T>), + }; + + fn inner(&self) -> &IoPageTableInner { + &self.0 + } + + fn inner_mut(&mut self) -> &mut IoPageTableInner { + &mut self.0 + } + } + + impl<T: FlushOps> Drop for $type<T> { + fn drop(&mut self) { + // SAFETY: The pointer is valid by the type invariant. + unsafe { bindings::free_io_pgtable_ops(self.0.ops) }; + + // Free context data. + // + // SAFETY: This matches the call to `into_foreign` from `new` in the success case. + unsafe { T::Data::from_foreign(self.0.data) }; + } + } + }; +} + +// Ew... +iopt_cfg!( + ARMLPAES1Cfg, + arm_lpae_s1_cfg, + io_pgtable_cfg__bindgen_ty_1__bindgen_ty_1 +); +iopt_cfg!( + ARMLPAES2Cfg, + arm_lpae_s2_cfg, + io_pgtable_cfg__bindgen_ty_1__bindgen_ty_2 +); +iopt_cfg!( + ARMv7SCfg, + arm_v7s_cfg, + io_pgtable_cfg__bindgen_ty_1__bindgen_ty_3 +); +iopt_cfg!( + ARMMaliLPAECfg, + arm_mali_lpae_cfg, + io_pgtable_cfg__bindgen_ty_1__bindgen_ty_4 +); +iopt_cfg!( + AppleDARTCfg, + apple_dart_cfg, + io_pgtable_cfg__bindgen_ty_1__bindgen_ty_5 +); +iopt_cfg!( + AppleUATCfg, + apple_uat_cfg, + io_pgtable_cfg__bindgen_ty_1__bindgen_ty_6 +); + +iopt_type!(ARM32LPAES1, ARMLPAES1Cfg, io_pgtable_fmt_ARM_32_LPAE_S1); +iopt_type!(ARM32LPAES2, ARMLPAES2Cfg, io_pgtable_fmt_ARM_32_LPAE_S2); +iopt_type!(ARM64LPAES1, ARMLPAES1Cfg, io_pgtable_fmt_ARM_64_LPAE_S1); +iopt_type!(ARM64LPAES2, ARMLPAES2Cfg, io_pgtable_fmt_ARM_64_LPAE_S2); +iopt_type!(ARMv7S, ARMv7SCfg, io_pgtable_fmt_ARM_V7S); +iopt_type!(ARMMaliLPAE, ARMMaliLPAECfg, io_pgtable_fmt_ARM_MALI_LPAE); +iopt_type!(AMDIOMMUV1, (), io_pgtable_fmt_AMD_IOMMU_V1); +iopt_type!(AppleDART, AppleDARTCfg, io_pgtable_fmt_APPLE_DART); +iopt_type!(AppleDART2, AppleDARTCfg, io_pgtable_fmt_APPLE_DART2); +iopt_type!(AppleUAT, AppleUATCfg, io_pgtable_fmt_APPLE_UAT); diff --git a/rust/kernel/ioctl.rs b/rust/kernel/ioctl.rs new file mode 100644 index 000000000000..6cd8e5738b91 --- /dev/null +++ b/rust/kernel/ioctl.rs @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 +#![allow(non_snake_case)] + +//! ioctl() number definitions +//! +//! C header: [`include/asm-generic/ioctl.h`](../../../../include/asm-generic/ioctl.h) + +/// Build an ioctl number, analogous to the C macro of the same name. +const fn _IOC(dir: u32, ty: u32, nr: u32, size: usize) -> u32 { + core::assert!(dir <= bindings::_IOC_DIRMASK); + core::assert!(ty <= bindings::_IOC_TYPEMASK); + core::assert!(nr <= bindings::_IOC_NRMASK); + core::assert!(size <= (bindings::_IOC_SIZEMASK as usize)); + + (dir << bindings::_IOC_DIRSHIFT) + | (ty << bindings::_IOC_TYPESHIFT) + | (nr << bindings::_IOC_NRSHIFT) + | ((size as u32) << bindings::_IOC_SIZESHIFT) +} + +/// Build an ioctl number for an argumentless ioctl. +pub const fn _IO(ty: u32, nr: u32) -> u32 { + _IOC(bindings::_IOC_NONE, ty, nr, 0) +} + +/// Build an ioctl number for an read-only ioctl. 
+pub const fn _IOR<T>(ty: u32, nr: u32) -> u32 { + _IOC(bindings::_IOC_READ, ty, nr, core::mem::size_of::<T>()) +} + +/// Build an ioctl number for an write-only ioctl. +pub const fn _IOW<T>(ty: u32, nr: u32) -> u32 { + _IOC(bindings::_IOC_WRITE, ty, nr, core::mem::size_of::<T>()) +} + +/// Build an ioctl number for a read-write ioctl. +pub const fn _IOWR<T>(ty: u32, nr: u32) -> u32 { + _IOC( + bindings::_IOC_READ | bindings::_IOC_WRITE, + ty, + nr, + core::mem::size_of::<T>(), + ) +} + +/// Get the ioctl direction from an ioctl number. +pub const fn _IOC_DIR(nr: u32) -> u32 { + (nr >> bindings::_IOC_DIRSHIFT) & bindings::_IOC_DIRMASK +} + +/// Get the ioctl type from an ioctl number. +pub const fn _IOC_TYPE(nr: u32) -> u32 { + (nr >> bindings::_IOC_TYPESHIFT) & bindings::_IOC_TYPEMASK +} + +/// Get the ioctl number from an ioctl number. +pub const fn _IOC_NR(nr: u32) -> u32 { + (nr >> bindings::_IOC_NRSHIFT) & bindings::_IOC_NRMASK +} + +/// Get the ioctl size from an ioctl number. +pub const fn _IOC_SIZE(nr: u32) -> usize { + ((nr >> bindings::_IOC_SIZESHIFT) & bindings::_IOC_SIZEMASK) as usize +} diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs index 53040fa9e897..390f9f7b6f9c 100644 --- a/rust/kernel/lib.rs +++ b/rust/kernel/lib.rs @@ -13,7 +13,16 @@ #![no_std] #![feature(allocator_api)] -#![feature(core_ffi_c)] +#![feature(associated_type_defaults)] +#![feature(coerce_unsized)] +#![feature(const_mut_refs)] +#![feature(const_refs_to_cell)] +#![feature(const_trait_impl)] +#![feature(dispatch_from_dyn)] +#![feature(duration_constants)] +#![feature(new_uninit)] +#![feature(receiver_trait)] +#![feature(unsize)] // Ensure conditional compilation based on the kernel configuration works; // otherwise we may silently break things like initcall handling. @@ -23,15 +32,36 @@ compile_error!("Missing kernel configuration for conditional compilation"); #[cfg(not(test))] #[cfg(not(testlib))] mod allocator; + mod build_assert; +pub mod delay; +pub mod device; +#[cfg(CONFIG_DMA_SHARED_BUFFER)] +pub mod dma_fence; +pub mod driver; +#[cfg(CONFIG_DRM = "y")] +pub mod drm; pub mod error; +pub mod io_buffer; +pub mod io_mem; +pub mod io_pgtable; +pub mod ioctl; +pub mod module_param; +pub mod of; +pub mod platform; pub mod prelude; pub mod print; +pub mod revocable; +pub mod soc; mod static_assert; #[doc(hidden)] pub mod std_vendor; pub mod str; +pub mod sync; +pub mod time; pub mod types; +pub mod user_ptr; +pub mod xarray; #[doc(hidden)] pub use bindings; @@ -40,6 +70,16 @@ pub use macros; #[doc(hidden)] pub use build_error::build_error; +pub(crate) mod private { + #[allow(unreachable_pub)] + pub trait Sealed {} +} + +/// Page size defined in terms of the `PAGE_SHIFT` macro from C. +/// +/// [`PAGE_SHIFT`]: ../../../include/asm-generic/page.h +pub const PAGE_SIZE: usize = 1 << bindings::PAGE_SHIFT; + /// Prefix to appear before log messages printed from within the `kernel` crate. const __LOG_PREFIX: &[u8] = b"rust_kernel\0"; @@ -53,7 +93,7 @@ pub trait Module: Sized + Sync { /// should do. /// /// Equivalent to the `module_init` macro in the C API. - fn init(module: &'static ThisModule) -> error::Result<Self>; + fn init(name: &'static crate::str::CStr, module: &'static ThisModule) -> error::Result<Self>; } /// Equivalent to `THIS_MODULE` in the C API. @@ -73,6 +113,43 @@ impl ThisModule { pub const unsafe fn from_ptr(ptr: *mut bindings::module) -> ThisModule { ThisModule(ptr) } + + /// Locks the module parameters to access them. 
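Editor's sketch, stepping back to the ioctl helpers added in `rust/kernel/ioctl.rs` above: defining and decomposing command numbers. The `FooInfo` struct and the `'F'` type byte are invented for illustration.

```
use kernel::ioctl::{_IOC_NR, _IOC_SIZE, _IOR, _IOWR};

/// Invented ioctl argument struct for an imaginary "foo" driver.
#[repr(C)]
struct FooInfo {
    version: u32,
    flags: u32,
}

// Command numbers built from the (invented) 'F' type byte.
const FOO_GET_INFO: u32 = _IOR::<FooInfo>(b'F' as u32, 1);
const FOO_EXCHANGE: u32 = _IOWR::<FooInfo>(b'F' as u32, 2);

/// Decomposes a command number back into its command index and argument size.
fn describe(cmd: u32) -> (u32, usize) {
    (_IOC_NR(cmd), _IOC_SIZE(cmd))
}
```

For `FOO_GET_INFO`, `_IOC_SIZE` recovers `core::mem::size_of::<FooInfo>()`.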
+ /// + /// Returns a [`KParamGuard`] that will release the lock when dropped. + pub fn kernel_param_lock(&self) -> KParamGuard<'_> { + // SAFETY: `kernel_param_lock` will check if the pointer is null and + // use the built-in mutex in that case. + #[cfg(CONFIG_SYSFS)] + unsafe { + bindings::kernel_param_lock(self.0) + } + + KParamGuard { + #[cfg(CONFIG_SYSFS)] + this_module: self, + phantom: core::marker::PhantomData, + } + } +} + +/// Scoped lock on the kernel parameters of [`ThisModule`]. +/// +/// Lock will be released when this struct is dropped. +pub struct KParamGuard<'a> { + #[cfg(CONFIG_SYSFS)] + this_module: &'a ThisModule, + phantom: core::marker::PhantomData<&'a ()>, +} + +#[cfg(CONFIG_SYSFS)] +impl<'a> Drop for KParamGuard<'a> { + fn drop(&mut self) { + // SAFETY: `kernel_param_lock` will check if the pointer is null and + // use the built-in mutex in that case. The existence of `self` + // guarantees that the lock is held. + unsafe { bindings::kernel_param_unlock(self.this_module.0) } + } } #[cfg(not(any(testlib, test)))] @@ -85,3 +162,66 @@ fn panic(info: &core::panic::PanicInfo<'_>) -> ! { // instead of `!`. See <https://github.com/rust-lang/rust-bindgen/issues/2094>. loop {} } + +/// Calculates the offset of a field from the beginning of the struct it belongs to. +/// +/// # Examples +/// +/// ``` +/// # use kernel::prelude::*; +/// # use kernel::offset_of; +/// struct Test { +/// a: u64, +/// b: u32, +/// } +/// +/// assert_eq!(offset_of!(Test, b), 8); +/// ``` +#[macro_export] +macro_rules! offset_of { + ($type:ty, $($f:tt)*) => {{ + let tmp = core::mem::MaybeUninit::<$type>::uninit(); + let outer = tmp.as_ptr(); + // To avoid warnings when nesting `unsafe` blocks. + #[allow(unused_unsafe)] + // SAFETY: The pointer is valid and aligned, just not initialised; `addr_of` ensures that + // we don't actually read from `outer` (which would be UB) nor create an intermediate + // reference. + let inner = unsafe { core::ptr::addr_of!((*outer).$($f)*) } as *const u8; + // To avoid warnings when nesting `unsafe` blocks. + #[allow(unused_unsafe)] + // SAFETY: The two pointers are within the same allocation block. + unsafe { inner.offset_from(outer as *const u8) } + }} +} + +/// Produces a pointer to an object from a pointer to one of its fields. +/// +/// # Safety +/// +/// Callers must ensure that the pointer to the field is in fact a pointer to the specified field, +/// as opposed to a pointer to another object of the same type. If this condition is not met, +/// any dereference of the resulting pointer is UB. +/// +/// # Examples +/// +/// ``` +/// # use kernel::container_of; +/// struct Test { +/// a: u64, +/// b: u32, +/// } +/// +/// let test = Test { a: 10, b: 20 }; +/// let b_ptr = &test.b; +/// let test_alias = container_of!(b_ptr, Test, b); +/// assert!(core::ptr::eq(&test, test_alias)); +/// ``` +#[macro_export] +macro_rules! container_of { + ($ptr:expr, $type:ty, $($f:tt)*) => {{ + let ptr = $ptr as *const _ as *const u8; + let offset = $crate::offset_of!($type, $($f)*); + ptr.wrapping_offset(-offset) as *const $type + }} +} diff --git a/rust/kernel/module_param.rs b/rust/kernel/module_param.rs new file mode 100644 index 000000000000..d587f1036349 --- /dev/null +++ b/rust/kernel/module_param.rs @@ -0,0 +1,501 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Types for module parameters. +//! +//! 
C header: [`include/linux/moduleparam.h`](../../../include/linux/moduleparam.h) + +use crate::error::{code::*, from_kernel_result}; +use crate::str::{CStr, Formatter}; +use core::fmt::Write; + +/// Types that can be used for module parameters. +/// +/// Note that displaying the type in `sysfs` will fail if +/// [`alloc::string::ToString::to_string`] (as implemented through the +/// [`core::fmt::Display`] trait) writes more than [`PAGE_SIZE`] +/// bytes (including an additional null terminator). +/// +/// [`PAGE_SIZE`]: `crate::PAGE_SIZE` +pub trait ModuleParam: core::fmt::Display + core::marker::Sized { + /// The `ModuleParam` will be used by the kernel module through this type. + /// + /// This may differ from `Self` if, for example, `Self` needs to track + /// ownership without exposing it or allocate extra space for other possible + /// parameter values. See [`StringParam`] or [`ArrayParam`] for examples. + type Value: ?Sized; + + /// Whether the parameter is allowed to be set without an argument. + /// + /// Setting this to `true` allows the parameter to be passed without an + /// argument (e.g. just `module.param` instead of `module.param=foo`). + const NOARG_ALLOWED: bool; + + /// Convert a parameter argument into the parameter value. + /// + /// `None` should be returned when parsing of the argument fails. + /// `arg == None` indicates that the parameter was passed without an + /// argument. If `NOARG_ALLOWED` is set to `false` then `arg` is guaranteed + /// to always be `Some(_)`. + /// + /// Parameters passed at boot time will be set before [`kmalloc`] is + /// available (even if the module is loaded at a later time). However, in + /// this case, the argument buffer will be valid for the entire lifetime of + /// the kernel. So implementations of this method which need to allocate + /// should first check that the allocator is available (with + /// [`crate::bindings::slab_is_available`]) and when it is not available + /// provide an alternative implementation which doesn't allocate. In cases + /// where the allocator is not available it is safe to save references to + /// `arg` in `Self`, but in other cases a copy should be made. + /// + /// [`kmalloc`]: ../../../include/linux/slab.h + fn try_from_param_arg(arg: Option<&'static [u8]>) -> Option<Self>; + + /// Get the current value of the parameter for use in the kernel module. + /// + /// This function should not be used directly. Instead use the wrapper + /// `read` which will be generated by [`macros::module`]. + fn value(&self) -> &Self::Value; + + /// Set the module parameter from a string. + /// + /// Used to set the parameter value when loading the module or when set + /// through `sysfs`. + /// + /// # Safety + /// + /// If `val` is non-null then it must point to a valid null-terminated + /// string. The `arg` field of `param` must be an instance of `Self`. + unsafe extern "C" fn set_param( + val: *const core::ffi::c_char, + param: *const crate::bindings::kernel_param, + ) -> core::ffi::c_int { + let arg = if val.is_null() { + None + } else { + Some(unsafe { CStr::from_char_ptr(val).as_bytes() }) + }; + match Self::try_from_param_arg(arg) { + Some(new_value) => { + let old_value = unsafe { (*param).__bindgen_anon_1.arg as *mut Self }; + let _ = unsafe { core::ptr::replace(old_value, new_value) }; + 0 + } + None => EINVAL.to_kernel_errno(), + } + } + + /// Write a string representation of the current parameter value to `buf`. + /// + /// Used for displaying the current parameter value in `sysfs`. 
+ /// + /// # Safety + /// + /// `buf` must be a buffer of length at least `kernel::PAGE_SIZE` that is + /// writeable. The `arg` field of `param` must be an instance of `Self`. + unsafe extern "C" fn get_param( + buf: *mut core::ffi::c_char, + param: *const crate::bindings::kernel_param, + ) -> core::ffi::c_int { + from_kernel_result! { + // SAFETY: The C contracts guarantees that the buffer is at least `PAGE_SIZE` bytes. + let mut f = unsafe { Formatter::from_buffer(buf.cast(), crate::PAGE_SIZE) }; + unsafe { write!(f, "{}\0", *((*param).__bindgen_anon_1.arg as *mut Self)) }?; + Ok(f.bytes_written().try_into()?) + } + } + + /// Drop the parameter. + /// + /// Called when unloading a module. + /// + /// # Safety + /// + /// The `arg` field of `param` must be an instance of `Self`. + unsafe extern "C" fn free(arg: *mut core::ffi::c_void) { + unsafe { core::ptr::drop_in_place(arg as *mut Self) }; + } +} + +/// Trait for parsing integers. +/// +/// Strings beginning with `0x`, `0o`, or `0b` are parsed as hex, octal, or +/// binary respectively. Strings beginning with `0` otherwise are parsed as +/// octal. Anything else is parsed as decimal. A leading `+` or `-` is also +/// permitted. The string may contain a trailing newline. Any string parsed +/// by [`kstrtol()`] or [`kstrtoul()`] will be successfully parsed. +/// +/// [`kstrtol()`]: https://www.kernel.org/doc/html/latest/core-api/kernel-api.html#c.kstrtol +/// [`kstrtoul()`]: https://www.kernel.org/doc/html/latest/core-api/kernel-api.html#c.kstrtoul +trait ParseInt: Sized { + fn from_str_radix(src: &str, radix: u32) -> Result<Self, core::num::ParseIntError>; + fn checked_neg(self) -> Option<Self>; + + fn from_str_unsigned(src: &str) -> Result<Self, core::num::ParseIntError> { + let src = src.strip_suffix('\n').unwrap_or(src); + let (radix, digits) = if let Some(n) = src.strip_prefix("0x") { + (16, n) + } else if let Some(n) = src.strip_prefix("0X") { + (16, n) + } else if let Some(n) = src.strip_prefix("0o") { + (8, n) + } else if let Some(n) = src.strip_prefix("0O") { + (8, n) + } else if let Some(n) = src.strip_prefix("0b") { + (2, n) + } else if let Some(n) = src.strip_prefix("0B") { + (2, n) + } else if src.starts_with('0') { + (8, src) + } else { + (10, src) + }; + Self::from_str_radix(digits, radix) + } + + fn from_str(src: &str) -> Option<Self> { + match src.bytes().next() { + None => None, + Some(b'-') => Self::from_str_unsigned(&src[1..]).ok()?.checked_neg(), + Some(b'+') => Some(Self::from_str_unsigned(&src[1..]).ok()?), + Some(_) => Some(Self::from_str_unsigned(src).ok()?), + } + } +} + +macro_rules! impl_parse_int { + ($ty:ident) => { + impl ParseInt for $ty { + fn from_str_radix(src: &str, radix: u32) -> Result<Self, core::num::ParseIntError> { + $ty::from_str_radix(src, radix) + } + + fn checked_neg(self) -> Option<Self> { + self.checked_neg() + } + } + }; +} + +impl_parse_int!(i8); +impl_parse_int!(u8); +impl_parse_int!(i16); +impl_parse_int!(u16); +impl_parse_int!(i32); +impl_parse_int!(u32); +impl_parse_int!(i64); +impl_parse_int!(u64); +impl_parse_int!(isize); +impl_parse_int!(usize); + +macro_rules! 
impl_module_param { + ($ty:ident) => { + impl ModuleParam for $ty { + type Value = $ty; + + const NOARG_ALLOWED: bool = false; + + fn try_from_param_arg(arg: Option<&'static [u8]>) -> Option<Self> { + let bytes = arg?; + let utf8 = core::str::from_utf8(bytes).ok()?; + <$ty as crate::module_param::ParseInt>::from_str(utf8) + } + + fn value(&self) -> &Self::Value { + self + } + } + }; +} + +#[doc(hidden)] +#[macro_export] +/// Generate a static [`kernel_param_ops`](../../../include/linux/moduleparam.h) struct. +/// +/// # Examples +/// +/// ```ignore +/// make_param_ops!( +/// /// Documentation for new param ops. +/// PARAM_OPS_MYTYPE, // Name for the static. +/// MyType // A type which implements [`ModuleParam`]. +/// ); +/// ``` +macro_rules! make_param_ops { + ($ops:ident, $ty:ty) => { + $crate::make_param_ops!( + #[doc=""] + $ops, + $ty + ); + }; + ($(#[$meta:meta])* $ops:ident, $ty:ty) => { + $(#[$meta])* + /// + /// Static [`kernel_param_ops`](../../../include/linux/moduleparam.h) + /// struct generated by [`make_param_ops`]. + pub static $ops: $crate::bindings::kernel_param_ops = $crate::bindings::kernel_param_ops { + flags: if <$ty as $crate::module_param::ModuleParam>::NOARG_ALLOWED { + $crate::bindings::KERNEL_PARAM_OPS_FL_NOARG + } else { + 0 + }, + set: Some(<$ty as $crate::module_param::ModuleParam>::set_param), + get: Some(<$ty as $crate::module_param::ModuleParam>::get_param), + free: Some(<$ty as $crate::module_param::ModuleParam>::free), + }; + }; +} + +impl_module_param!(i8); +impl_module_param!(u8); +impl_module_param!(i16); +impl_module_param!(u16); +impl_module_param!(i32); +impl_module_param!(u32); +impl_module_param!(i64); +impl_module_param!(u64); +impl_module_param!(isize); +impl_module_param!(usize); + +make_param_ops!( + /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h) + /// for [`i8`]. + PARAM_OPS_I8, + i8 +); +make_param_ops!( + /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h) + /// for [`u8`]. + PARAM_OPS_U8, + u8 +); +make_param_ops!( + /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h) + /// for [`i16`]. + PARAM_OPS_I16, + i16 +); +make_param_ops!( + /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h) + /// for [`u16`]. + PARAM_OPS_U16, + u16 +); +make_param_ops!( + /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h) + /// for [`i32`]. + PARAM_OPS_I32, + i32 +); +make_param_ops!( + /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h) + /// for [`u32`]. + PARAM_OPS_U32, + u32 +); +make_param_ops!( + /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h) + /// for [`i64`]. + PARAM_OPS_I64, + i64 +); +make_param_ops!( + /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h) + /// for [`u64`]. + PARAM_OPS_U64, + u64 +); +make_param_ops!( + /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h) + /// for [`isize`]. + PARAM_OPS_ISIZE, + isize +); +make_param_ops!( + /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h) + /// for [`usize`]. 
+ PARAM_OPS_USIZE, + usize +); + +impl ModuleParam for bool { + type Value = bool; + + const NOARG_ALLOWED: bool = true; + + fn try_from_param_arg(arg: Option<&'static [u8]>) -> Option<Self> { + match arg { + None => Some(true), + Some(b"y") | Some(b"Y") | Some(b"1") | Some(b"true") => Some(true), + Some(b"n") | Some(b"N") | Some(b"0") | Some(b"false") => Some(false), + _ => None, + } + } + + fn value(&self) -> &Self::Value { + self + } +} + +make_param_ops!( + /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h) + /// for [`bool`]. + PARAM_OPS_BOOL, + bool +); + +/// An array of at __most__ `N` values. +/// +/// # Invariant +/// +/// The first `self.used` elements of `self.values` are initialized. +pub struct ArrayParam<T, const N: usize> { + values: [core::mem::MaybeUninit<T>; N], + used: usize, +} + +impl<T, const N: usize> ArrayParam<T, { N }> { + fn values(&self) -> &[T] { + // SAFETY: The invariant maintained by `ArrayParam` allows us to cast + // the first `self.used` elements to `T`. + unsafe { + &*(&self.values[0..self.used] as *const [core::mem::MaybeUninit<T>] as *const [T]) + } + } +} + +impl<T: Copy, const N: usize> ArrayParam<T, { N }> { + const fn new() -> Self { + // INVARIANT: The first `self.used` elements of `self.values` are + // initialized. + ArrayParam { + values: [core::mem::MaybeUninit::uninit(); N], + used: 0, + } + } + + const fn push(&mut self, val: T) { + if self.used < N { + // INVARIANT: The first `self.used` elements of `self.values` are + // initialized. + self.values[self.used] = core::mem::MaybeUninit::new(val); + self.used += 1; + } + } + + /// Create an instance of `ArrayParam` initialized with `vals`. + /// + /// This function is only meant to be used in the [`module::module`] macro. + pub const fn create(vals: &[T]) -> Self { + let mut result = ArrayParam::new(); + let mut i = 0; + while i < vals.len() { + result.push(vals[i]); + i += 1; + } + result + } +} + +impl<T: core::fmt::Display, const N: usize> core::fmt::Display for ArrayParam<T, { N }> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + for val in self.values() { + write!(f, "{},", val)?; + } + Ok(()) + } +} + +impl<T: Copy + core::fmt::Display + ModuleParam, const N: usize> ModuleParam + for ArrayParam<T, { N }> +{ + type Value = [T]; + + const NOARG_ALLOWED: bool = false; + + fn try_from_param_arg(arg: Option<&'static [u8]>) -> Option<Self> { + arg.and_then(|args| { + let mut result = Self::new(); + for arg in args.split(|b| *b == b',') { + result.push(T::try_from_param_arg(Some(arg))?); + } + Some(result) + }) + } + + fn value(&self) -> &Self::Value { + self.values() + } +} + +/// A C-style string parameter. +/// +/// The Rust version of the [`charp`] parameter. This type is meant to be +/// used by the [`macros::module`] macro, not handled directly. Instead use the +/// `read` method generated by that macro. +/// +/// [`charp`]: ../../../include/linux/moduleparam.h +pub enum StringParam { + /// A borrowed parameter value. + /// + /// Either the default value (which is static in the module) or borrowed + /// from the original argument buffer used to set the value. + Ref(&'static [u8]), + + /// A value that was allocated when the parameter was set. + /// + /// The value needs to be freed when the parameter is reset or the module is + /// unloaded. 
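Editor's illustration of the parsing rules documented earlier in `rust/kernel/module_param.rs`; it simply exercises `try_from_param_arg` on a few argument strings. This is not how parameters are normally read (the `macros::module` macro generates `read` accessors for that), just a sketch of the documented semantics.

```
use kernel::module_param::ModuleParam;

fn parsing_examples() {
    // Integers follow the kstrtol-style prefix rules described above.
    assert_eq!(<u32 as ModuleParam>::try_from_param_arg(Some(b"0x10")), Some(16));
    assert_eq!(<u32 as ModuleParam>::try_from_param_arg(Some(b"010")), Some(8));
    assert_eq!(<i32 as ModuleParam>::try_from_param_arg(Some(b"-5")), Some(-5));

    // `bool` accepts y/Y/1/true and n/N/0/false, and may be given with no argument at all.
    assert_eq!(<bool as ModuleParam>::try_from_param_arg(Some(b"n")), Some(false));
    assert_eq!(<bool as ModuleParam>::try_from_param_arg(None), Some(true));
}
```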
+ Owned(alloc::vec::Vec<u8>), +} + +impl StringParam { + fn bytes(&self) -> &[u8] { + match self { + #[allow(clippy::explicit_auto_deref)] + StringParam::Ref(bytes) => *bytes, + StringParam::Owned(vec) => &vec[..], + } + } +} + +impl core::fmt::Display for StringParam { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + let bytes = self.bytes(); + match core::str::from_utf8(bytes) { + Ok(utf8) => write!(f, "{}", utf8), + Err(_) => write!(f, "{:?}", bytes), + } + } +} + +impl ModuleParam for StringParam { + type Value = [u8]; + + const NOARG_ALLOWED: bool = false; + + fn try_from_param_arg(arg: Option<&'static [u8]>) -> Option<Self> { + // SAFETY: It is always safe to call [`slab_is_available`](../../../include/linux/slab.h). + let slab_available = unsafe { crate::bindings::slab_is_available() }; + arg.and_then(|arg| { + if slab_available { + let mut vec = alloc::vec::Vec::new(); + vec.try_extend_from_slice(arg).ok()?; + Some(StringParam::Owned(vec)) + } else { + Some(StringParam::Ref(arg)) + } + }) + } + + fn value(&self) -> &Self::Value { + self.bytes() + } +} + +make_param_ops!( + /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h) + /// for [`StringParam`]. + PARAM_OPS_STR, + StringParam +); diff --git a/rust/kernel/of.rs b/rust/kernel/of.rs new file mode 100644 index 000000000000..a27621b57fbb --- /dev/null +++ b/rust/kernel/of.rs @@ -0,0 +1,546 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Devicetree and Open Firmware abstractions. +//! +//! C header: [`include/linux/of_*.h`](../../../../include/linux/of_*.h) + +// Note: Most OF functions turn into inline dummies with CONFIG_OF(_*) disabled. +// We have to either add config conditionals to helpers.c or here; let's do it +// here for now. In the future, once bindgen can auto-generate static inline +// helpers, this can go away if desired. + +use core::marker::PhantomData; +use core::num::NonZeroU32; + +use crate::{ + bindings, driver, + prelude::*, + str::{BStr, CStr}, +}; + +/// An open firmware device id. +#[derive(Clone, Copy)] +pub enum DeviceId { + /// An open firmware device id where only a compatible string is specified. + Compatible(&'static BStr), +} + +/// Defines a const open firmware device id table that also carries per-entry data/context/info. +/// +/// # Example +/// +/// ``` +/// # use kernel::{define_of_id_table, module_of_id_table, driver_of_id_table}; +/// use kernel::of; +/// +/// define_of_id_table! {MY_ID_TABLE, u32, [ +/// (of::DeviceId::Compatible(b"test-device1,test-device2"), Some(0xff)), +/// (of::DeviceId::Compatible(b"test-device3"), None), +/// ]}; +/// +/// module_of_id_table!(MOD_TABLE, ASAHI_ID_TABLE); +/// +/// // Within the `Driver` implementation: +/// driver_of_id_table!(MY_ID_TABLE); +/// ``` +#[macro_export] +macro_rules! define_of_id_table { + ($name:ident, $data_type:ty, $($t:tt)*) => { + $crate::define_id_array!($name, $crate::of::DeviceId, $data_type, $($t)*); + }; +} + +/// Convenience macro to declare which device ID table to use for a bus driver. +#[macro_export] +macro_rules! driver_of_id_table { + ($name:expr) => { + $crate::driver_id_table!( + OF_DEVICE_ID_TABLE, + $crate::of::DeviceId, + Self::IdInfo, + $name + ); + }; +} + +/// Declare a device ID table as a module-level table. This creates the necessary module alias +/// entries to enable module autoloading. +#[macro_export] +macro_rules! 
module_of_id_table { + ($item_name:ident, $table_name:ident) => { + $crate::module_id_table!($item_name, "of", $crate::of::DeviceId, $table_name); + }; +} + +// SAFETY: `ZERO` is all zeroed-out and `to_rawid` stores `offset` in `of_device_id::data`. +unsafe impl const driver::RawDeviceId for DeviceId { + type RawType = bindings::of_device_id; + const ZERO: Self::RawType = bindings::of_device_id { + name: [0; 32], + type_: [0; 32], + compatible: [0; 128], + data: core::ptr::null(), + }; + + fn to_rawid(&self, offset: isize) -> Self::RawType { + let DeviceId::Compatible(compatible) = self; + let mut id = Self::ZERO; + let mut i = 0; + while i < compatible.len() { + // If `compatible` does not fit in `id.compatible`, an "index out of bounds" build time + // error will be triggered. + id.compatible[i] = compatible[i] as _; + i += 1; + } + id.compatible[i] = b'\0' as _; + id.data = offset as _; + id + } +} + +/// Type alias for an OF phandle +pub type PHandle = bindings::phandle; + +/// An OF device tree node. +/// +/// # Invariants +/// +/// `raw_node` points to a valid OF node, and we hold a reference to it. +pub struct Node { + raw_node: *mut bindings::device_node, +} + +#[allow(dead_code)] +impl Node { + /// Creates a `Node` from a raw C pointer. The pointer must be owned (the caller + /// gives up its reference). If the pointer is NULL, returns None. + pub(crate) unsafe fn from_raw(raw_node: *mut bindings::device_node) -> Option<Node> { + if raw_node.is_null() { + None + } else { + // INVARIANT: `raw_node` is valid per the above contract, and non-null per the + // above check. + Some(Node { raw_node }) + } + } + + /// Creates a `Node` from a raw C pointer. The pointer must be borrowed (the caller + /// retains its reference, which must be valid for the duration of the call). If the + /// pointer is NULL, returns None. + pub(crate) unsafe fn get_from_raw(raw_node: *mut bindings::device_node) -> Option<Node> { + // SAFETY: `raw_node` is valid or NULL per the above contract. `of_node_get` can handle + // NULL. + unsafe { + #[cfg(CONFIG_OF_DYNAMIC)] + bindings::of_node_get(raw_node); + Node::from_raw(raw_node) + } + } + + /// Returns a reference to the underlying C `device_node` structure. + fn node(&self) -> &bindings::device_node { + // SAFETY: `raw_node` is valid per the type invariant. + unsafe { &*self.raw_node } + } + + /// Returns the name of the node. + pub fn name(&self) -> &CStr { + // SAFETY: The lifetime of the `CStr` is the same as the lifetime of this `Node`. + unsafe { CStr::from_char_ptr(self.node().name) } + } + + /// Returns the phandle for this node. + pub fn phandle(&self) -> PHandle { + self.node().phandle + } + + /// Returns the full name (with address) for this node. + pub fn full_name(&self) -> &CStr { + // SAFETY: The lifetime of the `CStr` is the same as the lifetime of this `Node`. + unsafe { CStr::from_char_ptr(self.node().full_name) } + } + + /// Returns `true` if the node is the root node. + pub fn is_root(&self) -> bool { + unsafe { bindings::of_node_is_root(self.raw_node) } + } + + /// Returns the parent node, if any. + pub fn parent(&self) -> Option<Node> { + #[cfg(not(CONFIG_OF))] + { + None + } + #[cfg(CONFIG_OF)] + // SAFETY: `raw_node` is valid per the type invariant, and `of_get_parent()` takes a + // new reference to the parent (or returns NULL). + unsafe { + Node::from_raw(bindings::of_get_parent(self.raw_node)) + } + } + + /// Returns an iterator over the node's children. 
+ // TODO: use type alias for return type once type_alias_impl_trait is stable + pub fn children( + &self, + ) -> NodeIterator<'_, impl Fn(*mut bindings::device_node) -> *mut bindings::device_node + '_> + { + #[cfg(not(CONFIG_OF))] + { + NodeIterator::new(|_prev| core::ptr::null_mut()) + } + #[cfg(CONFIG_OF)] + // SAFETY: `raw_node` is valid per the type invariant, and the lifetime of the `NodeIterator` + // does not exceed the lifetime of the `Node` so it can borrow its reference. + NodeIterator::new(|prev| unsafe { bindings::of_get_next_child(self.raw_node, prev) }) + } + + /// Find a child by its name and return it, or None if not found. + #[allow(unused_variables)] + pub fn get_child_by_name(&self, name: &CStr) -> Option<Node> { + #[cfg(not(CONFIG_OF))] + { + None + } + #[cfg(CONFIG_OF)] + // SAFETY: `raw_node` is valid per the type invariant. + unsafe { + Node::from_raw(bindings::of_get_child_by_name( + self.raw_node, + name.as_char_ptr(), + )) + } + } + + /// Checks whether the node is compatible with the given compatible string. + /// + /// Returns `None` if there is no match, or `Some<NonZeroU32>` if there is, with the value + /// representing as match score (higher values for more specific compatible matches). + #[allow(unused_variables)] + pub fn is_compatible(&self, compatible: &CStr) -> Option<NonZeroU32> { + #[cfg(not(CONFIG_OF))] + let ret = 0; + #[cfg(CONFIG_OF)] + // SAFETY: `raw_node` is valid per the type invariant. + let ret = + unsafe { bindings::of_device_is_compatible(self.raw_node, compatible.as_char_ptr()) }; + + NonZeroU32::new(ret.try_into().ok()?) + } + + /// Parse a phandle property and return the Node referenced at a given index, if any. + /// + /// Used only for phandle properties with no arguments. + #[allow(unused_variables)] + pub fn parse_phandle(&self, name: &CStr, index: usize) -> Option<Node> { + #[cfg(not(CONFIG_OF))] + { + None + } + #[cfg(CONFIG_OF)] + // SAFETY: `raw_node` is valid per the type invariant. `of_parse_phandle` returns an + // owned reference. + unsafe { + Node::from_raw(bindings::of_parse_phandle( + self.raw_node, + name.as_char_ptr(), + index.try_into().ok()?, + )) + } + } + + #[allow(unused_variables)] + /// Look up a node property by name, returning a `Property` object if found. + pub fn find_property(&self, propname: &CStr) -> Option<Property<'_>> { + #[cfg(not(CONFIG_OF))] + { + None + } + #[cfg(CONFIG_OF)] + // SAFETY: `raw_node` is valid per the type invariant. The property structure + // returned borrows the reference to the owning node, and so has the same + // lifetime. + unsafe { + Property::from_raw(bindings::of_find_property( + self.raw_node, + propname.as_char_ptr(), + core::ptr::null_mut(), + )) + } + } + + /// Look up a mandatory node property by name, and decode it into a value type. + /// + /// Returns `Err(ENOENT)` if the property is not found. + /// + /// The type `T` must implement `TryFrom<Property<'_>>`. + pub fn get_property<'a, T: TryFrom<Property<'a>>>(&'a self, propname: &CStr) -> Result<T> + where + crate::error::Error: From<<T as TryFrom<Property<'a>>>::Error>, + { + Ok(self.find_property(propname).ok_or(ENOENT)?.try_into()?) + } + + /// Look up an optional node property by name, and decode it into a value type. + /// + /// Returns `Ok(None)` if the property is not found. + /// + /// The type `T` must implement `TryFrom<Property<'_>>`. 
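Editor's sketch of the property accessors in use, e.g. from a driver probe path. The property names are invented, and the `c_str!` macro from `kernel::str` is assumed to be available for building `CStr` literals.

```
use kernel::{c_str, of::Node, prelude::*};

/// Reads one mandatory and one optional (invented) property from `node`.
fn read_config(node: &Node) -> Result<(u32, Option<Vec<u8>>)> {
    // Missing mandatory properties surface as `ENOENT`.
    let cell_size: u32 = node.get_property(c_str!("cell-size"))?;
    // Optional properties come back as `Ok(None)` when absent.
    let key: Option<Vec<u8>> = node.get_opt_property(c_str!("vendor,key"))?;
    Ok((cell_size, key))
}
```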
+ pub fn get_opt_property<'a, T: TryFrom<Property<'a>>>( + &'a self, + propname: &CStr, + ) -> Result<Option<T>> + where + crate::error::Error: From<<T as TryFrom<Property<'a>>>::Error>, + { + self.find_property(propname) + .map_or(Ok(None), |p| Ok(Some(p.try_into()?))) + } +} + +/// A property attached to a device tree `Node`. +/// +/// # Invariants +/// +/// `raw` must be valid and point to a property that outlives the lifetime of this object. +#[derive(Copy, Clone)] +pub struct Property<'a> { + raw: *mut bindings::property, + _p: PhantomData<&'a Node>, +} + +impl<'a> Property<'a> { + #[cfg(CONFIG_OF)] + /// Create a `Property` object from a raw C pointer. Returns `None` if NULL. + /// + /// The passed pointer must be valid and outlive the lifetime argument, or NULL. + unsafe fn from_raw(raw: *mut bindings::property) -> Option<Property<'a>> { + if raw.is_null() { + None + } else { + Some(Property { + raw, + _p: PhantomData, + }) + } + } + + /// Returns the name of the property as a `CStr`. + pub fn name(&self) -> &CStr { + // SAFETY: `raw` is valid per the type invariant, and the lifetime of the `CStr` does not + // outlive it. + unsafe { CStr::from_char_ptr((*self.raw).name) } + } + + /// Returns the name of the property as a `&[u8]`. + pub fn value(&self) -> &[u8] { + // SAFETY: `raw` is valid per the type invariant, and the lifetime of the slice does not + // outlive it. + unsafe { core::slice::from_raw_parts((*self.raw).value as *const u8, self.len()) } + } + + /// Returns the length of the property in bytes. + pub fn len(&self) -> usize { + // SAFETY: `raw` is valid per the type invariant. + unsafe { (*self.raw).length.try_into().unwrap() } + } + + /// Returns true if the property is empty (zero-length), which typically represents boolean true. + pub fn is_empty(&self) -> bool { + self.len() == 0 + } +} + +/// A trait that represents a value decodable from a property with a fixed unit size. +/// +/// This allows us to auto-derive property decode implementations for `Vec<T: PropertyUnit>`. +pub trait PropertyUnit: Sized { + /// The size in bytes of a single data unit. + const UNIT_SIZE: usize; + + /// Decode this data unit from a byte slice. The passed slice will have a length of `UNIT_SIZE`. + fn from_bytes(data: &[u8]) -> Result<Self>; +} + +// This doesn't work... +// impl<'a, T: PropertyUnit> TryFrom<Property<'a>> for T { +// type Error = Error; +// +// fn try_from(p: Property<'_>) -> core::result::Result<T, Self::Error> { +// if p.value().len() != T::UNIT_SIZE { +// Err(EINVAL) +// } else { +// Ok(T::from_bytes(p.value())?) +// } +// } +// } + +impl<'a, T: PropertyUnit> TryFrom<Property<'a>> for Vec<T> { + type Error = Error; + + fn try_from(p: Property<'_>) -> core::result::Result<Vec<T>, Self::Error> { + if p.len() % T::UNIT_SIZE != 0 { + return Err(EINVAL); + } + + let mut v = Vec::new(); + let val = p.value(); + for off in (0..p.len()).step_by(T::UNIT_SIZE) { + v.try_push(T::from_bytes(&val[off..off + T::UNIT_SIZE])?)?; + } + Ok(v) + } +} + +macro_rules! 
prop_int_type ( + ($type:ty) => { + impl<'a> TryFrom<Property<'a>> for $type { + type Error = Error; + + fn try_from(p: Property<'_>) -> core::result::Result<$type, Self::Error> { + Ok(<$type>::from_be_bytes(p.value().try_into().or(Err(EINVAL))?)) + } + } + + impl PropertyUnit for $type { + const UNIT_SIZE: usize = <$type>::BITS as usize / 8; + + fn from_bytes(data: &[u8]) -> Result<Self> { + Ok(<$type>::from_be_bytes(data.try_into().or(Err(EINVAL))?)) + } + } + } +); + +prop_int_type!(u8); +prop_int_type!(u16); +prop_int_type!(u32); +prop_int_type!(u64); +prop_int_type!(i8); +prop_int_type!(i16); +prop_int_type!(i32); +prop_int_type!(i64); + +/// An iterator across a collection of Node objects. +/// +/// # Invariants +/// +/// `cur` must be NULL or a valid node owned reference. If NULL, it represents either the first +/// or last position of the iterator. +/// +/// If `done` is true, `cur` must be NULL. +/// +/// fn_next must be a callback that iterates from one node to the next, and it must not capture +/// values that exceed the lifetime of the iterator. It must return owned references and also +/// take owned references. +pub struct NodeIterator<'a, T> +where + T: Fn(*mut bindings::device_node) -> *mut bindings::device_node, +{ + cur: *mut bindings::device_node, + done: bool, + fn_next: T, + _p: PhantomData<&'a T>, +} + +impl<'a, T> NodeIterator<'a, T> +where + T: Fn(*mut bindings::device_node) -> *mut bindings::device_node, +{ + fn new(next: T) -> NodeIterator<'a, T> { + // INVARIANT: `cur` is initialized to NULL to represent the initial state. + NodeIterator { + cur: core::ptr::null_mut(), + done: false, + fn_next: next, + _p: PhantomData, + } + } +} + +impl<'a, T> Iterator for NodeIterator<'a, T> +where + T: Fn(*mut bindings::device_node) -> *mut bindings::device_node, +{ + type Item = Node; + + fn next(&mut self) -> Option<Self::Item> { + if self.done { + None + } else { + // INVARIANT: if the new `cur` is NULL, then the iterator has reached its end and we + // set `done` to `true`. + self.cur = (self.fn_next)(self.cur); + self.done = self.cur.is_null(); + // SAFETY: `fn_next` must return an owned reference per the iterator contract. + // The iterator itself is considered to own this reference, so we take another one. + unsafe { Node::get_from_raw(self.cur) } + } + } +} + +// Drop impl to ensure we drop the current node being iterated on, if any. +impl<'a, T> Drop for NodeIterator<'a, T> +where + T: Fn(*mut bindings::device_node) -> *mut bindings::device_node, +{ + fn drop(&mut self) { + // SAFETY: `cur` is valid or NULL, and `of_node_put()` can handle NULL. + #[cfg(CONFIG_OF_DYNAMIC)] + unsafe { + bindings::of_node_put(self.cur) + }; + } +} + +/// Returns the root node of the OF device tree (if any). +pub fn root() -> Option<Node> { + unsafe { Node::get_from_raw(bindings::of_root) } +} + +/// Returns the /chosen node of the OF device tree (if any). +pub fn chosen() -> Option<Node> { + unsafe { Node::get_from_raw(bindings::of_chosen) } +} + +/// Returns the /aliases node of the OF device tree (if any). +pub fn aliases() -> Option<Node> { + unsafe { Node::get_from_raw(bindings::of_aliases) } +} + +/// Returns the system stdout node of the OF device tree (if any). +pub fn stdout() -> Option<Node> { + unsafe { Node::get_from_raw(bindings::of_stdout) } +} + +#[allow(unused_variables)] +/// Looks up a node in the device tree by phandle. 
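Editor's sketch showing how the node iterator and root accessor above compose: walk the children of the devicetree root and log their names. If there is no device tree, `root()` returns `None` and nothing is printed.

```
use kernel::{of, prelude::*};

/// Logs the full name of every child of the devicetree root, if one exists.
fn dump_root_children() {
    if let Some(root) = of::root() {
        for child in root.children() {
            pr_info!("found node: {}\n", child.full_name());
        }
    }
}
```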
+pub fn find_node_by_phandle(handle: PHandle) -> Option<Node> { + #[cfg(not(CONFIG_OF))] + { + None + } + #[cfg(CONFIG_OF)] + unsafe { + #[allow(dead_code)] + Node::from_raw(bindings::of_find_node_by_phandle(handle)) + } +} + +impl Clone for Node { + fn clone(&self) -> Node { + // SAFETY: `raw_node` is valid and non-NULL per the type invariant, + // so this can never return None. + unsafe { Node::get_from_raw(self.raw_node).unwrap() } + } +} + +impl Drop for Node { + fn drop(&mut self) { + #[cfg(CONFIG_OF_DYNAMIC)] + // SAFETY: `raw_node` is valid per the type invariant. + unsafe { + bindings::of_node_put(self.raw_node) + }; + } +} diff --git a/rust/kernel/platform.rs b/rust/kernel/platform.rs new file mode 100644 index 000000000000..542865da17a1 --- /dev/null +++ b/rust/kernel/platform.rs @@ -0,0 +1,286 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Platform devices and drivers. +//! +//! Also called `platdev`, `pdev`. +//! +//! C header: [`include/linux/platform_device.h`](../../../../include/linux/platform_device.h) + +use crate::{ + bindings, + device::{self, RawDevice}, + driver, + error::{code::*, from_kernel_result, to_result, Result}, + io_mem::{IoMem, IoResource, Resource}, + of, + str::CStr, + types::ForeignOwnable, + ThisModule, +}; + +/// A registration of a platform driver. +pub type Registration<T> = driver::Registration<Adapter<T>>; + +/// An adapter for the registration of platform drivers. +pub struct Adapter<T: Driver>(T); + +impl<T: Driver> driver::DriverOps for Adapter<T> { + type RegType = bindings::platform_driver; + + unsafe fn register( + reg: *mut bindings::platform_driver, + name: &'static CStr, + module: &'static ThisModule, + ) -> Result { + // SAFETY: By the safety requirements of this function (defined in the trait definition), + // `reg` is non-null and valid. + let pdrv = unsafe { &mut *reg }; + + pdrv.driver.name = name.as_char_ptr(); + pdrv.probe = Some(Self::probe_callback); + pdrv.remove = Some(Self::remove_callback); + if let Some(t) = T::OF_DEVICE_ID_TABLE { + pdrv.driver.of_match_table = t.as_ref(); + } + // SAFETY: + // - `pdrv` lives at least until the call to `platform_driver_unregister()` returns. + // - `name` pointer has static lifetime. + // - `module.0` lives at least as long as the module. + // - `probe()` and `remove()` are static functions. + // - `of_match_table` is either a raw pointer with static lifetime, + // as guaranteed by the [`driver::IdTable`] type, or null. + to_result(unsafe { bindings::__platform_driver_register(reg, module.0) }) + } + + unsafe fn unregister(reg: *mut bindings::platform_driver) { + // SAFETY: By the safety requirements of this function (defined in the trait definition), + // `reg` was passed (and updated) by a previous successful call to + // `platform_driver_register`. + unsafe { bindings::platform_driver_unregister(reg) }; + } +} + +impl<T: Driver> Adapter<T> { + fn get_id_info(dev: &Device) -> Option<&'static T::IdInfo> { + let table = T::OF_DEVICE_ID_TABLE?; + + // SAFETY: `table` has static lifetime, so it is valid for read. `dev` is guaranteed to be + // valid while it's alive, so is the raw device returned by it. + let id = unsafe { bindings::of_match_device(table.as_ref(), dev.raw_device()) }; + if id.is_null() { + return None; + } + + // SAFETY: `id` is a pointer within the static table, so it's always valid. 
+ let offset = unsafe { (*id).data }; + if offset.is_null() { + return None; + } + + // SAFETY: The offset comes from a previous call to `offset_from` in `IdArray::new`, which + // guarantees that the resulting pointer is within the table. + let ptr = unsafe { + id.cast::<u8>() + .offset(offset as _) + .cast::<Option<T::IdInfo>>() + }; + + // SAFETY: The id table has a static lifetime, so `ptr` is guaranteed to be valid for read. + #[allow(clippy::needless_borrow)] + unsafe { + (&*ptr).as_ref() + } + } + + extern "C" fn probe_callback(pdev: *mut bindings::platform_device) -> core::ffi::c_int { + from_kernel_result! { + // SAFETY: `pdev` is valid by the contract with the C code. `dev` is alive only for the + // duration of this call, so it is guaranteed to remain alive for the lifetime of + // `pdev`. + let mut dev = unsafe { Device::from_ptr(pdev) }; + let info = Self::get_id_info(&dev); + let data = T::probe(&mut dev, info)?; + // SAFETY: `pdev` is guaranteed to be a valid, non-null pointer. + unsafe { bindings::platform_set_drvdata(pdev, data.into_foreign() as _) }; + Ok(0) + } + } + + extern "C" fn remove_callback(pdev: *mut bindings::platform_device) -> core::ffi::c_int { + from_kernel_result! { + // SAFETY: `pdev` is guaranteed to be a valid, non-null pointer. + let ptr = unsafe { bindings::platform_get_drvdata(pdev) }; + // SAFETY: + // - we allocated this pointer using `T::Data::into_foreign`, + // so it is safe to turn back into a `T::Data`. + // - the allocation happened in `probe`, no-one freed the memory, + // `remove` is the canonical kernel location to free driver data. so OK + // to convert the pointer back to a Rust structure here. + let data = unsafe { T::Data::from_foreign(ptr) }; + let ret = T::remove(&data); + <T::Data as driver::DeviceRemoval>::device_remove(&data); + ret?; + Ok(0) + } + } +} + +/// A platform driver. +pub trait Driver { + /// Data stored on device by driver. + /// + /// Corresponds to the data set or retrieved via the kernel's + /// `platform_{set,get}_drvdata()` functions. + /// + /// Require that `Data` implements `ForeignOwnable`. We guarantee to + /// never move the underlying wrapped data structure. This allows + type Data: ForeignOwnable + Send + Sync + driver::DeviceRemoval = (); + + /// The type holding information about each device id supported by the driver. + type IdInfo: 'static = (); + + /// The table of device ids supported by the driver. + const OF_DEVICE_ID_TABLE: Option<driver::IdTable<'static, of::DeviceId, Self::IdInfo>> = None; + + /// Platform driver probe. + /// + /// Called when a new platform device is added or discovered. + /// Implementers should attempt to initialize the device here. + fn probe(dev: &mut Device, id_info: Option<&Self::IdInfo>) -> Result<Self::Data>; + + /// Platform driver remove. + /// + /// Called when a platform device is removed. + /// Implementers should prepare the device for complete removal here. + fn remove(_data: &Self::Data) -> Result { + Ok(()) + } +} + +/// A platform device. +/// +/// # Invariants +/// +/// The field `ptr` is non-null and valid for the lifetime of the object. +pub struct Device { + ptr: *mut bindings::platform_device, + used_resource: u64, +} + +impl Device { + /// Creates a new device from the given pointer. + /// + /// # Safety + /// + /// `ptr` must be non-null and valid. It must remain valid for the lifetime of the returned + /// instance. 
+ unsafe fn from_ptr(ptr: *mut bindings::platform_device) -> Self { + // INVARIANT: The safety requirements of the function ensure the lifetime invariant. + Self { + ptr, + used_resource: 0, + } + } + + /// Returns id of the platform device. + pub fn id(&self) -> i32 { + // SAFETY: By the type invariants, we know that `self.ptr` is non-null and valid. + unsafe { (*self.ptr).id } + } + + /// Sets the DMA masks (normal and coherent) for a platform device. + pub fn set_dma_masks(&mut self, mask: u64) -> Result { + to_result(unsafe { bindings::dma_set_mask_and_coherent(&mut (*self.ptr).dev, mask) }) + } + + /// Gets a system resources of a platform device. + pub fn get_resource(&mut self, rtype: IoResource, num: usize) -> Result<Resource> { + // SAFETY: `self.ptr` is valid by the type invariant. + let res = unsafe { bindings::platform_get_resource(self.ptr, rtype as _, num as _) }; + if res.is_null() { + return Err(EINVAL); + } + + // Get the position of the found resource in the array. + // SAFETY: + // - `self.ptr` is valid by the type invariant. + // - `res` is a displaced pointer to one of the array's elements, + // and `resource` is its base pointer. + let index = unsafe { res.offset_from((*self.ptr).resource) } as usize; + + // Make sure that the index does not exceed the 64-bit mask. + assert!(index < 64); + + if self.used_resource >> index & 1 == 1 { + return Err(EBUSY); + } + self.used_resource |= 1 << index; + + // SAFETY: The pointer `res` is returned from `bindings::platform_get_resource` + // above and checked if it is not a NULL. + unsafe { Resource::new((*res).start, (*res).end, (*res).flags) }.ok_or(EINVAL) + } + + /// Ioremaps resources of a platform device. + /// + /// # Safety + /// + /// Callers must ensure that either (a) the resulting interface cannot be used to initiate DMA + /// operations, or (b) that DMA operations initiated via the returned interface use DMA handles + /// allocated through the `dma` module. + pub unsafe fn ioremap_resource<const SIZE: usize>( + &mut self, + index: usize, + ) -> Result<IoMem<SIZE>> { + let mask = self.used_resource; + let res = self.get_resource(IoResource::Mem, index)?; + + // SAFETY: Valid by the safety contract. + let iomem = unsafe { IoMem::<SIZE>::try_new(res) }; + // If remapping fails, the given resource won't be used, so restore the old mask. + if iomem.is_err() { + self.used_resource = mask; + } + iomem + } +} + +// SAFETY: The device returned by `raw_device` is the raw platform device. +unsafe impl device::RawDevice for Device { + fn raw_device(&self) -> *mut bindings::device { + // SAFETY: By the type invariants, we know that `self.ptr` is non-null and valid. + unsafe { &mut (*self.ptr).dev } + } +} + +/// Declares a kernel module that exposes a single platform driver. +/// +/// # Examples +/// +/// ```ignore +/// # use kernel::{platform, define_of_id_table, module_platform_driver}; +/// # +/// struct MyDriver; +/// impl platform::Driver for MyDriver { +/// // [...] +/// # fn probe(_dev: &mut platform::Device, _id_info: Option<&Self::IdInfo>) -> Result { +/// # Ok(()) +/// # } +/// # define_of_id_table! {(), [ +/// # (of::DeviceId::Compatible(b"brcm,bcm2835-rng"), None), +/// # ]} +/// } +/// +/// module_platform_driver! { +/// type: MyDriver, +/// name: "module_name", +/// author: "Author name", +/// license: "GPL", +/// } +/// ``` +#[macro_export] +macro_rules! 
module_platform_driver { + ($($f:tt)*) => { + $crate::module_driver!(<T>, $crate::platform::Adapter<T>, { $($f)* }); + }; +} diff --git a/rust/kernel/prelude.rs b/rust/kernel/prelude.rs index 7a90249ee9b9..f145a09603a5 100644 --- a/rust/kernel/prelude.rs +++ b/rust/kernel/prelude.rs @@ -11,15 +11,23 @@ //! use kernel::prelude::*; //! ``` +#[doc(no_inline)] pub use core::pin::Pin; +#[doc(no_inline)] pub use alloc::{boxed::Box, vec::Vec}; +#[doc(no_inline)] pub use macros::{module, vtable}; pub use super::build_assert; -pub use super::{dbg, pr_alert, pr_crit, pr_debug, pr_emerg, pr_err, pr_info, pr_notice, pr_warn}; +// `super::std_vendor` is hidden, which makes the macro inline for some reason. +#[doc(no_inline)] +pub use super::dbg; +pub use super::fmt; +pub use super::{dev_alert, dev_crit, dev_dbg, dev_emerg, dev_err, dev_info, dev_notice, dev_warn}; +pub use super::{pr_alert, pr_crit, pr_debug, pr_emerg, pr_err, pr_info, pr_notice, pr_warn}; pub use super::static_assert; diff --git a/rust/kernel/revocable.rs b/rust/kernel/revocable.rs new file mode 100644 index 000000000000..1093c4d26026 --- /dev/null +++ b/rust/kernel/revocable.rs @@ -0,0 +1,425 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Revocable objects. +//! +//! The [`Revocable`] type wraps other types and allows access to them to be revoked. The existence +//! of a [`RevocableGuard`] ensures that objects remain valid. + +use crate::{bindings, sync::rcu}; +use core::{ + cell::UnsafeCell, + marker::PhantomData, + mem::MaybeUninit, + ops::Deref, + ptr::drop_in_place, + sync::atomic::{fence, AtomicBool, AtomicU32, Ordering}, +}; + +/// An object that can become inaccessible at runtime. +/// +/// Once access is revoked and all concurrent users complete (i.e., all existing instances of +/// [`RevocableGuard`] are dropped), the wrapped object is also dropped. +/// +/// # Examples +/// +/// ``` +/// # use kernel::revocable::Revocable; +/// +/// struct Example { +/// a: u32, +/// b: u32, +/// } +/// +/// fn add_two(v: &Revocable<Example>) -> Option<u32> { +/// let guard = v.try_access()?; +/// Some(guard.a + guard.b) +/// } +/// +/// let v = Revocable::new(Example { a: 10, b: 20 }); +/// assert_eq!(add_two(&v), Some(30)); +/// v.revoke(); +/// assert_eq!(add_two(&v), None); +/// ``` +/// +/// Sample example as above, but explicitly using the rcu read side lock. +/// +/// ``` +/// # use kernel::revocable::Revocable; +/// use kernel::sync::rcu; +/// +/// struct Example { +/// a: u32, +/// b: u32, +/// } +/// +/// fn add_two(v: &Revocable<Example>) -> Option<u32> { +/// let guard = rcu::read_lock(); +/// let e = v.try_access_with_guard(&guard)?; +/// Some(e.a + e.b) +/// } +/// +/// let v = Revocable::new(Example { a: 10, b: 20 }); +/// assert_eq!(add_two(&v), Some(30)); +/// v.revoke(); +/// assert_eq!(add_two(&v), None); +/// ``` +pub struct Revocable<T> { + is_available: AtomicBool, + data: MaybeUninit<UnsafeCell<T>>, +} + +// SAFETY: `Revocable` is `Send` if the wrapped object is also `Send`. This is because while the +// functionality exposed by `Revocable` can be accessed from any thread/CPU, it is possible that +// this isn't supported by the wrapped object. +unsafe impl<T: Send> Send for Revocable<T> {} + +// SAFETY: `Revocable` is `Sync` if the wrapped object is both `Send` and `Sync`. We require `Send` +// from the wrapped object as well because of `Revocable::revoke`, which can trigger the `Drop` +// implementation of the wrapped object from an arbitrary thread. 
+unsafe impl<T: Sync + Send> Sync for Revocable<T> {} + +impl<T> Revocable<T> { + /// Creates a new revocable instance of the given data. + pub const fn new(data: T) -> Self { + Self { + is_available: AtomicBool::new(true), + data: MaybeUninit::new(UnsafeCell::new(data)), + } + } + + /// Tries to access the \[revocable\] wrapped object. + /// + /// Returns `None` if the object has been revoked and is therefore no longer accessible. + /// + /// Returns a guard that gives access to the object otherwise; the object is guaranteed to + /// remain accessible while the guard is alive. In such cases, callers are not allowed to sleep + /// because another CPU may be waiting to complete the revocation of this object. + pub fn try_access(&self) -> Option<RevocableGuard<'_, T>> { + let guard = rcu::read_lock(); + if self.is_available.load(Ordering::Relaxed) { + // SAFETY: Since `self.is_available` is true, data is initialised and has to remain + // valid because the RCU read side lock prevents it from being dropped. + Some(unsafe { RevocableGuard::new(self.data.assume_init_ref().get(), guard) }) + } else { + None + } + } + + /// Tries to access the \[revocable\] wrapped object. + /// + /// Returns `None` if the object has been revoked and is therefore no longer accessible. + /// + /// Returns a shared reference to the object otherwise; the object is guaranteed to + /// remain accessible while the rcu read side guard is alive. In such cases, callers are not + /// allowed to sleep because another CPU may be waiting to complete the revocation of this + /// object. + pub fn try_access_with_guard<'a>(&'a self, _guard: &'a rcu::Guard) -> Option<&'a T> { + if self.is_available.load(Ordering::Relaxed) { + // SAFETY: Since `self.is_available` is true, data is initialised and has to remain + // valid because the RCU read side lock prevents it from being dropped. + Some(unsafe { &*self.data.assume_init_ref().get() }) + } else { + None + } + } + + /// Revokes access to and drops the wrapped object. + /// + /// Access to the object is revoked immediately to new callers of [`Revocable::try_access`]. If + /// there are concurrent users of the object (i.e., ones that called [`Revocable::try_access`] + /// beforehand and still haven't dropped the returned guard), this function waits for the + /// concurrent access to complete before dropping the wrapped object. + pub fn revoke(&self) { + if self + .is_available + .compare_exchange(true, false, Ordering::Relaxed, Ordering::Relaxed) + .is_ok() + { + // SAFETY: Just an FFI call, there are no further requirements. + unsafe { bindings::synchronize_rcu() }; + + // SAFETY: We know `self.data` is valid because only one CPU can succeed the + // `compare_exchange` above that takes `is_available` from `true` to `false`. + unsafe { drop_in_place(self.data.assume_init_ref().get()) }; + } + } +} + +impl<T> Drop for Revocable<T> { + fn drop(&mut self) { + // Drop only if the data hasn't been revoked yet (in which case it has already been + // dropped). + if *self.is_available.get_mut() { + // SAFETY: We know `self.data` is valid because no other CPU has changed + // `is_available` to `false` yet, and no other CPU can do it anymore because this CPU + // holds the only reference (mutable) to `self` now. + unsafe { drop_in_place(self.data.assume_init_ref().get()) }; + } + } +} + +/// A guard that allows access to a revocable object and keeps it alive. +/// +/// CPUs may not sleep while holding on to [`RevocableGuard`] because it's in atomic context +/// holding the RCU read-side lock. 
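+///
+/// A minimal sketch of keeping the guard short-lived so that the caller may sleep again
+/// afterwards (`might_sleep_here` is a hypothetical helper):
+///
+/// ```ignore
+/// fn read_value(v: &kernel::revocable::Revocable<u32>) -> Option<u32> {
+///     // Copy the value out; the temporary guard is dropped at the end of this statement.
+///     let value = *v.try_access()?;
+///     // No guard is held any more, so sleeping is allowed again.
+///     might_sleep_here(value);
+///     Some(value)
+/// }
+/// ```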
+/// +/// # Invariants +/// +/// The RCU read-side lock is held while the guard is alive. +pub struct RevocableGuard<'a, T> { + data_ref: *const T, + _rcu_guard: rcu::Guard, + _p: PhantomData<&'a ()>, +} + +impl<T> RevocableGuard<'_, T> { + fn new(data_ref: *const T, rcu_guard: rcu::Guard) -> Self { + Self { + data_ref, + _rcu_guard: rcu_guard, + _p: PhantomData, + } + } +} + +impl<T> Deref for RevocableGuard<'_, T> { + type Target = T; + + fn deref(&self) -> &Self::Target { + // SAFETY: By the type invariants, we hold the rcu read-side lock, so the object is + // guaranteed to remain valid. + unsafe { &*self.data_ref } + } +} + +/// An object that can become inaccessible at runtime. +/// +/// Once access is revoked and all concurrent users complete (i.e., all existing instances of +/// [`AsyncRevocableGuard`] are dropped), the wrapped object is also dropped. +/// +/// Unlike [`Revocable`], [`AsyncRevocable`] does not wait for concurrent users of the wrapped +/// object to finish before [`AsyncRevocable::revoke`] completes -- thus the async qualifier. This +/// has the advantage of not requiring RCU locks or waits of any kind. +/// +/// # Examples +/// +/// ``` +/// # use kernel::revocable::AsyncRevocable; +/// +/// struct Example { +/// a: u32, +/// b: u32, +/// } +/// +/// fn add_two(v: &AsyncRevocable<Example>) -> Option<u32> { +/// let guard = v.try_access()?; +/// Some(guard.a + guard.b) +/// } +/// +/// let v = AsyncRevocable::new(Example { a: 10, b: 20 }); +/// assert_eq!(add_two(&v), Some(30)); +/// v.revoke(); +/// assert_eq!(add_two(&v), None); +/// ``` +/// +/// Example where revocation happens while there is a user: +/// +/// ``` +/// # use kernel::revocable::AsyncRevocable; +/// use core::sync::atomic::{AtomicBool, Ordering}; +/// +/// struct Example { +/// a: u32, +/// b: u32, +/// } +/// +/// static DROPPED: AtomicBool = AtomicBool::new(false); +/// +/// impl Drop for Example { +/// fn drop(&mut self) { +/// DROPPED.store(true, Ordering::Relaxed); +/// } +/// } +/// +/// fn add_two(v: &AsyncRevocable<Example>) -> Option<u32> { +/// let guard = v.try_access()?; +/// Some(guard.a + guard.b) +/// } +/// +/// let v = AsyncRevocable::new(Example { a: 10, b: 20 }); +/// assert_eq!(add_two(&v), Some(30)); +/// +/// let guard = v.try_access().unwrap(); +/// assert!(!v.is_revoked()); +/// assert!(!DROPPED.load(Ordering::Relaxed)); +/// v.revoke(); +/// assert!(!DROPPED.load(Ordering::Relaxed)); +/// assert!(v.is_revoked()); +/// assert!(v.try_access().is_none()); +/// assert_eq!(guard.a + guard.b, 30); +/// drop(guard); +/// assert!(DROPPED.load(Ordering::Relaxed)); +/// ``` +pub struct AsyncRevocable<T> { + usage_count: AtomicU32, + data: MaybeUninit<UnsafeCell<T>>, +} + +// SAFETY: `AsyncRevocable` is `Send` if the wrapped object is also `Send`. This is because while +// the functionality exposed by `AsyncRevocable` can be accessed from any thread/CPU, it is +// possible that this isn't supported by the wrapped object. +unsafe impl<T: Send> Send for AsyncRevocable<T> {} + +// SAFETY: `AsyncRevocable` is `Sync` if the wrapped object is both `Send` and `Sync`. We require +// `Send` from the wrapped object as well because of `AsyncRevocable::revoke`, which can trigger +// the `Drop` implementation of the wrapped object from an arbitrary thread. 
+unsafe impl<T: Sync + Send> Sync for AsyncRevocable<T> {} + +const REVOKED: u32 = 0x80000000; +const COUNT_MASK: u32 = !REVOKED; +const SATURATED_COUNT: u32 = REVOKED - 1; + +impl<T> AsyncRevocable<T> { + /// Creates a new asynchronously revocable instance of the given data. + pub fn new(data: T) -> Self { + Self { + usage_count: AtomicU32::new(0), + data: MaybeUninit::new(UnsafeCell::new(data)), + } + } + + /// Tries to access the \[revocable\] wrapped object. + /// + /// Returns `None` if the object has been revoked and is therefore no longer accessible. + /// + /// Returns a guard that gives access to the object otherwise; the object is guaranteed to + /// remain accessible while the guard is alive. + pub fn try_access(&self) -> Option<AsyncRevocableGuard<'_, T>> { + loop { + let count = self.usage_count.load(Ordering::Relaxed); + + // Fail attempt to access if the object is already revoked. + if count & REVOKED != 0 { + return None; + } + + // No need to increment if the count is saturated. + if count == SATURATED_COUNT + || self + .usage_count + .compare_exchange(count, count + 1, Ordering::Relaxed, Ordering::Relaxed) + .is_ok() + { + return Some(AsyncRevocableGuard { revocable: self }); + } + } + } + + /// Revokes access to the protected object. + /// + /// Returns `true` if access has been revoked, or `false` when the object has already been + /// revoked by a previous call to [`AsyncRevocable::revoke`]. + /// + /// This call is non-blocking, that is, no new users of the revocable object will be allowed, + /// but potential current users are able to continue to use it and the thread won't wait for + /// them to finish. In such cases, the object will be dropped when the last user completes. + pub fn revoke(&self) -> bool { + // Set the `REVOKED` bit. + // + // The acquire barrier matches up with the release when decrementing the usage count. + let prev = self.usage_count.fetch_or(REVOKED, Ordering::Acquire); + if prev & REVOKED != 0 { + // Another thread already revoked this object. + return false; + } + + if prev == 0 { + // SAFETY: This thread just revoked the object and the usage count is zero, so the + // object is valid and there will be no future users. + unsafe { drop_in_place(UnsafeCell::raw_get(self.data.as_ptr())) }; + } + + true + } + + /// Returns whether access to the object has been revoked. + pub fn is_revoked(&self) -> bool { + self.usage_count.load(Ordering::Relaxed) & REVOKED != 0 + } +} + +impl<T> Drop for AsyncRevocable<T> { + fn drop(&mut self) { + let count = *self.usage_count.get_mut(); + if count != REVOKED { + // The object hasn't been dropped yet, so we do it now. + + // This matches with the release when decrementing the usage count. + fence(Ordering::Acquire); + + // SAFETY: Since `count` is does not indicate a count of 0 and the REVOKED bit set, the + // object is still valid. + unsafe { drop_in_place(UnsafeCell::raw_get(self.data.as_ptr())) }; + } + } +} + +/// A guard that allows access to a revocable object and keeps it alive. +/// +/// # Invariants +/// +/// The owner owns an increment on the usage count (which may have saturated it), which keeps the +/// revocable object alive. +pub struct AsyncRevocableGuard<'a, T> { + revocable: &'a AsyncRevocable<T>, +} + +impl<T> Deref for AsyncRevocableGuard<'_, T> { + type Target = T; + + fn deref(&self) -> &Self::Target { + // SAFETY: The type invariants guarantee that the caller owns an increment. 
+        unsafe { &*self.revocable.data.assume_init_ref().get() }
+    }
+}
+
+impl<T> Drop for AsyncRevocableGuard<'_, T> {
+    fn drop(&mut self) {
+        loop {
+            let count = self.revocable.usage_count.load(Ordering::Relaxed);
+            let actual_count = count & COUNT_MASK;
+            if actual_count == SATURATED_COUNT {
+                // The count is saturated, so we won't decrement (nor do we drop the object).
+                return;
+            }
+
+            if actual_count == 0 {
+                // Trying to underflow the count.
+                panic!("actual_count is zero");
+            }
+
+            // On success, we use release ordering, which matches with the acquire in one of the
+            // places where we drop the object, namely: below, in `AsyncRevocable::revoke`, or in
+            // `AsyncRevocable::drop`.
+            if self
+                .revocable
+                .usage_count
+                .compare_exchange(count, count - 1, Ordering::Release, Ordering::Relaxed)
+                .is_ok()
+            {
+                if count == 1 | REVOKED {
+                    // `count` is now zero and it is revoked, so free it now.
+
+                    // This matches with the release above (which may have happened in other
+                    // threads concurrently).
+                    fence(Ordering::Acquire);
+
+                    // SAFETY: Since `count` was 1, the object is still alive.
+                    unsafe { drop_in_place(UnsafeCell::raw_get(self.revocable.data.as_ptr())) };
+                }
+
+                return;
+            }
+        }
+    }
+}
diff --git a/rust/kernel/soc/apple/mod.rs b/rust/kernel/soc/apple/mod.rs
new file mode 100644
index 000000000000..dd69db63677d
--- /dev/null
+++ b/rust/kernel/soc/apple/mod.rs
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+//! Apple SoC drivers
+
+#[cfg(CONFIG_APPLE_RTKIT = "y")]
+pub mod rtkit;
diff --git a/rust/kernel/soc/apple/rtkit.rs b/rust/kernel/soc/apple/rtkit.rs
new file mode 100644
index 000000000000..bde63cd00193
--- /dev/null
+++ b/rust/kernel/soc/apple/rtkit.rs
@@ -0,0 +1,277 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+//! Support for Apple RTKit coprocessors.
+//!
+//! C header: [`include/linux/soc/apple/rtkit.h`](../../../../include/linux/soc/apple/rtkit.h)
+
+use crate::{
+    bindings, device,
+    error::{code::*, from_kernel_err_ptr, to_result, Error, Result},
+    str::CStr,
+    types::{ForeignOwnable, ScopeGuard},
+};
+
+use alloc::boxed::Box;
+use core::marker::PhantomData;
+use core::ptr;
+use macros::vtable;
+
+/// Trait to represent allocatable buffers for the RTKit core.
+///
+/// Users must implement this trait for their own representation of those allocations.
+pub trait Buffer {
+    /// Returns the IOVA (I/O virtual address) of the buffer from RTKit's point of view, or an
+    /// error if unavailable.
+    fn iova(&self) -> Result<usize>;
+
+    /// Returns a mutable byte slice of the buffer contents, or an error if unavailable.
+    fn buf(&mut self) -> Result<&mut [u8]>;
+}
+
+/// Callback operations for an RTKit client.
+#[vtable]
+pub trait Operations {
+    /// Arbitrary user context type.
+    type Data: ForeignOwnable + Send + Sync;
+
+    /// Type representing an allocated buffer for RTKit.
+    type Buffer: Buffer;
+
+    /// Called when RTKit crashes.
+    fn crashed(_data: <Self::Data as ForeignOwnable>::Borrowed<'_>) {}
+
+    /// Called when a message was received on a non-system endpoint. Called in non-IRQ context.
+    fn recv_message(
+        _data: <Self::Data as ForeignOwnable>::Borrowed<'_>,
+        _endpoint: u8,
+        _message: u64,
+    ) {
+    }
+
+    /// Called in IRQ context when a message was received on a non-system endpoint.
+    ///
+    /// Must return `true` if the message is handled, or `false` to process it in
+    /// the handling thread.
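+    ///
+    /// A minimal sketch of an implementation; the endpoint number is hypothetical, and only a
+    /// cheap check is done in IRQ context while everything else is deferred to the thread:
+    ///
+    /// ```ignore
+    /// fn recv_message_early(
+    ///     _data: <Self::Data as ForeignOwnable>::Borrowed<'_>,
+    ///     endpoint: u8,
+    ///     message: u64,
+    /// ) -> bool {
+    ///     // Claim only trivial acknowledgements on endpoint 0x20; everything else goes to
+    ///     // `recv_message()` in the handling thread.
+    ///     endpoint == 0x20 && message == 0
+    /// }
+    /// ```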
+ fn recv_message_early( + _data: <Self::Data as ForeignOwnable>::Borrowed<'_>, + _endpoint: u8, + _message: u64, + ) -> bool { + false + } + + /// Allocate a buffer for use by RTKit. + fn shmem_alloc( + _data: <Self::Data as ForeignOwnable>::Borrowed<'_>, + _size: usize, + ) -> Result<Self::Buffer> { + Err(EINVAL) + } + + /// Map an existing buffer used by RTKit at a device-specified virtual address. + fn shmem_map( + _data: <Self::Data as ForeignOwnable>::Borrowed<'_>, + _iova: usize, + _size: usize, + ) -> Result<Self::Buffer> { + Err(EINVAL) + } +} + +/// Represents `struct apple_rtkit *`. +/// +/// # Invariants +/// +/// The rtk pointer is valid. +/// The data pointer is a valid pointer from T::Data::into_foreign(). +pub struct RtKit<T: Operations> { + rtk: *mut bindings::apple_rtkit, + data: *mut core::ffi::c_void, + _p: PhantomData<T>, +} + +unsafe extern "C" fn crashed_callback<T: Operations>(cookie: *mut core::ffi::c_void) { + T::crashed(unsafe { T::Data::borrow(cookie) }); +} + +unsafe extern "C" fn recv_message_callback<T: Operations>( + cookie: *mut core::ffi::c_void, + endpoint: u8, + message: u64, +) { + T::recv_message(unsafe { T::Data::borrow(cookie) }, endpoint, message); +} + +unsafe extern "C" fn recv_message_early_callback<T: Operations>( + cookie: *mut core::ffi::c_void, + endpoint: u8, + message: u64, +) -> bool { + T::recv_message_early(unsafe { T::Data::borrow(cookie) }, endpoint, message) +} + +unsafe extern "C" fn shmem_setup_callback<T: Operations>( + cookie: *mut core::ffi::c_void, + bfr: *mut bindings::apple_rtkit_shmem, +) -> core::ffi::c_int { + // SAFETY: `bfr` is a valid buffer + let bfr_mut = unsafe { &mut *bfr }; + + let buf = if bfr_mut.iova != 0 { + bfr_mut.is_mapped = true; + T::shmem_map( + // SAFETY: `cookie` came from a previous call to `into_foreign`. + unsafe { T::Data::borrow(cookie) }, + bfr_mut.iova as usize, + bfr_mut.size, + ) + } else { + bfr_mut.is_mapped = false; + // SAFETY: `cookie` came from a previous call to `into_foreign`. + T::shmem_alloc(unsafe { T::Data::borrow(cookie) }, bfr_mut.size) + }; + + let mut buf = match buf { + Err(e) => { + return e.to_kernel_errno(); + } + Ok(buf) => buf, + }; + + let iova = match buf.iova() { + Err(e) => { + return e.to_kernel_errno(); + } + Ok(iova) => iova, + }; + + let slice = match buf.buf() { + Err(e) => { + return e.to_kernel_errno(); + } + Ok(slice) => slice, + }; + + if slice.len() < bfr_mut.size { + return ENOMEM.to_kernel_errno(); + } + + bfr_mut.iova = iova as u64; + bfr_mut.buffer = slice.as_mut_ptr() as *mut _; + + // Now box the returned buffer type and stash it in the private pointer of the + // `apple_rtkit_shmem` struct for safekeeping. + match Box::try_new(buf) { + Err(e) => Error::from(e).to_kernel_errno(), + Ok(boxed) => { + bfr_mut.private = Box::into_raw(boxed) as *mut _; + 0 + } + } +} + +unsafe extern "C" fn shmem_destroy_callback<T: Operations>( + _cookie: *mut core::ffi::c_void, + bfr: *mut bindings::apple_rtkit_shmem, +) { + let bfr_mut = unsafe { &mut *bfr }; + // SAFETY: Per shmem_setup_callback, this has to be a pointer to a Buffer if it is set. 
+ if !bfr_mut.private.is_null() { + unsafe { + core::mem::drop(Box::from_raw(bfr_mut.private as *mut T::Buffer)); + } + bfr_mut.private = core::ptr::null_mut(); + } +} + +impl<T: Operations> RtKit<T> { + const VTABLE: bindings::apple_rtkit_ops = bindings::apple_rtkit_ops { + crashed: Some(crashed_callback::<T>), + recv_message: Some(recv_message_callback::<T>), + recv_message_early: Some(recv_message_early_callback::<T>), + shmem_setup: if T::HAS_SHMEM_ALLOC || T::HAS_SHMEM_MAP { + Some(shmem_setup_callback::<T>) + } else { + None + }, + shmem_destroy: if T::HAS_SHMEM_ALLOC || T::HAS_SHMEM_MAP { + Some(shmem_destroy_callback::<T>) + } else { + None + }, + }; + + /// Creates a new RTKit client for a given device and optional mailbox name or index. + pub fn new( + dev: &dyn device::RawDevice, + mbox_name: Option<&'static CStr>, + mbox_idx: usize, + data: T::Data, + ) -> Result<Self> { + let ptr = data.into_foreign() as *mut _; + let guard = ScopeGuard::new(|| { + // SAFETY: `ptr` came from a previous call to `into_foreign`. + unsafe { T::Data::from_foreign(ptr) }; + }); + // SAFETY: This just calls the C init function. + let rtk = unsafe { + from_kernel_err_ptr(bindings::apple_rtkit_init( + dev.raw_device(), + ptr, + match mbox_name { + Some(s) => s.as_char_ptr(), + None => ptr::null(), + }, + mbox_idx.try_into()?, + &Self::VTABLE, + )) + }?; + + guard.dismiss(); + // INVARIANT: `rtk` and `data` are valid here. + Ok(Self { + rtk, + data: ptr, + _p: PhantomData, + }) + } + + /// Boots (wakes up) the RTKit coprocessor. + pub fn boot(&mut self) -> Result { + // SAFETY: `rtk` is valid per the type invariant. + to_result(unsafe { bindings::apple_rtkit_boot(self.rtk) }) + } + + /// Starts a non-system endpoint. + pub fn start_endpoint(&mut self, endpoint: u8) -> Result { + // SAFETY: `rtk` is valid per the type invariant. + to_result(unsafe { bindings::apple_rtkit_start_ep(self.rtk, endpoint) }) + } + + /// Sends a message to a given endpoint. + pub fn send_message(&mut self, endpoint: u8, message: u64) -> Result { + // SAFETY: `rtk` is valid per the type invariant. + to_result(unsafe { + bindings::apple_rtkit_send_message(self.rtk, endpoint, message, ptr::null_mut(), false) + }) + } +} + +// SAFETY: `RtKit` operations require a mutable reference +unsafe impl<T: Operations> Sync for RtKit<T> {} + +// SAFETY: `RtKit` operations require a mutable reference +unsafe impl<T: Operations> Send for RtKit<T> {} + +impl<T: Operations> Drop for RtKit<T> { + fn drop(&mut self) { + // SAFETY: The pointer is valid by the type invariant. + unsafe { bindings::apple_rtkit_free(self.rtk) }; + + // Free context data. + // + // SAFETY: This matches the call to `into_foreign` from `new` in the success case. + unsafe { T::Data::from_foreign(self.data) }; + } +} diff --git a/rust/kernel/soc/mod.rs b/rust/kernel/soc/mod.rs new file mode 100644 index 000000000000..e3024042e74f --- /dev/null +++ b/rust/kernel/soc/mod.rs @@ -0,0 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! SoC drivers + +pub mod apple; diff --git a/rust/kernel/std_vendor.rs b/rust/kernel/std_vendor.rs index b3e68b24a8c6..2e864354cac1 100644 --- a/rust/kernel/std_vendor.rs +++ b/rust/kernel/std_vendor.rs @@ -137,6 +137,7 @@ /// [`std::dbg`]: https://doc.rust-lang.org/std/macro.dbg.html /// [`eprintln`]: https://doc.rust-lang.org/std/macro.eprintln.html /// [`printk`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html +#[allow(rustdoc::broken_intra_doc_links)] #[macro_export] macro_rules! 
dbg { // NOTE: We cannot use `concat!` to make a static string as a format argument diff --git a/rust/kernel/sync.rs b/rust/kernel/sync.rs new file mode 100644 index 000000000000..c86faf35999c --- /dev/null +++ b/rust/kernel/sync.rs @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Synchronisation primitives. +//! +//! This module contains the kernel APIs related to synchronisation that have been ported or +//! wrapped for usage by Rust code in the kernel. + +mod arc; +mod condvar; +mod guard; +mod mutex; +pub mod rcu; +mod revocable; +pub mod smutex; + +use crate::{bindings, str::CStr}; +use core::{cell::UnsafeCell, mem::MaybeUninit, pin::Pin}; + +pub use arc::{Arc, ArcBorrow, UniqueArc}; +pub use condvar::CondVar; +pub use guard::{Guard, Lock, LockFactory, LockInfo, LockIniter, ReadLock, WriteLock}; +pub use mutex::{Mutex, RevocableMutex, RevocableMutexGuard}; +pub use revocable::{Revocable, RevocableGuard}; + +/// Represents a lockdep class. It's a wrapper around C's `lock_class_key`. +#[repr(transparent)] +pub struct LockClassKey(UnsafeCell<MaybeUninit<bindings::lock_class_key>>); + +// SAFETY: This is a wrapper around a lock class key, so it is safe to use references to it from +// any thread. +unsafe impl Sync for LockClassKey {} + +impl LockClassKey { + /// Creates a new lock class key. + pub const fn new() -> Self { + Self(UnsafeCell::new(MaybeUninit::uninit())) + } + + #[allow(dead_code)] + pub(crate) fn get(&self) -> *mut bindings::lock_class_key { + self.0.get().cast() + } +} + +/// Safely initialises an object that has an `init` function that takes a name and a lock class as +/// arguments, examples of these are [`Mutex`] and [`SpinLock`]. Each of them also provides a more +/// specialised name that uses this macro. +#[doc(hidden)] +#[macro_export] +macro_rules! init_with_lockdep { + ($obj:expr, $name:expr) => {{ + static CLASS1: $crate::sync::LockClassKey = $crate::sync::LockClassKey::new(); + static CLASS2: $crate::sync::LockClassKey = $crate::sync::LockClassKey::new(); + let obj = $obj; + let name = $crate::c_str!($name); + $crate::sync::NeedsLockClass::init(obj, name, &CLASS1, &CLASS2) + }}; +} + +/// A trait for types that need a lock class during initialisation. +/// +/// Implementers of this trait benefit from the [`init_with_lockdep`] macro that generates a new +/// class for each initialisation call site. +pub trait NeedsLockClass { + /// Initialises the type instance so that it can be safely used. + /// + /// Callers are encouraged to use the [`init_with_lockdep`] macro as it automatically creates a + /// new lock class on each usage. + fn init( + self: Pin<&mut Self>, + name: &'static CStr, + key1: &'static LockClassKey, + key2: &'static LockClassKey, + ); +} diff --git a/rust/kernel/sync/arc.rs b/rust/kernel/sync/arc.rs new file mode 100644 index 000000000000..a1cc6c0882c4 --- /dev/null +++ b/rust/kernel/sync/arc.rs @@ -0,0 +1,577 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! A reference-counted pointer. +//! +//! This module implements a way for users to create reference-counted objects and pointers to +//! them. Such a pointer automatically increments and decrements the count, and drops the +//! underlying object when it reaches zero. It is also safe to use concurrently from multiple +//! threads. +//! +//! It is different from the standard library's [`Arc`] in a few ways: +//! 1. It is backed by the kernel's `refcount_t` type. +//! 2. It does not support weak references, which allows it to be half the size. +//! 3. 
It saturates the reference count instead of aborting when it goes over a threshold. +//! 4. It does not provide a `get_mut` method, so the ref counted object is pinned. +//! +//! [`Arc`]: https://doc.rust-lang.org/std/sync/struct.Arc.html + +use crate::{ + bindings, + error::Result, + types::{ForeignOwnable, Opaque}, +}; +use alloc::boxed::Box; +use core::{ + any::Any, + fmt, + marker::{PhantomData, Unsize}, + mem::{ManuallyDrop, MaybeUninit}, + ops::{Deref, DerefMut}, + pin::Pin, + ptr::NonNull, +}; + +/// A reference-counted pointer to an instance of `T`. +/// +/// The reference count is incremented when new instances of [`Arc`] are created, and decremented +/// when they are dropped. When the count reaches zero, the underlying `T` is also dropped. +/// +/// # Invariants +/// +/// The reference count on an instance of [`Arc`] is always non-zero. +/// The object pointed to by [`Arc`] is always pinned. +/// +/// # Examples +/// +/// ``` +/// use kernel::sync::Arc; +/// +/// struct Example { +/// a: u32, +/// b: u32, +/// } +/// +/// // Create a ref-counted instance of `Example`. +/// let obj = Arc::try_new(Example { a: 10, b: 20 })?; +/// +/// // Get a new pointer to `obj` and increment the refcount. +/// let cloned = obj.clone(); +/// +/// // Assert that both `obj` and `cloned` point to the same underlying object. +/// assert!(core::ptr::eq(&*obj, &*cloned)); +/// +/// // Destroy `obj` and decrement its refcount. +/// drop(obj); +/// +/// // Check that the values are still accessible through `cloned`. +/// assert_eq!(cloned.a, 10); +/// assert_eq!(cloned.b, 20); +/// +/// // The refcount drops to zero when `cloned` goes out of scope, and the memory is freed. +/// ``` +/// +/// Using `Arc<T>` as the type of `self`: +/// +/// ``` +/// use kernel::sync::Arc; +/// +/// struct Example { +/// a: u32, +/// b: u32, +/// } +/// +/// impl Example { +/// fn take_over(self: Arc<Self>) { +/// // ... +/// } +/// +/// fn use_reference(self: &Arc<Self>) { +/// // ... +/// } +/// } +/// +/// let obj = Arc::try_new(Example { a: 10, b: 20 })?; +/// obj.use_reference(); +/// obj.take_over(); +/// ``` +/// +/// Coercion from `Arc<Example>` to `Arc<dyn MyTrait>`: +/// +/// ``` +/// use kernel::sync::{Arc, ArcBorrow}; +/// +/// trait MyTrait { +/// // Trait has a function whose `self` type is `Arc<Self>`. +/// fn example1(self: Arc<Self>) {} +/// +/// // Trait has a function whose `self` type is `ArcBorrow<'_, Self>`. +/// fn example2(self: ArcBorrow<'_, Self>) {} +/// } +/// +/// struct Example; +/// impl MyTrait for Example {} +/// +/// // `obj` has type `Arc<Example>`. +/// let obj: Arc<Example> = Arc::try_new(Example)?; +/// +/// // `coerced` has type `Arc<dyn MyTrait>`. +/// let coerced: Arc<dyn MyTrait> = obj; +/// ``` +pub struct Arc<T: ?Sized> { + ptr: NonNull<ArcInner<T>>, + _p: PhantomData<ArcInner<T>>, +} + +#[repr(C)] +struct ArcInner<T: ?Sized> { + refcount: Opaque<bindings::refcount_t>, + data: T, +} + +// This is to allow [`Arc`] (and variants) to be used as the type of `self`. +impl<T: ?Sized> core::ops::Receiver for Arc<T> {} + +// This is to allow coercion from `Arc<T>` to `Arc<U>` if `T` can be converted to the +// dynamically-sized type (DST) `U`. +impl<T: ?Sized + Unsize<U>, U: ?Sized> core::ops::CoerceUnsized<Arc<U>> for Arc<T> {} + +// This is to allow `Arc<U>` to be dispatched on when `Arc<T>` can be coerced into `Arc<U>`. 
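+// For example, a trait method that takes `self: Arc<Self>` can then be called through an
+// `Arc<dyn MyTrait>` trait object, as in the `Arc<dyn MyTrait>` example above.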
+impl<T: ?Sized + Unsize<U>, U: ?Sized> core::ops::DispatchFromDyn<Arc<U>> for Arc<T> {} + +// SAFETY: It is safe to send `Arc<T>` to another thread when the underlying `T` is `Sync` because +// it effectively means sharing `&T` (which is safe because `T` is `Sync`); additionally, it needs +// `T` to be `Send` because any thread that has an `Arc<T>` may ultimately access `T` directly, for +// example, when the reference count reaches zero and `T` is dropped. +unsafe impl<T: ?Sized + Sync + Send> Send for Arc<T> {} + +// SAFETY: It is safe to send `&Arc<T>` to another thread when the underlying `T` is `Sync` for the +// same reason as above. `T` needs to be `Send` as well because a thread can clone an `&Arc<T>` +// into an `Arc<T>`, which may lead to `T` being accessed by the same reasoning as above. +unsafe impl<T: ?Sized + Sync + Send> Sync for Arc<T> {} + +impl<T> Arc<T> { + /// Constructs a new reference counted instance of `T`. + pub fn try_new(contents: T) -> Result<Self> { + // INVARIANT: The refcount is initialised to a non-zero value. + let value = ArcInner { + // SAFETY: There are no safety requirements for this FFI call. + refcount: Opaque::new(unsafe { bindings::REFCOUNT_INIT(1) }), + data: contents, + }; + + let inner = Box::try_new(value)?; + + // SAFETY: We just created `inner` with a reference count of 1, which is owned by the new + // `Arc` object. + Ok(unsafe { Self::from_inner(Box::leak(inner).into()) }) + } +} + +impl<T: ?Sized> Arc<T> { + /// Constructs a new [`Arc`] from an existing [`ArcInner`]. + /// + /// # Safety + /// + /// The caller must ensure that `inner` points to a valid location and has a non-zero reference + /// count, one of which will be owned by the new [`Arc`] instance. + unsafe fn from_inner(inner: NonNull<ArcInner<T>>) -> Self { + // INVARIANT: By the safety requirements, the invariants hold. + Arc { + ptr: inner, + _p: PhantomData, + } + } + + /// Returns an [`ArcBorrow`] from the given [`Arc`]. + /// + /// This is useful when the argument of a function call is an [`ArcBorrow`] (e.g., in a method + /// receiver), but we have an [`Arc`] instead. Getting an [`ArcBorrow`] is free when optimised. + #[inline] + pub fn as_arc_borrow(&self) -> ArcBorrow<'_, T> { + // SAFETY: The constraint that the lifetime of the shared reference must outlive that of + // the returned `ArcBorrow` ensures that the object remains alive and that no mutable + // reference can be created. + unsafe { ArcBorrow::new(self.ptr) } + } +} + +impl<T: 'static> ForeignOwnable for Arc<T> { + type Borrowed<'a> = ArcBorrow<'a, T>; + + fn into_foreign(self) -> *const core::ffi::c_void { + ManuallyDrop::new(self).ptr.as_ptr() as _ + } + + unsafe fn borrow<'a>(ptr: *const core::ffi::c_void) -> ArcBorrow<'a, T> { + // SAFETY: By the safety requirement of this function, we know that `ptr` came from + // a previous call to `Arc::into_foreign`. + let inner = NonNull::new(ptr as *mut ArcInner<T>).unwrap(); + + // SAFETY: The safety requirements of `from_foreign` ensure that the object remains alive + // for the lifetime of the returned value. Additionally, the safety requirements of + // `ForeignOwnable::borrow_mut` ensure that no new mutable references are created. 
+ unsafe { ArcBorrow::new(inner) } + } + + unsafe fn from_foreign(ptr: *const core::ffi::c_void) -> Self { + // SAFETY: By the safety requirement of this function, we know that `ptr` came from + // a previous call to `Arc::into_foreign`, which guarantees that `ptr` is valid and + // holds a reference count increment that is transferrable to us. + unsafe { Self::from_inner(NonNull::new(ptr as _).unwrap()) } + } +} + +impl Arc<dyn Any + Send + Sync> { + /// Attempt to downcast the `Arc<dyn Any + Send + Sync>` to a concrete type. + pub fn downcast<T>(self) -> core::result::Result<Arc<T>, Self> + where + T: Any + Send + Sync, + { + if (*self).is::<T>() { + // SAFETY: We have just checked that the type is correct, so we can cast the pointer. + unsafe { + let ptr = self.ptr.cast::<ArcInner<T>>(); + core::mem::forget(self); + Ok(Arc::from_inner(ptr)) + } + } else { + Err(self) + } + } +} + +impl<T: ?Sized> Deref for Arc<T> { + type Target = T; + + fn deref(&self) -> &Self::Target { + // SAFETY: By the type invariant, there is necessarily a reference to the object, so it is + // safe to dereference it. + unsafe { &self.ptr.as_ref().data } + } +} + +impl<T: ?Sized> Clone for Arc<T> { + fn clone(&self) -> Self { + // INVARIANT: C `refcount_inc` saturates the refcount, so it cannot overflow to zero. + // SAFETY: By the type invariant, there is necessarily a reference to the object, so it is + // safe to increment the refcount. + unsafe { bindings::refcount_inc(self.ptr.as_ref().refcount.get()) }; + + // SAFETY: We just incremented the refcount. This increment is now owned by the new `Arc`. + unsafe { Self::from_inner(self.ptr) } + } +} + +impl<T: ?Sized> Drop for Arc<T> { + fn drop(&mut self) { + // SAFETY: By the type invariant, there is necessarily a reference to the object. We cannot + // touch `refcount` after it's decremented to a non-zero value because another thread/CPU + // may concurrently decrement it to zero and free it. It is ok to have a raw pointer to + // freed/invalid memory as long as it is never dereferenced. + let refcount = unsafe { self.ptr.as_ref() }.refcount.get(); + + // INVARIANT: If the refcount reaches zero, there are no other instances of `Arc`, and + // this instance is being dropped, so the broken invariant is not observable. + // SAFETY: Also by the type invariant, we are allowed to decrement the refcount. + let is_zero = unsafe { bindings::refcount_dec_and_test(refcount) }; + if is_zero { + // The count reached zero, we must free the memory. + // + // SAFETY: The pointer was initialised from the result of `Box::leak`. + unsafe { Box::from_raw(self.ptr.as_ptr()) }; + } + } +} + +impl<T: ?Sized> From<UniqueArc<T>> for Arc<T> { + fn from(item: UniqueArc<T>) -> Self { + item.inner + } +} + +impl<T: ?Sized> From<Pin<UniqueArc<T>>> for Arc<T> { + fn from(item: Pin<UniqueArc<T>>) -> Self { + // SAFETY: The type invariants of `Arc` guarantee that the data is pinned. + unsafe { Pin::into_inner_unchecked(item).inner } + } +} + +/// A borrowed reference to an [`Arc`] instance. +/// +/// For cases when one doesn't ever need to increment the refcount on the allocation, it is simpler +/// to use just `&T`, which we can trivially get from an `Arc<T>` instance. +/// +/// However, when one may need to increment the refcount, it is preferable to use an `ArcBorrow<T>` +/// over `&Arc<T>` because the latter results in a double-indirection: a pointer (shared reference) +/// to a pointer (`Arc<T>`) to the object (`T`). 
An [`ArcBorrow`] eliminates this double +/// indirection while still allowing one to increment the refcount and getting an `Arc<T>` when/if +/// needed. +/// +/// # Invariants +/// +/// There are no mutable references to the underlying [`Arc`], and it remains valid for the +/// lifetime of the [`ArcBorrow`] instance. +/// +/// # Example +/// +/// ``` +/// use crate::sync::{Arc, ArcBorrow}; +/// +/// struct Example; +/// +/// fn do_something(e: ArcBorrow<'_, Example>) -> Arc<Example> { +/// e.into() +/// } +/// +/// let obj = Arc::try_new(Example)?; +/// let cloned = do_something(obj.as_arc_borrow()); +/// +/// // Assert that both `obj` and `cloned` point to the same underlying object. +/// assert!(core::ptr::eq(&*obj, &*cloned)); +/// ``` +/// +/// Using `ArcBorrow<T>` as the type of `self`: +/// +/// ``` +/// use crate::sync::{Arc, ArcBorrow}; +/// +/// struct Example { +/// a: u32, +/// b: u32, +/// } +/// +/// impl Example { +/// fn use_reference(self: ArcBorrow<'_, Self>) { +/// // ... +/// } +/// } +/// +/// let obj = Arc::try_new(Example { a: 10, b: 20 })?; +/// obj.as_arc_borrow().use_reference(); +/// ``` +pub struct ArcBorrow<'a, T: ?Sized + 'a> { + inner: NonNull<ArcInner<T>>, + _p: PhantomData<&'a ()>, +} + +// This is to allow [`ArcBorrow`] (and variants) to be used as the type of `self`. +impl<T: ?Sized> core::ops::Receiver for ArcBorrow<'_, T> {} + +// This is to allow `ArcBorrow<U>` to be dispatched on when `ArcBorrow<T>` can be coerced into +// `ArcBorrow<U>`. +impl<T: ?Sized + Unsize<U>, U: ?Sized> core::ops::DispatchFromDyn<ArcBorrow<'_, U>> + for ArcBorrow<'_, T> +{ +} + +impl<T: ?Sized> Clone for ArcBorrow<'_, T> { + fn clone(&self) -> Self { + *self + } +} + +impl<T: ?Sized> Copy for ArcBorrow<'_, T> {} + +impl<T: ?Sized> ArcBorrow<'_, T> { + /// Creates a new [`ArcBorrow`] instance. + /// + /// # Safety + /// + /// Callers must ensure the following for the lifetime of the returned [`ArcBorrow`] instance: + /// 1. That `inner` remains valid; + /// 2. That no mutable references to `inner` are created. + unsafe fn new(inner: NonNull<ArcInner<T>>) -> Self { + // INVARIANT: The safety requirements guarantee the invariants. + Self { + inner, + _p: PhantomData, + } + } +} + +impl<T: ?Sized> From<ArcBorrow<'_, T>> for Arc<T> { + fn from(b: ArcBorrow<'_, T>) -> Self { + // SAFETY: The existence of `b` guarantees that the refcount is non-zero. `ManuallyDrop` + // guarantees that `drop` isn't called, so it's ok that the temporary `Arc` doesn't own the + // increment. + ManuallyDrop::new(unsafe { Arc::from_inner(b.inner) }) + .deref() + .clone() + } +} + +impl<T: ?Sized> Deref for ArcBorrow<'_, T> { + type Target = T; + + fn deref(&self) -> &Self::Target { + // SAFETY: By the type invariant, the underlying object is still alive with no mutable + // references to it, so it is safe to create a shared reference. + unsafe { &self.inner.as_ref().data } + } +} + +/// A refcounted object that is known to have a refcount of 1. +/// +/// It is mutable and can be converted to an [`Arc`] so that it can be shared. +/// +/// # Invariants +/// +/// `inner` always has a reference count of 1. +/// +/// # Examples +/// +/// In the following example, we make changes to the inner object before turning it into an +/// `Arc<Test>` object (after which point, it cannot be mutated directly). Note that `x.into()` +/// cannot fail. 
+/// +/// ``` +/// use kernel::sync::{Arc, UniqueArc}; +/// +/// struct Example { +/// a: u32, +/// b: u32, +/// } +/// +/// fn test() -> Result<Arc<Example>> { +/// let mut x = UniqueArc::try_new(Example { a: 10, b: 20 })?; +/// x.a += 1; +/// x.b += 1; +/// Ok(x.into()) +/// } +/// +/// # test().unwrap(); +/// ``` +/// +/// In the following example we first allocate memory for a ref-counted `Example` but we don't +/// initialise it on allocation. We do initialise it later with a call to [`UniqueArc::write`], +/// followed by a conversion to `Arc<Example>`. This is particularly useful when allocation happens +/// in one context (e.g., sleepable) and initialisation in another (e.g., atomic): +/// +/// ``` +/// use kernel::sync::{Arc, UniqueArc}; +/// +/// struct Example { +/// a: u32, +/// b: u32, +/// } +/// +/// fn test() -> Result<Arc<Example>> { +/// let x = UniqueArc::try_new_uninit()?; +/// Ok(x.write(Example { a: 10, b: 20 }).into()) +/// } +/// +/// # test().unwrap(); +/// ``` +/// +/// In the last example below, the caller gets a pinned instance of `Example` while converting to +/// `Arc<Example>`; this is useful in scenarios where one needs a pinned reference during +/// initialisation, for example, when initialising fields that are wrapped in locks. +/// +/// ``` +/// use kernel::sync::{Arc, UniqueArc}; +/// +/// struct Example { +/// a: u32, +/// b: u32, +/// } +/// +/// fn test() -> Result<Arc<Example>> { +/// let mut pinned = Pin::from(UniqueArc::try_new(Example { a: 10, b: 20 })?); +/// // We can modify `pinned` because it is `Unpin`. +/// pinned.as_mut().a += 1; +/// Ok(pinned.into()) +/// } +/// +/// # test().unwrap(); +/// ``` +pub struct UniqueArc<T: ?Sized> { + inner: Arc<T>, +} + +impl<T> UniqueArc<T> { + /// Tries to allocate a new [`UniqueArc`] instance. + pub fn try_new(value: T) -> Result<Self> { + Ok(Self { + // INVARIANT: The newly-created object has a ref-count of 1. + inner: Arc::try_new(value)?, + }) + } + + /// Tries to allocate a new [`UniqueArc`] instance whose contents are not initialised yet. + pub fn try_new_uninit() -> Result<UniqueArc<MaybeUninit<T>>> { + Ok(UniqueArc::<MaybeUninit<T>> { + // INVARIANT: The newly-created object has a ref-count of 1. + inner: Arc::try_new(MaybeUninit::uninit())?, + }) + } +} + +impl<T> UniqueArc<MaybeUninit<T>> { + /// Converts a `UniqueArc<MaybeUninit<T>>` into a `UniqueArc<T>` by writing a value into it. + pub fn write(mut self, value: T) -> UniqueArc<T> { + self.deref_mut().write(value); + unsafe { self.assume_init() } + } + + /// Returns a UniqueArc<T>, assuming the MaybeUninit<T> has already been initialized. + /// + /// # Safety + /// The contents of the UniqueArc must have already been fully initialized. + pub unsafe fn assume_init(self) -> UniqueArc<T> { + let inner = ManuallyDrop::new(self).inner.ptr; + UniqueArc { + // SAFETY: The new `Arc` is taking over `ptr` from `self.inner` (which won't be + // dropped). The types are compatible because `MaybeUninit<T>` is compatible with `T`. + inner: unsafe { Arc::from_inner(inner.cast()) }, + } + } +} + +impl<T: ?Sized> From<UniqueArc<T>> for Pin<UniqueArc<T>> { + fn from(obj: UniqueArc<T>) -> Self { + // SAFETY: It is not possible to move/replace `T` inside a `Pin<UniqueArc<T>>` (unless `T` + // is `Unpin`), so it is ok to convert it to `Pin<UniqueArc<T>>`. 
+ unsafe { Pin::new_unchecked(obj) } + } +} + +impl<T: ?Sized> Deref for UniqueArc<T> { + type Target = T; + + fn deref(&self) -> &Self::Target { + self.inner.deref() + } +} + +impl<T: ?Sized> DerefMut for UniqueArc<T> { + fn deref_mut(&mut self) -> &mut Self::Target { + // SAFETY: By the `Arc` type invariant, there is necessarily a reference to the object, so + // it is safe to dereference it. Additionally, we know there is only one reference when + // it's inside a `UniqueArc`, so it is safe to get a mutable reference. + unsafe { &mut self.inner.ptr.as_mut().data } + } +} + +impl<T: fmt::Display + ?Sized> fmt::Display for UniqueArc<T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(self.deref(), f) + } +} + +impl<T: fmt::Display + ?Sized> fmt::Display for Arc<T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(self.deref(), f) + } +} + +impl<T: fmt::Debug + ?Sized> fmt::Debug for UniqueArc<T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(self.deref(), f) + } +} + +impl<T: fmt::Debug + ?Sized> fmt::Debug for Arc<T> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(self.deref(), f) + } +} diff --git a/rust/kernel/sync/condvar.rs b/rust/kernel/sync/condvar.rs new file mode 100644 index 000000000000..4610f0bda650 --- /dev/null +++ b/rust/kernel/sync/condvar.rs @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! A condition variable. +//! +//! This module allows Rust code to use the kernel's [`struct wait_queue_head`] as a condition +//! variable. + +use super::{Guard, Lock, LockClassKey, LockInfo, NeedsLockClass}; +use crate::{bindings, str::CStr, types::Opaque}; +use core::{marker::PhantomPinned, pin::Pin}; + +/// Safely initialises a [`CondVar`] with the given name, generating a new lock class. +#[macro_export] +macro_rules! condvar_init { + ($condvar:expr, $name:literal) => { + $crate::init_with_lockdep!($condvar, $name) + }; +} + +// TODO: `bindgen` is not generating this constant. Figure out why. +const POLLFREE: u32 = 0x4000; + +/// Exposes the kernel's [`struct wait_queue_head`] as a condition variable. It allows the caller to +/// atomically release the given lock and go to sleep. It reacquires the lock when it wakes up. And +/// it wakes up when notified by another thread (via [`CondVar::notify_one`] or +/// [`CondVar::notify_all`]) or because the thread received a signal. +/// +/// [`struct wait_queue_head`]: ../../../include/linux/wait.h +pub struct CondVar { + pub(crate) wait_list: Opaque<bindings::wait_queue_head>, + + /// A condvar needs to be pinned because it contains a [`struct list_head`] that is + /// self-referential, so it cannot be safely moved once it is initialised. + _pin: PhantomPinned, +} + +// SAFETY: `CondVar` only uses a `struct wait_queue_head`, which is safe to use on any thread. +#[allow(clippy::non_send_fields_in_send_ty)] +unsafe impl Send for CondVar {} + +// SAFETY: `CondVar` only uses a `struct wait_queue_head`, which is safe to use on multiple threads +// concurrently. +unsafe impl Sync for CondVar {} + +impl CondVar { + /// Constructs a new conditional variable. + /// + /// # Safety + /// + /// The caller must call `CondVar::init` before using the conditional variable. + pub const unsafe fn new() -> Self { + Self { + wait_list: Opaque::uninit(), + _pin: PhantomPinned, + } + } + + /// Atomically releases the given lock (whose ownership is proven by the guard) and puts the + /// thread to sleep. 
It wakes up when notified by [`CondVar::notify_one`] or + /// [`CondVar::notify_all`], or when the thread receives a signal. + /// + /// Returns whether there is a signal pending. + #[must_use = "wait returns if a signal is pending, so the caller must check the return value"] + pub fn wait<L: Lock<I>, I: LockInfo>(&self, guard: &mut Guard<'_, L, I>) -> bool { + let lock = guard.lock; + let wait = Opaque::<bindings::wait_queue_entry>::uninit(); + + // SAFETY: `wait` points to valid memory. + unsafe { bindings::init_wait(wait.get()) }; + + // SAFETY: Both `wait` and `wait_list` point to valid memory. + unsafe { + bindings::prepare_to_wait_exclusive( + self.wait_list.get(), + wait.get(), + bindings::TASK_INTERRUPTIBLE as _, + ) + }; + + // SAFETY: The guard is evidence that the caller owns the lock. + unsafe { lock.unlock(&mut guard.context) }; + + // SAFETY: No arguments, switches to another thread. + unsafe { bindings::schedule() }; + + guard.context = lock.lock_noguard(); + + // SAFETY: Both `wait` and `wait_list` point to valid memory. + unsafe { bindings::finish_wait(self.wait_list.get(), wait.get()) }; + + // Replace when kernel::task is upstream + //Task::current().signal_pending() + unsafe { bindings::signal_pending(bindings::get_current()) != 0 } + } + + /// Calls the kernel function to notify the appropriate number of threads with the given flags. + fn notify(&self, count: i32, flags: u32) { + // SAFETY: `wait_list` points to valid memory. + unsafe { + bindings::__wake_up( + self.wait_list.get(), + bindings::TASK_NORMAL, + count, + flags as _, + ) + }; + } + + /// Wakes a single waiter up, if any. This is not 'sticky' in the sense that if no thread is + /// waiting, the notification is lost completely (as opposed to automatically waking up the + /// next waiter). + pub fn notify_one(&self) { + self.notify(1, 0); + } + + /// Wakes all waiters up, if any. This is not 'sticky' in the sense that if no thread is + /// waiting, the notification is lost completely (as opposed to automatically waking up the + /// next waiter). + pub fn notify_all(&self) { + self.notify(0, 0); + } + + /// Wakes all waiters up. If they were added by `epoll`, they are also removed from the list of + /// waiters. This is useful when cleaning up a condition variable that may be waited on by + /// threads that use `epoll`. + pub fn free_waiters(&self) { + self.notify(1, bindings::POLLHUP | POLLFREE); + } +} + +impl NeedsLockClass for CondVar { + fn init( + self: Pin<&mut Self>, + name: &'static CStr, + key: &'static LockClassKey, + _: &'static LockClassKey, + ) { + unsafe { + bindings::__init_waitqueue_head(self.wait_list.get(), name.as_char_ptr(), key.get()) + }; + } +} diff --git a/rust/kernel/sync/guard.rs b/rust/kernel/sync/guard.rs new file mode 100644 index 000000000000..1546e2c7dfeb --- /dev/null +++ b/rust/kernel/sync/guard.rs @@ -0,0 +1,162 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! A generic lock guard and trait. +//! +//! This module contains a lock guard that can be used with any locking primitive that implements +//! the ([`Lock`]) trait. It also contains the definition of the trait, which can be leveraged by +//! other constructs to work on generic locking primitives. + +use super::{LockClassKey, NeedsLockClass}; +use crate::{ + str::CStr, + types::{Bool, False, True}, +}; +use core::pin::Pin; + +/// Allows mutual exclusion primitives that implement the [`Lock`] trait to automatically unlock +/// when a guard goes out of scope. 
It also provides a safe and convenient way to access the data +/// protected by the lock. +#[must_use = "the lock unlocks immediately when the guard is unused"] +pub struct Guard<'a, L: Lock<I> + ?Sized, I: LockInfo = WriteLock> { + pub(crate) lock: &'a L, + pub(crate) context: L::GuardContext, +} + +// SAFETY: `Guard` is sync when the data protected by the lock is also sync. This is more +// conservative than the default compiler implementation; more details can be found on +// <https://github.com/rust-lang/rust/issues/41622> -- it refers to `MutexGuard` from the standard +// library. +unsafe impl<L, I> Sync for Guard<'_, L, I> +where + L: Lock<I> + ?Sized, + L::Inner: Sync, + I: LockInfo, +{ +} + +impl<L: Lock<I> + ?Sized, I: LockInfo> core::ops::Deref for Guard<'_, L, I> { + type Target = L::Inner; + + fn deref(&self) -> &Self::Target { + // SAFETY: The caller owns the lock, so it is safe to deref the protected data. + unsafe { &*self.lock.locked_data().get() } + } +} + +impl<L: Lock<I> + ?Sized, I: LockInfo<Writable = True>> core::ops::DerefMut for Guard<'_, L, I> { + fn deref_mut(&mut self) -> &mut Self::Target { + // SAFETY: The caller owns the lock, so it is safe to deref the protected data. + unsafe { &mut *self.lock.locked_data().get() } + } +} + +impl<L: Lock<I> + ?Sized, I: LockInfo> Drop for Guard<'_, L, I> { + fn drop(&mut self) { + // SAFETY: The caller owns the lock, so it is safe to unlock it. + unsafe { self.lock.unlock(&mut self.context) }; + } +} + +impl<'a, L: Lock<I> + ?Sized, I: LockInfo> Guard<'a, L, I> { + /// Constructs a new immutable lock guard. + /// + /// # Safety + /// + /// The caller must ensure that it owns the lock. + pub(crate) unsafe fn new(lock: &'a L, context: L::GuardContext) -> Self { + Self { lock, context } + } +} + +/// Specifies properties of a lock. +pub trait LockInfo { + /// Determines if the data protected by a lock is writable. + type Writable: Bool; +} + +/// A marker for locks that only allow reading. +pub struct ReadLock; +impl LockInfo for ReadLock { + type Writable = False; +} + +/// A marker for locks that allow reading and writing. +pub struct WriteLock; +impl LockInfo for WriteLock { + type Writable = True; +} + +/// A generic mutual exclusion primitive. +/// +/// [`Guard`] is written such that any mutual exclusion primitive that can implement this trait can +/// also benefit from having an automatic way to unlock itself. +/// +/// # Safety +/// +/// - Implementers of this trait with the [`WriteLock`] marker must ensure that only one thread/CPU +/// may access the protected data once the lock is held, that is, between calls to `lock_noguard` +/// and `unlock`. +/// - Implementers of all other markers must ensure that a mutable reference to the protected data +/// is not active in any thread/CPU because at least one shared reference is active between calls +/// to `lock_noguard` and `unlock`. +pub unsafe trait Lock<I: LockInfo = WriteLock> { + /// The type of the data protected by the lock. + type Inner: ?Sized; + + /// The type of context, if any, that needs to be stored in the guard. + type GuardContext; + + /// Acquires the lock, making the caller its owner. + #[must_use] + fn lock_noguard(&self) -> Self::GuardContext; + + /// Reacquires the lock, making the caller its owner. + /// + /// The guard context before the last unlock is passed in. + /// + /// Locks that don't require this state on relock can simply use the default implementation + /// that calls [`Lock::lock_noguard`]. 
+ fn relock(&self, ctx: &mut Self::GuardContext) { + *ctx = self.lock_noguard(); + } + + /// Releases the lock, giving up ownership of the lock. + /// + /// # Safety + /// + /// It must only be called by the current owner of the lock. + unsafe fn unlock(&self, context: &mut Self::GuardContext); + + /// Returns the data protected by the lock. + fn locked_data(&self) -> &core::cell::UnsafeCell<Self::Inner>; +} + +/// A creator of instances of a mutual exclusion (lock) primitive. +pub trait LockFactory { + /// The parametrised type of the mutual exclusion primitive that can be created by this factory. + type LockedType<T>; + + /// Constructs a new instance of the mutual exclusion primitive. + /// + /// # Safety + /// + /// The caller must call [`LockIniter::init_lock`] before using the lock. + unsafe fn new_lock<T>(data: T) -> Self::LockedType<T>; +} + +/// A lock that can be initialised with a single lock class key. +pub trait LockIniter { + /// Initialises the lock instance so that it can be safely used. + fn init_lock(self: Pin<&mut Self>, name: &'static CStr, key: &'static LockClassKey); +} + +impl<L: LockIniter> NeedsLockClass for L { + fn init( + self: Pin<&mut Self>, + name: &'static CStr, + key: &'static LockClassKey, + _: &'static LockClassKey, + ) { + self.init_lock(name, key); + } +} diff --git a/rust/kernel/sync/mutex.rs b/rust/kernel/sync/mutex.rs new file mode 100644 index 000000000000..c40396c15453 --- /dev/null +++ b/rust/kernel/sync/mutex.rs @@ -0,0 +1,149 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! A kernel mutex. +//! +//! This module allows Rust code to use the kernel's [`struct mutex`]. + +use super::{Guard, Lock, LockClassKey, LockFactory, LockIniter, WriteLock}; +use crate::{bindings, str::CStr, types::Opaque}; +use core::{cell::UnsafeCell, marker::PhantomPinned, pin::Pin}; + +/// Safely initialises a [`Mutex`] with the given name, generating a new lock class. +#[macro_export] +macro_rules! mutex_init { + ($mutex:expr, $name:literal) => { + $crate::init_with_lockdep!($mutex, $name) + }; +} + +/// Exposes the kernel's [`struct mutex`]. When multiple threads attempt to lock the same mutex, +/// only one at a time is allowed to progress, the others will block (sleep) until the mutex is +/// unlocked, at which point another thread will be allowed to wake up and make progress. +/// +/// A [`Mutex`] must first be initialised with a call to [`Mutex::init_lock`] before it can be +/// used. The [`mutex_init`] macro is provided to automatically assign a new lock class to a mutex +/// instance. +/// +/// Since it may block, [`Mutex`] needs to be used with care in atomic contexts. +/// +/// [`struct mutex`]: ../../../include/linux/mutex.h +pub struct Mutex<T: ?Sized> { + /// The kernel `struct mutex` object. + mutex: Opaque<bindings::mutex>, + + /// A mutex needs to be pinned because it contains a [`struct list_head`] that is + /// self-referential, so it cannot be safely moved once it is initialised. + _pin: PhantomPinned, + + /// The data protected by the mutex. + data: UnsafeCell<T>, +} + +// SAFETY: `Mutex` can be transferred across thread boundaries iff the data it protects can. +#[allow(clippy::non_send_fields_in_send_ty)] +unsafe impl<T: ?Sized + Send> Send for Mutex<T> {} + +// SAFETY: `Mutex` serialises the interior mutability it provides, so it is `Sync` as long as the +// data it protects is `Send`. +unsafe impl<T: ?Sized + Send> Sync for Mutex<T> {} + +impl<T> Mutex<T> { + /// Constructs a new mutex. 
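+    ///
+    /// A minimal usage sketch (an illustrative addition, mirroring the `RevocableMutex` example
+    /// later in this file; it is not part of the original patch):
+    ///
+    /// ```
+    /// # use kernel::sync::Mutex;
+    /// # use kernel::mutex_init;
+    /// # use core::pin::Pin;
+    /// // SAFETY: We call `mutex_init!` immediately below.
+    /// let mut m = unsafe { Mutex::new(0u32) };
+    /// // SAFETY: We never move out of `m`.
+    /// let pinned = unsafe { Pin::new_unchecked(&mut m) };
+    /// mutex_init!(pinned, "example::m");
+    /// assert_eq!(*m.lock(), 0);
+    /// ```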
+ /// + /// # Safety + /// + /// The caller must call [`Mutex::init_lock`] before using the mutex. + pub const unsafe fn new(t: T) -> Self { + Self { + mutex: Opaque::uninit(), + data: UnsafeCell::new(t), + _pin: PhantomPinned, + } + } +} + +impl<T: ?Sized> Mutex<T> { + /// Locks the mutex and gives the caller access to the data protected by it. Only one thread at + /// a time is allowed to access the protected data. + pub fn lock(&self) -> Guard<'_, Self> { + let ctx = self.lock_noguard(); + // SAFETY: The mutex was just acquired. + unsafe { Guard::new(self, ctx) } + } +} + +impl<T> LockFactory for Mutex<T> { + type LockedType<U> = Mutex<U>; + + unsafe fn new_lock<U>(data: U) -> Mutex<U> { + // SAFETY: The safety requirements of `new_lock` also require that `init_lock` be called. + unsafe { Mutex::new(data) } + } +} + +impl<T> LockIniter for Mutex<T> { + fn init_lock(self: Pin<&mut Self>, name: &'static CStr, key: &'static LockClassKey) { + unsafe { bindings::__mutex_init(self.mutex.get(), name.as_char_ptr(), key.get()) }; + } +} + +pub struct EmptyGuardContext; + +// SAFETY: The underlying kernel `struct mutex` object ensures mutual exclusion. +unsafe impl<T: ?Sized> Lock for Mutex<T> { + type Inner = T; + type GuardContext = EmptyGuardContext; + + fn lock_noguard(&self) -> EmptyGuardContext { + // SAFETY: `mutex` points to valid memory. + unsafe { bindings::mutex_lock(self.mutex.get()) }; + EmptyGuardContext + } + + unsafe fn unlock(&self, _: &mut EmptyGuardContext) { + // SAFETY: The safety requirements of the function ensure that the mutex is owned by the + // caller. + unsafe { bindings::mutex_unlock(self.mutex.get()) }; + } + + fn locked_data(&self) -> &UnsafeCell<T> { + &self.data + } +} + +/// A revocable mutex. +/// +/// That is, a mutex to which access can be revoked at runtime. It is a specialisation of the more +/// generic [`super::revocable::Revocable`]. +/// +/// # Examples +/// +/// ``` +/// # use kernel::sync::RevocableMutex; +/// # use kernel::revocable_init; +/// # use core::pin::Pin; +/// +/// struct Example { +/// a: u32, +/// b: u32, +/// } +/// +/// fn read_sum(v: &RevocableMutex<Example>) -> Option<u32> { +/// let guard = v.try_write()?; +/// Some(guard.a + guard.b) +/// } +/// +/// // SAFETY: We call `revocable_init` immediately below. +/// let mut v = unsafe { RevocableMutex::new(Example { a: 10, b: 20 }) }; +/// // SAFETY: We never move out of `v`. +/// let pinned = unsafe { Pin::new_unchecked(&mut v) }; +/// revocable_init!(pinned, "example::v"); +/// assert_eq!(read_sum(&v), Some(30)); +/// v.revoke(); +/// assert_eq!(read_sum(&v), None); +/// ``` +pub type RevocableMutex<T> = super::revocable::Revocable<Mutex<()>, T>; + +/// A guard for a revocable mutex. +pub type RevocableMutexGuard<'a, T, I = WriteLock> = + super::revocable::RevocableGuard<'a, Mutex<()>, T, I>; diff --git a/rust/kernel/sync/rcu.rs b/rust/kernel/sync/rcu.rs new file mode 100644 index 000000000000..1a1c8ea49359 --- /dev/null +++ b/rust/kernel/sync/rcu.rs @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! RCU support. +//! +//! C header: [`include/linux/rcupdate.h`](../../../../include/linux/rcupdate.h) + +use crate::bindings; +use core::marker::PhantomData; + +/// Evidence that the RCU read side lock is held on the current thread/CPU. +/// +/// The type is explicitly not `Send` because this property is per-thread/CPU. +/// +/// # Invariants +/// +/// The RCU read side lock is actually held while instances of this guard exist. 
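+///
+/// # Examples
+///
+/// An illustrative sketch (added here, not part of the original patch), assuming the module is
+/// exported as `kernel::sync::rcu`:
+///
+/// ```
+/// use kernel::sync::rcu;
+///
+/// // The RCU read side lock is held from this point on...
+/// let guard = rcu::read_lock();
+/// // ... RCU-protected data may be dereferenced here ...
+/// // ... and the lock is released when the guard is dropped (or via `guard.unlock()`).
+/// drop(guard);
+/// ```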
+pub struct Guard { + _not_send: PhantomData<*mut ()>, +} + +impl Guard { + /// Acquires the RCU read side lock and returns a guard. + pub fn new() -> Self { + // SAFETY: An FFI call with no additional requirements. + unsafe { bindings::rcu_read_lock() }; + // INVARIANT: The RCU read side lock was just acquired above. + Self { + _not_send: PhantomData, + } + } + + /// Explicitly releases the RCU read side lock. + pub fn unlock(self) {} +} + +impl Default for Guard { + fn default() -> Self { + Self::new() + } +} + +impl Drop for Guard { + fn drop(&mut self) { + // SAFETY: By the type invariants, the rcu read side is locked, so it is ok to unlock it. + unsafe { bindings::rcu_read_unlock() }; + } +} + +/// Acquires the RCU read side lock. +pub fn read_lock() -> Guard { + Guard::new() +} diff --git a/rust/kernel/sync/revocable.rs b/rust/kernel/sync/revocable.rs new file mode 100644 index 000000000000..db716182a2a8 --- /dev/null +++ b/rust/kernel/sync/revocable.rs @@ -0,0 +1,246 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Synchronisation primitives where access to their contents can be revoked at runtime. + +use crate::{ + str::CStr, + sync::{Guard, Lock, LockClassKey, LockFactory, LockInfo, NeedsLockClass, ReadLock, WriteLock}, + types::True, +}; +use core::{ + mem::MaybeUninit, + ops::{Deref, DerefMut}, + pin::Pin, +}; + +/// The state within the revocable synchronisation primitive. +/// +/// We don't use simply `Option<T>` because we need to drop in-place because the contents are +/// implicitly pinned. +/// +/// # Invariants +/// +/// The `is_available` field determines if `data` is initialised. +pub struct Inner<T> { + is_available: bool, + data: MaybeUninit<T>, +} + +impl<T> Inner<T> { + fn new(data: T) -> Self { + // INVARIANT: `data` is initialised and `is_available` is `true`, so the state matches. + Self { + is_available: true, + data: MaybeUninit::new(data), + } + } + + fn drop_in_place(&mut self) { + if !self.is_available { + // Already dropped. + return; + } + + // INVARIANT: `data` is being dropped and `is_available` is set to `false`, so the state + // matches. + self.is_available = false; + + // SAFETY: By the type invariants, `data` is valid because `is_available` was true. + unsafe { self.data.assume_init_drop() }; + } +} + +impl<T> Drop for Inner<T> { + fn drop(&mut self) { + self.drop_in_place(); + } +} + +/// Revocable synchronisation primitive. +/// +/// That is, it wraps synchronisation primitives so that access to their contents can be revoked at +/// runtime, rendering them inacessible. +/// +/// Once access is revoked and all concurrent users complete (i.e., all existing instances of +/// [`RevocableGuard`] are dropped), the wrapped object is also dropped. +/// +/// For better ergonomics, we advise the use of specialisations of this struct, for example, +/// [`super::RevocableMutex`] and [`super::RevocableRwSemaphore`]. Callers that do not need to +/// sleep while holding on to a guard should use [`crate::revocable::Revocable`] instead, which is +/// more efficient as it uses RCU to keep objects alive. +/// +/// # Examples +/// +/// ``` +/// # use kernel::sync::{Mutex, Revocable}; +/// # use kernel::revocable_init; +/// # use core::pin::Pin; +/// +/// struct Example { +/// a: u32, +/// b: u32, +/// } +/// +/// fn add_two(v: &Revocable<Mutex<()>, Example>) -> Option<u32> { +/// let mut guard = v.try_write()?; +/// guard.a += 2; +/// guard.b += 2; +/// Some(guard.a + guard.b) +/// } +/// +/// // SAFETY: We call `revocable_init` immediately below. 
+/// let mut v = unsafe { Revocable::<Mutex<()>, Example>::new(Example { a: 10, b: 20 }) }; +/// // SAFETY: We never move out of `v`. +/// let pinned = unsafe { Pin::new_unchecked(&mut v) }; +/// revocable_init!(pinned, "example::v"); +/// assert_eq!(add_two(&v), Some(34)); +/// v.revoke(); +/// assert_eq!(add_two(&v), None); +/// ``` +pub struct Revocable<F: LockFactory, T> { + inner: F::LockedType<Inner<T>>, +} + +/// Safely initialises a [`Revocable`] instance with the given name, generating a new lock class. +#[macro_export] +macro_rules! revocable_init { + ($mutex:expr, $name:literal) => { + $crate::init_with_lockdep!($mutex, $name) + }; +} + +impl<F: LockFactory, T> Revocable<F, T> { + /// Creates a new revocable instance of the given lock. + /// + /// # Safety + /// + /// The caller must call [`Revocable::init`] before using the revocable synch primitive. + pub unsafe fn new(data: T) -> Self { + Self { + // SAFETY: The safety requirements of this function require that `Revocable::init` + // be called before the returned object can be used. Lock initialisation is called + // from `Revocable::init`. + inner: unsafe { F::new_lock(Inner::new(data)) }, + } + } +} + +impl<F: LockFactory, T> NeedsLockClass for Revocable<F, T> +where + F::LockedType<Inner<T>>: NeedsLockClass, +{ + fn init( + self: Pin<&mut Self>, + name: &'static CStr, + key1: &'static LockClassKey, + key2: &'static LockClassKey, + ) { + // SAFETY: `inner` is pinned when `self` is. + let inner = unsafe { self.map_unchecked_mut(|r| &mut r.inner) }; + inner.init(name, key1, key2); + } +} + +impl<F: LockFactory, T> Revocable<F, T> +where + F::LockedType<Inner<T>>: Lock<Inner = Inner<T>>, +{ + /// Revokes access to and drops the wrapped object. + /// + /// Revocation and dropping happen after ongoing accessors complete. + pub fn revoke(&self) { + self.lock().drop_in_place(); + } + + /// Tries to lock the \[revocable\] wrapped object in write (exclusive) mode. + /// + /// Returns `None` if the object has been revoked and is therefore no longer accessible. + /// + /// Returns a guard that gives access to the object otherwise; the object is guaranteed to + /// remain accessible while the guard is alive. Callers are allowed to sleep while holding on + /// to the returned guard. + pub fn try_write(&self) -> Option<RevocableGuard<'_, F, T, WriteLock>> { + let inner = self.lock(); + if !inner.is_available { + return None; + } + Some(RevocableGuard::new(inner)) + } + + fn lock(&self) -> Guard<'_, F::LockedType<Inner<T>>> { + let ctx = self.inner.lock_noguard(); + // SAFETY: The lock was acquired in the call above. + unsafe { Guard::new(&self.inner, ctx) } + } +} + +impl<F: LockFactory, T> Revocable<F, T> +where + F::LockedType<Inner<T>>: Lock<ReadLock, Inner = Inner<T>>, +{ + /// Tries to lock the \[revocable\] wrapped object in read (shared) mode. + /// + /// Returns `None` if the object has been revoked and is therefore no longer accessible. + /// + /// Returns a guard that gives access to the object otherwise; the object is guaranteed to + /// remain accessible while the guard is alive. Callers are allowed to sleep while holding on + /// to the returned guard. + pub fn try_read(&self) -> Option<RevocableGuard<'_, F, T, ReadLock>> { + let ctx = self.inner.lock_noguard(); + // SAFETY: The lock was acquired in the call above. 
+ let inner = unsafe { Guard::new(&self.inner, ctx) }; + if !inner.is_available { + return None; + } + Some(RevocableGuard::new(inner)) + } +} + +/// A guard that allows access to a revocable object and keeps it alive. +pub struct RevocableGuard<'a, F: LockFactory, T, I: LockInfo> +where + F::LockedType<Inner<T>>: Lock<I, Inner = Inner<T>>, +{ + guard: Guard<'a, F::LockedType<Inner<T>>, I>, +} + +impl<'a, F: LockFactory, T, I: LockInfo> RevocableGuard<'a, F, T, I> +where + F::LockedType<Inner<T>>: Lock<I, Inner = Inner<T>>, +{ + fn new(guard: Guard<'a, F::LockedType<Inner<T>>, I>) -> Self { + Self { guard } + } +} + +impl<F: LockFactory, T, I: LockInfo<Writable = True>> RevocableGuard<'_, F, T, I> +where + F::LockedType<Inner<T>>: Lock<I, Inner = Inner<T>>, +{ + /// Returns a pinned mutable reference to the wrapped object. + pub fn as_pinned_mut(&mut self) -> Pin<&mut T> { + // SAFETY: Revocable mutexes must be pinned, so we choose to always project the data as + // pinned as well (i.e., we guarantee we never move it). + unsafe { Pin::new_unchecked(&mut *self) } + } +} + +impl<F: LockFactory, T, I: LockInfo> Deref for RevocableGuard<'_, F, T, I> +where + F::LockedType<Inner<T>>: Lock<I, Inner = Inner<T>>, +{ + type Target = T; + + fn deref(&self) -> &Self::Target { + unsafe { &*self.guard.data.as_ptr() } + } +} + +impl<F: LockFactory, T, I: LockInfo<Writable = True>> DerefMut for RevocableGuard<'_, F, T, I> +where + F::LockedType<Inner<T>>: Lock<I, Inner = Inner<T>>, +{ + fn deref_mut(&mut self) -> &mut Self::Target { + unsafe { &mut *self.guard.data.as_mut_ptr() } + } +} diff --git a/rust/kernel/sync/smutex.rs b/rust/kernel/sync/smutex.rs new file mode 100644 index 000000000000..6cf92260ace6 --- /dev/null +++ b/rust/kernel/sync/smutex.rs @@ -0,0 +1,290 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! A simple mutex implementation. +//! +//! Differently from [`super::Mutex`], this implementation does not require pinning, so the +//! ergonomics are much improved, though the implementation is not as feature-rich as the C-based +//! one. The main advantage is that it doesn't impose unsafe blocks on callers. +//! +//! The mutex is made up of 2 words in addition to the data it protects. The first one is accessed +//! concurrently by threads trying to acquire and release the mutex, it contains a "stack" of +//! waiters and a "locked" bit; the second one is only accessible by the thread holding the mutex, +//! it contains a queue of waiters. Waiters are moved from the stack to the queue when the mutex is +//! next unlocked while the stack is non-empty and the queue is empty. A single waiter is popped +//! from the wait queue when the owner of the mutex unlocks it. +//! +//! The initial state of the mutex is `<locked=0, stack=[], queue=[]>`, meaning that it isn't +//! locked and both the waiter stack and queue are empty. +//! +//! A lock operation transitions the mutex to state `<locked=1, stack=[], queue=[]>`. +//! +//! An unlock operation transitions the mutex back to the initial state, however, an attempt to +//! lock the mutex while it's already locked results in a waiter being created (on the stack) and +//! pushed onto the stack, so the state is `<locked=1, stack=[W1], queue=[]>`. +//! +//! Another thread trying to lock the mutex results in another waiter being pushed onto the stack, +//! so the state becomes `<locked=1, stack=[W2, W1], queue=[]>`. +//! +//! In such states (queue is empty but stack is non-empty), the unlock operation is performed in +//! three steps: +//! 1. 
The stack is popped (but the mutex remains locked), so the state is: +//! `<locked=1, stack=[], queue=[]>` +//! 2. The stack is turned into a queue by reversing it, so the state is: +//! `<locked=1, stack=[], queue=[W1, W2]> +//! 3. Finally, the lock is released, and the first waiter is awakened, so the state is: +//! `<locked=0, stack=[], queue=[W2]>` +//! +//! The mutex remains accessible to any threads attempting to lock it in any of the intermediate +//! states above. For example, while it is locked, other threads may add waiters to the stack +//! (which is ok because we want to release the ones on the queue first); another example is that +//! another thread may acquire the mutex before waiter W1 in the example above, this makes the +//! mutex unfair but this is desirable because the thread is running already and may in fact +//! release the lock before W1 manages to get scheduled -- it also mitigates the lock convoy +//! problem when the releasing thread wants to immediately acquire the lock again: it will be +//! allowed to do so (as long as W1 doesn't get to it first). +//! +//! When the waiter queue is non-empty, unlocking the mutex always results in the first waiter being +//! popped form the queue and awakened. + +use super::{mutex::EmptyGuardContext, Guard, Lock, LockClassKey, LockFactory, LockIniter}; +use crate::{bindings, str::CStr, types::Opaque}; +use core::sync::atomic::{AtomicUsize, Ordering}; +use core::{cell::UnsafeCell, pin::Pin}; + +/// The value that is OR'd into the [`Mutex::waiter_stack`] when the mutex is locked. +const LOCKED: usize = 1; + +/// A simple mutex. +/// +/// This is mutual-exclusion primitive. It guarantees that only one thread at a time may access the +/// data it protects. When multiple threads attempt to lock the same mutex, only one at a time is +/// allowed to progress, the others will block (sleep) until the mutex is unlocked, at which point +/// another thread will be allowed to wake up and make progress. +/// +/// # Examples +/// +/// ``` +/// # use kernel::{Result, sync::Arc, sync::smutex::Mutex}; +/// +/// struct Example { +/// a: u32, +/// b: u32, +/// } +/// +/// static EXAMPLE: Mutex<Example> = Mutex::new(Example { a: 10, b: 20 }); +/// +/// fn inc_a(example: &Mutex<Example>) { +/// let mut guard = example.lock(); +/// guard.a += 1; +/// } +/// +/// fn sum(example: &Mutex<Example>) -> u32 { +/// let guard = example.lock(); +/// guard.a + guard.b +/// } +/// +/// fn try_new(a: u32, b: u32) -> Result<Arc<Mutex<Example>>> { +/// Arc::try_new(Mutex::new(Example { a, b })) +/// } +/// +/// assert_eq!(EXAMPLE.lock().a, 10); +/// assert_eq!(sum(&EXAMPLE), 30); +/// +/// inc_a(&EXAMPLE); +/// +/// assert_eq!(EXAMPLE.lock().a, 11); +/// assert_eq!(sum(&EXAMPLE), 31); +/// +/// # try_new(42, 43); +/// ``` +pub struct Mutex<T: ?Sized> { + /// A stack of waiters. + /// + /// It is accessed atomically by threads lock/unlocking the mutex. Additionally, the + /// least-significant bit is used to indicate whether the mutex is locked or not. + waiter_stack: AtomicUsize, + + /// A queue of waiters. + /// + /// This is only accessible to the holder of the mutex. When the owner of the mutex is + /// unlocking it, it will move waiters from the stack to the queue when the queue is empty and + /// the stack non-empty. + waiter_queue: UnsafeCell<*mut Waiter>, + + /// The data protected by the mutex. + data: UnsafeCell<T>, +} + +// SAFETY: `Mutex` can be transferred across thread boundaries iff the data it protects can. 
+#[allow(clippy::non_send_fields_in_send_ty)] +unsafe impl<T: ?Sized + Send> Send for Mutex<T> {} + +// SAFETY: `Mutex` serialises the interior mutability it provides, so it is `Sync` as long as the +// data it protects is `Send`. +unsafe impl<T: ?Sized + Send> Sync for Mutex<T> {} + +impl<T> Mutex<T> { + /// Creates a new instance of the mutex. + pub const fn new(data: T) -> Self { + Self { + waiter_stack: AtomicUsize::new(0), + waiter_queue: UnsafeCell::new(core::ptr::null_mut()), + data: UnsafeCell::new(data), + } + } +} + +impl<T: ?Sized> Mutex<T> { + /// Locks the mutex and gives the caller access to the data protected by it. Only one thread at + /// a time is allowed to access the protected data. + pub fn lock(&self) -> Guard<'_, Self> { + let ctx = self.lock_noguard(); + // SAFETY: The mutex was just acquired. + unsafe { Guard::new(self, ctx) } + } +} + +impl<T> LockFactory for Mutex<T> { + type LockedType<U> = Mutex<U>; + + unsafe fn new_lock<U>(data: U) -> Mutex<U> { + Mutex::new(data) + } +} + +impl<T> LockIniter for Mutex<T> { + fn init_lock(self: Pin<&mut Self>, _name: &'static CStr, _key: &'static LockClassKey) {} +} + +// SAFETY: The mutex implementation ensures mutual exclusion. +unsafe impl<T: ?Sized> Lock for Mutex<T> { + type Inner = T; + type GuardContext = EmptyGuardContext; + + fn lock_noguard(&self) -> EmptyGuardContext { + loop { + // Try the fast path: the caller owns the mutex if we manage to set the `LOCKED` bit. + // + // The `acquire` order matches with one of the `release` ones in `unlock`. + if self.waiter_stack.fetch_or(LOCKED, Ordering::Acquire) & LOCKED == 0 { + return EmptyGuardContext; + } + + // Slow path: we'll likely need to wait, so initialise a local waiter struct. + let mut waiter = Waiter { + completion: Opaque::uninit(), + next: core::ptr::null_mut(), + }; + + // SAFETY: The completion object was just allocated on the stack and is valid for + // writes. + unsafe { bindings::init_completion(waiter.completion.get()) }; + + // Try to enqueue the waiter by pushing into onto the waiter stack. We want to do it + // only while the mutex is locked by another thread. + loop { + // We use relaxed here because we're just reading the value we'll CAS later (which + // has a stronger ordering on success). + let mut v = self.waiter_stack.load(Ordering::Relaxed); + if v & LOCKED == 0 { + // The mutex was released by another thread, so try to acquire it. + // + // The `acquire` order matches with one of the `release` ones in `unlock`. + v = self.waiter_stack.fetch_or(LOCKED, Ordering::Acquire); + if v & LOCKED == 0 { + return EmptyGuardContext; + } + } + + waiter.next = (v & !LOCKED) as _; + + // The `release` order matches with `acquire` in `unlock` when the stack is swapped + // out. We use release order here to ensure that the other thread can see our + // waiter fully initialised. + if self + .waiter_stack + .compare_exchange( + v, + (&mut waiter as *mut _ as usize) | LOCKED, + Ordering::Release, + Ordering::Relaxed, + ) + .is_ok() + { + break; + } + } + + // Wait for the owner to lock to wake this thread up. + // + // SAFETY: Completion object was previously initialised with `init_completion` and + // remains valid. + unsafe { bindings::wait_for_completion(waiter.completion.get()) }; + } + } + + unsafe fn unlock(&self, _: &mut EmptyGuardContext) { + // SAFETY: The caller owns the mutex, so it is safe to manipulate the local wait queue. 
+ let mut waiter = unsafe { *self.waiter_queue.get() }; + loop { + // If we have a non-empty local queue of waiters, pop the first one, release the mutex, + // and wake it up (the popped waiter). + if !waiter.is_null() { + // SAFETY: The caller owns the mutex, so it is safe to manipulate the local wait + // queue. + unsafe { *self.waiter_queue.get() = (*waiter).next }; + + // The `release` order matches with one of the `acquire` ones in `lock_noguard`. + self.waiter_stack.fetch_and(!LOCKED, Ordering::Release); + + // Wake up the first waiter. + // + // SAFETY: The completion object was initialised before being added to the wait + // stack and is only removed above, when called completed. So it is safe for + // writes. + unsafe { bindings::complete_all((*waiter).completion.get()) }; + return; + } + + // Try the fast path when there are no local waiters. + // + // The `release` order matches with one of the `acquire` ones in `lock_noguard`. + if self + .waiter_stack + .compare_exchange(LOCKED, 0, Ordering::Release, Ordering::Relaxed) + .is_ok() + { + return; + } + + // We don't have a local queue, so pull the whole stack off, reverse it, and use it as a + // local queue. Since we're manipulating this queue, we need to keep ownership of the + // mutex. + // + // The `acquire` order matches with the `release` one in `lock_noguard` where a waiter + // is pushed onto the stack. It ensures that we see the fully-initialised waiter. + let mut stack = + (self.waiter_stack.swap(LOCKED, Ordering::Acquire) & !LOCKED) as *mut Waiter; + while !stack.is_null() { + // SAFETY: The caller still owns the mutex, so it is safe to manipulate the + // elements of the wait queue, which will soon become that wait queue. + let next = unsafe { (*stack).next }; + + // SAFETY: Same as above. + unsafe { (*stack).next = waiter }; + + waiter = stack; + stack = next; + } + } + } + + fn locked_data(&self) -> &UnsafeCell<T> { + &self.data + } +} + +struct Waiter { + completion: Opaque<bindings::completion>, + next: *mut Waiter, +} diff --git a/rust/kernel/time.rs b/rust/kernel/time.rs new file mode 100644 index 000000000000..881f45492268 --- /dev/null +++ b/rust/kernel/time.rs @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! Timekeeping functions. +//! +//! C header: [`include/linux/ktime.h`](../../../../include/linux/ktime.h) +//! C header: [`include/linux/timekeeping.h`](../../../../include/linux/timekeeping.h) + +use crate::bindings; +use core::time::Duration; + +/// Returns the kernel time elapsed since boot, excluding time spent sleeping, as a [`Duration`]. +pub fn ktime_get() -> Duration { + Duration::from_nanos(unsafe { bindings::ktime_get() }.try_into().unwrap()) +} + +/// Returns the kernel time elapsed since boot, including time spent sleeping, as a [`Duration`]. +pub fn ktime_get_boottime() -> Duration { + Duration::from_nanos( + unsafe { bindings::ktime_get_with_offset(bindings::tk_offsets_TK_OFFS_BOOT) } + .try_into() + .unwrap(), + ) +} diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs index e84e51ec9716..6c49adf161c3 100644 --- a/rust/kernel/types.rs +++ b/rust/kernel/types.rs @@ -2,7 +2,220 @@ //! Kernel types. -use core::{cell::UnsafeCell, mem::MaybeUninit}; +use alloc::boxed::Box; +use core::{ + cell::UnsafeCell, + mem::MaybeUninit, + ops::{Deref, DerefMut}, +}; + +/// Used to transfer ownership to and from foreign (non-Rust) languages. 
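+///
+/// For instance (an illustrative sketch added here, not part of the original patch), a `Box` can
+/// be handed to C code as an opaque pointer and reclaimed later:
+///
+/// ```
+/// use kernel::types::ForeignOwnable;
+///
+/// fn example() -> Result {
+///     let b = Box::try_new(42u32)?;
+///     // Ownership moves to the foreign side; in real code the pointer would typically be
+///     // stored in a C structure and come back through a callback.
+///     let ptr = b.into_foreign();
+///     // SAFETY: `ptr` came from `into_foreign` above and is not used again afterwards.
+///     let b = unsafe { Box::<u32>::from_foreign(ptr) };
+///     assert_eq!(*b, 42);
+///     Ok(())
+/// }
+///
+/// # example().unwrap();
+/// ```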
+/// +/// Ownership is transferred from Rust to a foreign language by calling [`Self::into_foreign`] and +/// later may be transferred back to Rust by calling [`Self::from_foreign`]. +/// +/// This trait is meant to be used in cases when Rust objects are stored in C objects and +/// eventually "freed" back to Rust. +pub trait ForeignOwnable: Sized { + /// Type of values borrowed between calls to [`ForeignOwnable::into_foreign`] and + /// [`ForeignOwnable::from_foreign`]. + type Borrowed<'a>; + + /// Converts a Rust-owned object to a foreign-owned one. + /// + /// The foreign representation is a pointer to void. + fn into_foreign(self) -> *const core::ffi::c_void; + + /// Borrows a foreign-owned object. + /// + /// # Safety + /// + /// `ptr` must have been returned by a previous call to [`ForeignOwnable::into_foreign`] for + /// which a previous matching [`ForeignOwnable::from_foreign`] hasn't been called yet. + /// Additionally, all instances (if any) of values returned by [`ForeignOwnable::borrow_mut`] + /// for this object must have been dropped. + unsafe fn borrow<'a>(ptr: *const core::ffi::c_void) -> Self::Borrowed<'a>; + + /// Mutably borrows a foreign-owned object. + /// + /// # Safety + /// + /// `ptr` must have been returned by a previous call to [`ForeignOwnable::into_foreign`] for + /// which a previous matching [`ForeignOwnable::from_foreign`] hasn't been called yet. + /// Additionally, all instances (if any) of values returned by [`ForeignOwnable::borrow`] and + /// [`ForeignOwnable::borrow_mut`] for this object must have been dropped. + unsafe fn borrow_mut(ptr: *const core::ffi::c_void) -> ScopeGuard<Self, fn(Self)> { + // SAFETY: The safety requirements ensure that `ptr` came from a previous call to + // `into_foreign`. + ScopeGuard::new_with_data(unsafe { Self::from_foreign(ptr) }, |d| { + d.into_foreign(); + }) + } + + /// Converts a foreign-owned object back to a Rust-owned one. + /// + /// # Safety + /// + /// `ptr` must have been returned by a previous call to [`ForeignOwnable::into_foreign`] for + /// which a previous matching [`ForeignOwnable::from_foreign`] hasn't been called yet. + /// Additionally, all instances (if any) of values returned by [`ForeignOwnable::borrow`] and + /// [`ForeignOwnable::borrow_mut`] for this object must have been dropped. + unsafe fn from_foreign(ptr: *const core::ffi::c_void) -> Self; +} + +impl<T: 'static> ForeignOwnable for Box<T> { + type Borrowed<'a> = &'a T; + + fn into_foreign(self) -> *const core::ffi::c_void { + Box::into_raw(self) as _ + } + + unsafe fn borrow<'a>(ptr: *const core::ffi::c_void) -> &'a T { + // SAFETY: The safety requirements for this function ensure that the object is still alive, + // so it is safe to dereference the raw pointer. + // The safety requirements of `from_foreign` also ensure that the object remains alive for + // the lifetime of the returned value. + unsafe { &*ptr.cast() } + } + + unsafe fn from_foreign(ptr: *const core::ffi::c_void) -> Self { + // SAFETY: The safety requirements of this function ensure that `ptr` comes from a previous + // call to `Self::into_foreign`. + unsafe { Box::from_raw(ptr as _) } + } +} + +impl ForeignOwnable for () { + type Borrowed<'a> = (); + + fn into_foreign(self) -> *const core::ffi::c_void { + core::ptr::NonNull::dangling().as_ptr() + } + + unsafe fn borrow<'a>(_: *const core::ffi::c_void) -> Self::Borrowed<'a> {} + + unsafe fn from_foreign(_: *const core::ffi::c_void) -> Self {} +} + +/// Runs a cleanup function/closure when dropped. 
+/// +/// The [`ScopeGuard::dismiss`] function prevents the cleanup function from running. +/// +/// # Examples +/// +/// In the example below, we have multiple exit paths and we want to log regardless of which one is +/// taken: +/// ``` +/// # use kernel::ScopeGuard; +/// fn example1(arg: bool) { +/// let _log = ScopeGuard::new(|| pr_info!("example1 completed\n")); +/// +/// if arg { +/// return; +/// } +/// +/// pr_info!("Do something...\n"); +/// } +/// +/// # example1(false); +/// # example1(true); +/// ``` +/// +/// In the example below, we want to log the same message on all early exits but a different one on +/// the main exit path: +/// ``` +/// # use kernel::ScopeGuard; +/// fn example2(arg: bool) { +/// let log = ScopeGuard::new(|| pr_info!("example2 returned early\n")); +/// +/// if arg { +/// return; +/// } +/// +/// // (Other early returns...) +/// +/// log.dismiss(); +/// pr_info!("example2 no early return\n"); +/// } +/// +/// # example2(false); +/// # example2(true); +/// ``` +/// +/// In the example below, we need a mutable object (the vector) to be accessible within the log +/// function, so we wrap it in the [`ScopeGuard`]: +/// ``` +/// # use kernel::ScopeGuard; +/// fn example3(arg: bool) -> Result { +/// let mut vec = +/// ScopeGuard::new_with_data(Vec::new(), |v| pr_info!("vec had {} elements\n", v.len())); +/// +/// vec.try_push(10u8)?; +/// if arg { +/// return Ok(()); +/// } +/// vec.try_push(20u8)?; +/// Ok(()) +/// } +/// +/// # assert_eq!(example3(false), Ok(())); +/// # assert_eq!(example3(true), Ok(())); +/// ``` +/// +/// # Invariants +/// +/// The value stored in the struct is nearly always `Some(_)`, except between +/// [`ScopeGuard::dismiss`] and [`ScopeGuard::drop`]: in this case, it will be `None` as the value +/// will have been returned to the caller. Since [`ScopeGuard::dismiss`] consumes the guard, +/// callers won't be able to use it anymore. +pub struct ScopeGuard<T, F: FnOnce(T)>(Option<(T, F)>); + +impl<T, F: FnOnce(T)> ScopeGuard<T, F> { + /// Creates a new guarded object wrapping the given data and with the given cleanup function. + pub fn new_with_data(data: T, cleanup_func: F) -> Self { + // INVARIANT: The struct is being initialised with `Some(_)`. + Self(Some((data, cleanup_func))) + } + + /// Prevents the cleanup function from running and returns the guarded data. + pub fn dismiss(mut self) -> T { + // INVARIANT: This is the exception case in the invariant; it is not visible to callers + // because this function consumes `self`. + self.0.take().unwrap().0 + } +} + +impl ScopeGuard<(), fn(())> { + /// Creates a new guarded object with the given cleanup function. + pub fn new(cleanup: impl FnOnce()) -> ScopeGuard<(), impl FnOnce(())> { + ScopeGuard::new_with_data((), move |_| cleanup()) + } +} + +impl<T, F: FnOnce(T)> Deref for ScopeGuard<T, F> { + type Target = T; + + fn deref(&self) -> &T { + // The type invariants guarantee that `unwrap` will succeed. + &self.0.as_ref().unwrap().0 + } +} + +impl<T, F: FnOnce(T)> DerefMut for ScopeGuard<T, F> { + fn deref_mut(&mut self) -> &mut T { + // The type invariants guarantee that `unwrap` will succeed. + &mut self.0.as_mut().unwrap().0 + } +} + +impl<T, F: FnOnce(T)> Drop for ScopeGuard<T, F> { + fn drop(&mut self) { + // Run the cleanup function if one is still present. + if let Some((data, cleanup)) = self.0.take() { + cleanup(data) + } + } +} /// Stores an opaque value. /// @@ -35,3 +248,84 @@ pub enum Either<L, R> { /// Constructs an instance of [`Either`] containing a value of type `R`. 
Right(R), } + +/// A trait for boolean types. +/// +/// This is meant to be used in type states to allow boolean constraints in implementation blocks. +/// In the example below, the implementation containing `MyType::set_value` could _not_ be +/// constrained to type states containing `Writable = true` if `Writable` were a constant instead +/// of a type. +/// +/// # Safety +/// +/// No additional implementations of [`Bool`] should be provided, as [`True`] and [`False`] are +/// already provided. +/// +/// # Examples +/// +/// ``` +/// # use kernel::{Bool, False, True}; +/// use core::marker::PhantomData; +/// +/// // Type state specifies whether the type is writable. +/// trait MyTypeState { +/// type Writable: Bool; +/// } +/// +/// // In state S1, the type is writable. +/// struct S1; +/// impl MyTypeState for S1 { +/// type Writable = True; +/// } +/// +/// // In state S2, the type is not writable. +/// struct S2; +/// impl MyTypeState for S2 { +/// type Writable = False; +/// } +/// +/// struct MyType<T: MyTypeState> { +/// value: u32, +/// _p: PhantomData<T>, +/// } +/// +/// impl<T: MyTypeState> MyType<T> { +/// fn new(value: u32) -> Self { +/// Self { +/// value, +/// _p: PhantomData, +/// } +/// } +/// } +/// +/// // This implementation block only applies if the type state is writable. +/// impl<T> MyType<T> +/// where +/// T: MyTypeState<Writable = True>, +/// { +/// fn set_value(&mut self, v: u32) { +/// self.value = v; +/// } +/// } +/// +/// let mut x = MyType::<S1>::new(10); +/// let mut y = MyType::<S2>::new(20); +/// +/// x.set_value(30); +/// +/// // The code below fails to compile because `S2` is not writable. +/// // y.set_value(40); +/// ``` +pub unsafe trait Bool {} + +/// Represents the `true` value for types with [`Bool`] bound. +pub struct True; + +// SAFETY: This is one of the only two implementations of `Bool`. +unsafe impl Bool for True {} + +/// Represents the `false` value for types wth [`Bool`] bound. +pub struct False; + +// SAFETY: This is one of the only two implementations of `Bool`. +unsafe impl Bool for False {} diff --git a/rust/kernel/user_ptr.rs b/rust/kernel/user_ptr.rs new file mode 100644 index 000000000000..084535675c4a --- /dev/null +++ b/rust/kernel/user_ptr.rs @@ -0,0 +1,175 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! User pointers. +//! +//! C header: [`include/linux/uaccess.h`](../../../../include/linux/uaccess.h) + +use crate::{ + bindings, + error::code::*, + error::Result, + io_buffer::{IoBufferReader, IoBufferWriter}, +}; +use alloc::vec::Vec; + +/// A reference to an area in userspace memory, which can be either +/// read-only or read-write. +/// +/// All methods on this struct are safe: invalid pointers return +/// `EFAULT`. Concurrent access, *including data races to/from userspace +/// memory*, is permitted, because fundamentally another userspace +/// thread/process could always be modifying memory at the same time +/// (in the same way that userspace Rust's [`std::io`] permits data races +/// with the contents of files on disk). In the presence of a race, the +/// exact byte values read/written are unspecified but the operation is +/// well-defined. Kernelspace code should validate its copy of data +/// after completing a read, and not expect that multiple reads of the +/// same address will return the same value. +/// +/// All APIs enforce the invariant that a given byte of memory from userspace +/// may only be read once. By preventing double-fetches we avoid TOCTOU +/// vulnerabilities. 
This is accomplished by taking `self` by value to prevent +/// obtaining multiple readers on a given [`UserSlicePtr`], and the readers +/// only permitting forward reads. +/// +/// Constructing a [`UserSlicePtr`] performs no checks on the provided +/// address and length, it can safely be constructed inside a kernel thread +/// with no current userspace process. Reads and writes wrap the kernel APIs +/// `copy_from_user` and `copy_to_user`, which check the memory map of the +/// current process and enforce that the address range is within the user +/// range (no additional calls to `access_ok` are needed). +/// +/// [`std::io`]: https://doc.rust-lang.org/std/io/index.html +pub struct UserSlicePtr(*mut core::ffi::c_void, usize); + +impl UserSlicePtr { + /// Constructs a user slice from a raw pointer and a length in bytes. + /// + /// # Safety + /// + /// Callers must be careful to avoid time-of-check-time-of-use + /// (TOCTOU) issues. The simplest way is to create a single instance of + /// [`UserSlicePtr`] per user memory block as it reads each byte at + /// most once. + pub unsafe fn new(ptr: *mut core::ffi::c_void, length: usize) -> Self { + UserSlicePtr(ptr, length) + } + + /// Reads the entirety of the user slice. + /// + /// Returns `EFAULT` if the address does not currently point to + /// mapped, readable memory. + pub fn read_all(self) -> Result<Vec<u8>> { + self.reader().read_all() + } + + /// Constructs a [`UserSlicePtrReader`]. + pub fn reader(self) -> UserSlicePtrReader { + UserSlicePtrReader(self.0, self.1) + } + + /// Writes the provided slice into the user slice. + /// + /// Returns `EFAULT` if the address does not currently point to + /// mapped, writable memory (in which case some data from before the + /// fault may be written), or `data` is larger than the user slice + /// (in which case no data is written). + pub fn write_all(self, data: &[u8]) -> Result { + self.writer().write_slice(data) + } + + /// Constructs a [`UserSlicePtrWriter`]. + pub fn writer(self) -> UserSlicePtrWriter { + UserSlicePtrWriter(self.0, self.1) + } + + /// Constructs both a [`UserSlicePtrReader`] and a [`UserSlicePtrWriter`]. + pub fn reader_writer(self) -> (UserSlicePtrReader, UserSlicePtrWriter) { + ( + UserSlicePtrReader(self.0, self.1), + UserSlicePtrWriter(self.0, self.1), + ) + } +} + +/// A reader for [`UserSlicePtr`]. +/// +/// Used to incrementally read from the user slice. +pub struct UserSlicePtrReader(*mut core::ffi::c_void, usize); + +impl IoBufferReader for UserSlicePtrReader { + /// Returns the number of bytes left to be read from this. + /// + /// Note that even reading less than this number of bytes may fail. + fn len(&self) -> usize { + self.1 + } + + /// Reads raw data from the user slice into a raw kernel buffer. + /// + /// # Safety + /// + /// The output buffer must be valid. + unsafe fn read_raw(&mut self, out: *mut u8, len: usize) -> Result { + if len > self.1 || len > u32::MAX as usize { + return Err(EFAULT); + } + let res = unsafe { bindings::copy_from_user(out as _, self.0, len as _) }; + if res != 0 { + return Err(EFAULT); + } + // Since this is not a pointer to a valid object in our program, + // we cannot use `add`, which has C-style rules for defined + // behavior. + self.0 = self.0.wrapping_add(len); + self.1 -= len; + Ok(()) + } +} + +/// A writer for [`UserSlicePtr`]. +/// +/// Used to incrementally write into the user slice. 
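+///
+/// # Examples
+///
+/// A sketch (added for illustration, not part of the original patch) of copying kernel data out
+/// to userspace; `uptr`, `len` and `fill_user_buffer` are hypothetical stand-ins for values that
+/// would come from an ioctl argument or similar:
+///
+/// ```ignore
+/// use kernel::io_buffer::IoBufferWriter;
+/// use kernel::user_ptr::UserSlicePtr;
+///
+/// fn fill_user_buffer(uptr: *mut core::ffi::c_void, len: usize) -> Result {
+///     // SAFETY: `uptr`/`len` describe a single user memory block and this is the only
+///     // `UserSlicePtr` created for it, so no double-fetch can occur.
+///     let slice = unsafe { UserSlicePtr::new(uptr, len) };
+///     let mut writer = slice.writer();
+///     // Fails with `EFAULT` if the range is not mapped writable in the current process.
+///     writer.write_slice(&[1u8, 2, 3, 4])
+/// }
+/// ```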
+pub struct UserSlicePtrWriter(*mut core::ffi::c_void, usize); + +impl IoBufferWriter for UserSlicePtrWriter { + fn len(&self) -> usize { + self.1 + } + + fn clear(&mut self, mut len: usize) -> Result { + let mut ret = Ok(()); + if len > self.1 { + ret = Err(EFAULT); + len = self.1; + } + + // SAFETY: The buffer will be validated by `clear_user`. We ensure that `len` is within + // bounds in the check above. + let left = unsafe { bindings::clear_user(self.0, len as _) } as usize; + if left != 0 { + ret = Err(EFAULT); + len -= left; + } + + self.0 = self.0.wrapping_add(len); + self.1 -= len; + ret + } + + unsafe fn write_raw(&mut self, data: *const u8, len: usize) -> Result { + if len > self.1 || len > u32::MAX as usize { + return Err(EFAULT); + } + let res = unsafe { bindings::copy_to_user(self.0, data as _, len as _) }; + if res != 0 { + return Err(EFAULT); + } + // Since this is not a pointer to a valid object in our program, + // we cannot use `add`, which has C-style rules for defined + // behavior. + self.0 = self.0.wrapping_add(len); + self.1 -= len; + Ok(()) + } +} diff --git a/rust/kernel/xarray.rs b/rust/kernel/xarray.rs new file mode 100644 index 000000000000..4f09e8e613da --- /dev/null +++ b/rust/kernel/xarray.rs @@ -0,0 +1,299 @@ +// SPDX-License-Identifier: GPL-2.0 + +//! XArray abstraction. +//! +//! C header: [`include/linux/xarray.h`](../../include/linux/xarray.h) + +use crate::{ + bindings, + error::{Error, Result}, + types::{ForeignOwnable, Opaque, ScopeGuard}, +}; +use core::{marker::PhantomData, ops::Deref}; + +/// Flags passed to `XArray::new` to configure the `XArray`. +type Flags = bindings::gfp_t; + +/// Flag values passed to `XArray::new` to configure the `XArray`. +pub mod flags { + /// Use IRQ-safe locking + pub const LOCK_IRQ: super::Flags = bindings::BINDINGS_XA_FLAGS_LOCK_IRQ; + /// Use softirq-safe locking + pub const LOCK_BH: super::Flags = bindings::BINDINGS_XA_FLAGS_LOCK_BH; + /// Track which entries are free (distinct from None) + pub const TRACK_FREE: super::Flags = bindings::BINDINGS_XA_FLAGS_TRACK_FREE; + /// Initialize array index 0 as busy + pub const ZERO_BUSY: super::Flags = bindings::BINDINGS_XA_FLAGS_ZERO_BUSY; + /// Use GFP_ACCOUNT for internal memory allocations + pub const ACCOUNT: super::Flags = bindings::BINDINGS_XA_FLAGS_ACCOUNT; + /// Create an allocating `XArray` starting at index 0 + pub const ALLOC: super::Flags = bindings::BINDINGS_XA_FLAGS_ALLOC; + /// Create an allocating `XArray` starting at index 1 + pub const ALLOC1: super::Flags = bindings::BINDINGS_XA_FLAGS_ALLOC1; +} + +/// Wrapper for a value owned by the `XArray` which holds the `XArray` lock until dropped. +/// +/// # Invariants +/// +/// The `*mut T` is always non-NULL and owned by the referenced `XArray` +pub struct Guard<'a, T: ForeignOwnable>(*mut T, &'a Opaque<bindings::xarray>); + +impl<'a, T: ForeignOwnable> Guard<'a, T> { + /// Borrow the underlying value wrapped by the `Guard`. + /// + /// Returns a `T::Borrowed` type for the owned `ForeignOwnable` type. + pub fn borrow<'b>(&'b self) -> T::Borrowed<'b> + where + 'a: 'b, + { + // SAFETY: The value is owned by the `XArray`, the lifetime it is borrowed for must not + // outlive the `XArray` itself, nor the Guard that holds the lock ensuring the value + // remains in the `XArray`. + unsafe { T::borrow(self.0 as _) } + } +} + +// Convenience impl for `ForeignOwnable` types whose `Borrowed` +// form implements Deref. 
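+// For example, `Box<T>`'s `Borrowed<'a>` form is `&'a T`, which derefs to `T`.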
+impl<'a, T: ForeignOwnable> Deref for Guard<'a, T> +where + T::Borrowed<'static>: Deref, +{ + type Target = <T::Borrowed<'static> as Deref>::Target; + + fn deref(&self) -> &Self::Target { + // SAFETY: See the `borrow()` method. The dereferenced `T::Borrowed` value + // must share the same lifetime, so we can return a reference to it. + // TODO: Is this really sound? + unsafe { &*(T::borrow(self.0 as _).deref() as *const _) } + } +} + +impl<'a, T: ForeignOwnable> Drop for Guard<'a, T> { + fn drop(&mut self) { + // SAFETY: The XArray we have a reference to owns the C xarray object. + unsafe { bindings::xa_unlock(self.1.get()) }; + } +} + +/// Represents a reserved slot in an `XArray`, which does not yet have a value but has an assigned +/// index and may not be allocated by any other user. If the Reservation is dropped without +/// being filled, the entry is marked as available again. +/// +/// Users must ensure that reserved slots are not filled by other mechanisms, or otherwise their +/// contents may be dropped and replaced (which will print a warning). +pub struct Reservation<'a, T: ForeignOwnable>(&'a XArray<T>, usize, PhantomData<T>); + +impl<'a, T: ForeignOwnable> Reservation<'a, T> { + /// Store a value into the reserved slot. + pub fn store(self, value: T) -> Result<usize> { + if self.0.replace(self.1, value)?.is_some() { + crate::pr_err!("XArray: Reservation stored but the entry already had data!\n"); + // Consider it a success anyway, not much we can do + } + let index = self.1; + core::mem::forget(self); + Ok(index) + } + + /// Returns the index of this reservation. + pub fn index(&self) -> usize { + self.1 + } +} + +impl<'a, T: ForeignOwnable> Drop for Reservation<'a, T> { + fn drop(&mut self) { + if self.0.remove(self.1).is_some() { + crate::pr_err!("XArray: Reservation dropped but the entry was not empty!\n"); + } + } +} + +/// An array which efficiently maps sparse integer indices to owned objects. +/// +/// This is similar to a `Vec<Option<T>>`, but more efficient when there are holes in the +/// index space, and can be efficiently grown. +/// +/// This structure is expected to often be used with an inner type that can either be efficiently +/// cloned, such as an `Arc<T>`. +pub struct XArray<T: ForeignOwnable> { + xa: Opaque<bindings::xarray>, + _p: PhantomData<T>, +} + +impl<T: ForeignOwnable> XArray<T> { + /// Creates a new `XArray` with the given flags. + pub fn new(flags: Flags) -> Result<XArray<T>> { + let xa = Opaque::uninit(); + + // SAFETY: We have just created `xa`. This data structure does not require + // pinning. + unsafe { bindings::xa_init_flags(xa.get(), flags) }; + + // INVARIANT: Initialize the `XArray` with a valid `xa`. + Ok(XArray { + xa, + _p: PhantomData, + }) + } + + /// Replaces an entry with a new value, returning the old value (if any). + pub fn replace(&self, index: usize, value: T) -> Result<Option<T>> { + let new = value.into_foreign(); + let guard = ScopeGuard::new(|| unsafe { + T::from_foreign(new); + }); + + let old = unsafe { + bindings::xa_store( + self.xa.get(), + index.try_into()?, + new as *mut _, + bindings::GFP_KERNEL, + ) + }; + + let err = unsafe { bindings::xa_err(old) }; + if err != 0 { + Err(Error::from_kernel_errno(err)) + } else if old.is_null() { + guard.dismiss(); + Ok(None) + } else { + guard.dismiss(); + Ok(Some(unsafe { T::from_foreign(old) })) + } + } + + /// Replaces an entry with a new value, dropping the old value (if any). 
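+    ///
+    /// A short usage sketch (added for illustration, not part of the original patch), storing
+    /// boxed values at arbitrary indices:
+    ///
+    /// ```ignore
+    /// use kernel::xarray::{flags, XArray};
+    ///
+    /// fn example() -> Result {
+    ///     let arr = XArray::<Box<u32>>::new(flags::LOCK_IRQ)?;
+    ///     arr.set(10, Box::try_new(1)?)?;
+    ///     // Setting the same index again drops the previously stored box.
+    ///     arr.set(10, Box::try_new(2)?)?;
+    ///     assert_eq!(arr.get(10).map(|g| *g), Some(2));
+    ///     Ok(())
+    /// }
+    /// ```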
+ pub fn set(&self, index: usize, value: T) -> Result { + self.replace(index, value)?; + Ok(()) + } + + /// Looks up and returns a reference to an entry in the array, returning a `Guard` if it + /// exists. + /// + /// This guard blocks all other actions on the `XArray`. Callers are expected to drop the + /// `Guard` eagerly to avoid blocking other users, such as by taking a clone of the value. + pub fn get(&self, index: usize) -> Option<Guard<'_, T>> { + // SAFETY: `self.xa` is always valid by the type invariant. + let p = unsafe { + bindings::xa_lock(self.xa.get()); + bindings::xa_load(self.xa.get(), index.try_into().ok()?) + }; + + if p.is_null() { + unsafe { bindings::xa_unlock(self.xa.get()) }; + None + } else { + Some(Guard(p as _, &self.xa)) + } + } + + /// Removes and returns an entry, returning it if it existed. + pub fn remove(&self, index: usize) -> Option<T> { + let p = unsafe { bindings::xa_erase(self.xa.get(), index.try_into().ok()?) }; + if p.is_null() { + None + } else { + Some(unsafe { T::from_foreign(p) }) + } + } + + /// Allocate a new index in the array, optionally storing a new value into it, with + /// configurable bounds for the index range to allocate from. + /// + /// If `value` is `None`, then the index is reserved from further allocation but remains + /// free for storing a value into it. + pub fn alloc_limits(&self, value: Option<T>, min: u32, max: u32) -> Result<usize> { + let new = value.map_or(core::ptr::null(), |a| a.into_foreign()); + let mut id: u32 = 0; + + // SAFETY: `self.xa` is always valid by the type invariant. If this succeeds, it + // takes ownership of the passed `T` (if any). If it fails, we must drop the + // `T` again. + let ret = unsafe { + bindings::xa_alloc( + self.xa.get(), + &mut id, + new as *mut _, + bindings::xa_limit { min, max }, + bindings::GFP_KERNEL, + ) + }; + + if ret < 0 { + // Make sure to drop the value we failed to store + if !new.is_null() { + // SAFETY: If `new` is not NULL, it came from the `ForeignOwnable` we got + // from the caller. + unsafe { T::from_foreign(new) }; + } + Err(Error::from_kernel_errno(ret)) + } else { + Ok(id as usize) + } + } + + /// Allocate a new index in the array, optionally storing a new value into it. + /// + /// If `value` is `None`, then the index is reserved from further allocation but remains + /// free for storing a value into it. + pub fn alloc(&self, value: Option<T>) -> Result<usize> { + self.alloc_limits(value, 0, u32::MAX) + } + + /// Reserve a new index in the array within configurable bounds for the index. + /// + /// Returns a `Reservation` object, which can then be used to store a value at this index or + /// otherwise free it for reuse. + pub fn reserve_limits(&self, min: u32, max: u32) -> Result<Reservation<'_, T>> { + Ok(Reservation( + self, + self.alloc_limits(None, min, max)?, + PhantomData, + )) + } + + /// Reserve a new index in the array. + /// + /// Returns a `Reservation` object, which can then be used to store a value at this index or + /// otherwise free it for reuse. + pub fn reserve(&self) -> Result<Reservation<'_, T>> { + Ok(Reservation(self, self.alloc(None)?, PhantomData)) + } +} + +impl<T: ForeignOwnable> Drop for XArray<T> { + fn drop(&mut self) { + // SAFETY: `self.xa` is valid by the type invariant, and as we have the only reference to + // the `XArray` we can safely iterate its contents and drop everything. 
+ unsafe { + let mut index: core::ffi::c_ulong = 0; + let mut entry = bindings::xa_find( + self.xa.get(), + &mut index, + core::ffi::c_ulong::MAX, + bindings::BINDINGS_XA_PRESENT, + ); + while !entry.is_null() { + T::from_foreign(entry); + entry = bindings::xa_find_after( + self.xa.get(), + &mut index, + core::ffi::c_ulong::MAX, + bindings::BINDINGS_XA_PRESENT, + ); + } + + bindings::xa_destroy(self.xa.get()); + } + } +} + +// SAFETY: XArray is thread-safe and all mutation operations are internally locked. +unsafe impl<T: Send + ForeignOwnable> Send for XArray<T> {} +unsafe impl<T: Sync + ForeignOwnable> Sync for XArray<T> {} diff --git a/rust/macros/concat_idents.rs b/rust/macros/concat_idents.rs index 7e4b450f3a50..d6614b900aa2 100644 --- a/rust/macros/concat_idents.rs +++ b/rust/macros/concat_idents.rs @@ -14,10 +14,28 @@ fn expect_ident(it: &mut token_stream::IntoIter) -> Ident { pub(crate) fn concat_idents(ts: TokenStream) -> TokenStream { let mut it = ts.into_iter(); - let a = expect_ident(&mut it); - assert_eq!(expect_punct(&mut it), ','); + let mut out = TokenStream::new(); + let a = loop { + let ident = expect_ident(&mut it); + let punct = expect_punct(&mut it); + match punct.as_char() { + ',' => break ident, + ':' => { + let punct2 = expect_punct(&mut it); + assert_eq!(punct2.as_char(), ':'); + out.extend([ + TokenTree::Ident(ident), + TokenTree::Punct(punct), + TokenTree::Punct(punct2), + ]); + } + _ => panic!("Expected , or ::"), + } + }; + let b = expect_ident(&mut it); assert!(it.next().is_none(), "only two idents can be concatenated"); let res = Ident::new(&format!("{a}{b}"), b.span()); - TokenStream::from_iter([TokenTree::Ident(res)]) + out.extend([TokenTree::Ident(res)]); + out } diff --git a/rust/macros/helpers.rs b/rust/macros/helpers.rs index cf7ad950dc1e..517f0bbfcf79 100644 --- a/rust/macros/helpers.rs +++ b/rust/macros/helpers.rs @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 -use proc_macro::{token_stream, TokenTree}; +use proc_macro::{token_stream, Group, Punct, TokenTree}; pub(crate) fn try_ident(it: &mut token_stream::IntoIter) -> Option<String> { if let Some(TokenTree::Ident(ident)) = it.next() { @@ -38,9 +38,9 @@ pub(crate) fn expect_ident(it: &mut token_stream::IntoIter) -> String { try_ident(it).expect("Expected Ident") } -pub(crate) fn expect_punct(it: &mut token_stream::IntoIter) -> char { +pub(crate) fn expect_punct(it: &mut token_stream::IntoIter) -> Punct { if let TokenTree::Punct(punct) = it.next().expect("Reached end of token stream for Punct") { - punct.as_char() + punct } else { panic!("Expected Punct"); } @@ -56,8 +56,36 @@ pub(crate) fn expect_string_ascii(it: &mut token_stream::IntoIter) -> String { string } +pub(crate) fn expect_literal(it: &mut token_stream::IntoIter) -> String { + try_literal(it).expect("Expected Literal") +} + +pub(crate) fn expect_group(it: &mut token_stream::IntoIter) -> Group { + if let TokenTree::Group(group) = it.next().expect("Reached end of token stream for Group") { + group + } else { + panic!("Expected Group"); + } +} + pub(crate) fn expect_end(it: &mut token_stream::IntoIter) { if it.next().is_some() { panic!("Expected end"); } } + +pub(crate) fn get_literal(it: &mut token_stream::IntoIter, expected_name: &str) -> String { + assert_eq!(expect_ident(it), expected_name); + assert_eq!(expect_punct(it), ':'); + let literal = expect_literal(it); + assert_eq!(expect_punct(it), ','); + literal +} + +pub(crate) fn get_string(it: &mut token_stream::IntoIter, expected_name: &str) -> String { + assert_eq!(expect_ident(it), 
expected_name); + assert_eq!(expect_punct(it), ':'); + let string = expect_string(it); + assert_eq!(expect_punct(it), ','); + string +} diff --git a/rust/macros/lib.rs b/rust/macros/lib.rs index c1d385e345b9..3ab9bae4ab52 100644 --- a/rust/macros/lib.rs +++ b/rust/macros/lib.rs @@ -5,6 +5,7 @@ mod concat_idents; mod helpers; mod module; +mod versions; mod vtable; use proc_macro::TokenStream; @@ -73,6 +74,12 @@ pub fn module(ts: TokenStream) -> TokenStream { module::module(ts) } +/// Declares multiple variants of a structure or impl code +#[proc_macro_attribute] +pub fn versions(attr: TokenStream, item: TokenStream) -> TokenStream { + versions::versions(attr, item) +} + /// Declares or implements a vtable trait. /// /// Linux's use of pure vtables is very close to Rust traits, but they differ diff --git a/rust/macros/module.rs b/rust/macros/module.rs index a7e363c2b044..1f30435d7ad4 100644 --- a/rust/macros/module.rs +++ b/rust/macros/module.rs @@ -1,9 +1,59 @@ // SPDX-License-Identifier: GPL-2.0 use crate::helpers::*; -use proc_macro::{token_stream, Literal, TokenStream, TokenTree}; +use proc_macro::{token_stream, Delimiter, Group, Literal, TokenStream, TokenTree}; use std::fmt::Write; +#[derive(Clone, PartialEq)] +enum ParamType { + Ident(String), + Array { vals: String, max_length: usize }, +} + +fn expect_array_fields(it: &mut token_stream::IntoIter) -> ParamType { + assert_eq!(expect_punct(it), '<'); + let vals = expect_ident(it); + assert_eq!(expect_punct(it), ','); + let max_length_str = expect_literal(it); + let max_length = max_length_str + .parse::<usize>() + .expect("Expected usize length"); + assert_eq!(expect_punct(it), '>'); + ParamType::Array { vals, max_length } +} + +fn expect_type(it: &mut token_stream::IntoIter) -> ParamType { + if let TokenTree::Ident(ident) = it + .next() + .expect("Reached end of token stream for param type") + { + match ident.to_string().as_ref() { + "ArrayParam" => expect_array_fields(it), + _ => ParamType::Ident(ident.to_string()), + } + } else { + panic!("Expected Param Type") + } +} + +fn expect_string_array(it: &mut token_stream::IntoIter) -> Vec<String> { + let group = expect_group(it); + assert_eq!(group.delimiter(), Delimiter::Bracket); + let mut values = Vec::new(); + let mut it = group.stream().into_iter(); + + while let Some(val) = try_string(&mut it) { + assert!(val.is_ascii(), "Expected ASCII string"); + values.push(val); + match it.next() { + Some(TokenTree::Punct(punct)) => assert_eq!(punct.as_char(), ','), + None => break, + _ => panic!("Expected ',' or end of array"), + } + } + values +} + struct ModInfoBuilder<'a> { module: &'a str, counter: usize, @@ -69,6 +119,113 @@ impl<'a> ModInfoBuilder<'a> { self.emit_only_builtin(field, content); self.emit_only_loadable(field, content); } + + fn emit_param(&mut self, field: &str, param: &str, content: &str) { + let content = format!("{param}:{content}", param = param, content = content); + self.emit(field, &content); + } +} + +fn permissions_are_readonly(perms: &str) -> bool { + let (radix, digits) = if let Some(n) = perms.strip_prefix("0x") { + (16, n) + } else if let Some(n) = perms.strip_prefix("0o") { + (8, n) + } else if let Some(n) = perms.strip_prefix("0b") { + (2, n) + } else { + (10, perms) + }; + match u32::from_str_radix(digits, radix) { + Ok(perms) => perms & 0o222 == 0, + Err(_) => false, + } +} + +fn param_ops_path(param_type: &str) -> &'static str { + match param_type { + "bool" => "kernel::module_param::PARAM_OPS_BOOL", + "i8" => "kernel::module_param::PARAM_OPS_I8", + "u8" => 
"kernel::module_param::PARAM_OPS_U8", + "i16" => "kernel::module_param::PARAM_OPS_I16", + "u16" => "kernel::module_param::PARAM_OPS_U16", + "i32" => "kernel::module_param::PARAM_OPS_I32", + "u32" => "kernel::module_param::PARAM_OPS_U32", + "i64" => "kernel::module_param::PARAM_OPS_I64", + "u64" => "kernel::module_param::PARAM_OPS_U64", + "isize" => "kernel::module_param::PARAM_OPS_ISIZE", + "usize" => "kernel::module_param::PARAM_OPS_USIZE", + "str" => "kernel::module_param::PARAM_OPS_STR", + t => panic!("Unrecognized type {}", t), + } +} + +#[allow(clippy::type_complexity)] +fn try_simple_param_val( + param_type: &str, +) -> Box<dyn Fn(&mut token_stream::IntoIter) -> Option<String>> { + match param_type { + "bool" => Box::new(try_ident), + "str" => Box::new(|param_it| { + try_string(param_it) + .map(|s| format!("kernel::module_param::StringParam::Ref(b\"{}\")", s)) + }), + _ => Box::new(try_literal), + } +} + +fn get_default(param_type: &ParamType, param_it: &mut token_stream::IntoIter) -> String { + let try_param_val = match param_type { + ParamType::Ident(ref param_type) + | ParamType::Array { + vals: ref param_type, + max_length: _, + } => try_simple_param_val(param_type), + }; + assert_eq!(expect_ident(param_it), "default"); + assert_eq!(expect_punct(param_it), ':'); + let default = match param_type { + ParamType::Ident(_) => try_param_val(param_it).expect("Expected default param value"), + ParamType::Array { + vals: _, + max_length: _, + } => { + let group = expect_group(param_it); + assert_eq!(group.delimiter(), Delimiter::Bracket); + let mut default_vals = Vec::new(); + let mut it = group.stream().into_iter(); + + while let Some(default_val) = try_param_val(&mut it) { + default_vals.push(default_val); + match it.next() { + Some(TokenTree::Punct(punct)) => assert_eq!(punct.as_char(), ','), + None => break, + _ => panic!("Expected ',' or end of array default values"), + } + } + + let mut default_array = "kernel::module_param::ArrayParam::create(&[".to_string(); + default_array.push_str( + &default_vals + .iter() + .map(|val| val.to_string()) + .collect::<Vec<String>>() + .join(","), + ); + default_array.push_str("])"); + default_array + } + }; + assert_eq!(expect_punct(param_it), ','); + default +} + +fn generated_array_ops_name(vals: &str, max_length: usize) -> String { + format!( + "__generated_array_ops_{vals}_{max_length}", + vals = vals, + max_length = max_length + ) } #[derive(Debug, Default)] @@ -78,15 +235,23 @@ struct ModuleInfo { name: String, author: Option<String>, description: Option<String>, - alias: Option<String>, + alias: Option<Vec<String>>, + params: Option<Group>, } impl ModuleInfo { fn parse(it: &mut token_stream::IntoIter) -> Self { let mut info = ModuleInfo::default(); - const EXPECTED_KEYS: &[&str] = - &["type", "name", "author", "description", "license", "alias"]; + const EXPECTED_KEYS: &[&str] = &[ + "type", + "name", + "author", + "description", + "license", + "alias", + "params", + ]; const REQUIRED_KEYS: &[&str] = &["type", "name", "license"]; let mut seen_keys = Vec::new(); @@ -104,7 +269,7 @@ impl ModuleInfo { ); } - assert_eq!(expect_punct(it), ':'); + assert_eq!(expect_punct(it).as_char(), ':'); match key.as_str() { "type" => info.type_ = expect_ident(it), @@ -112,14 +277,15 @@ impl ModuleInfo { "author" => info.author = Some(expect_string(it)), "description" => info.description = Some(expect_string(it)), "license" => info.license = expect_string_ascii(it), - "alias" => info.alias = Some(expect_string_ascii(it)), + "alias" => info.alias = 
Some(expect_string_array(it)), + "params" => info.params = Some(expect_group(it)), _ => panic!( "Unknown key \"{}\". Valid keys are: {:?}.", key, EXPECTED_KEYS ), } - assert_eq!(expect_punct(it), ','); + assert_eq!(expect_punct(it).as_char(), ','); seen_keys.push(key); } @@ -163,8 +329,10 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream { modinfo.emit("description", &description); } modinfo.emit("license", &info.license); - if let Some(alias) = info.alias { - modinfo.emit("alias", &alias); + if let Some(aliases) = info.alias { + for alias in aliases { + modinfo.emit("alias", &alias); + } } // Built-in modules also export the `file` modinfo string. @@ -172,6 +340,195 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream { std::env::var("RUST_MODFILE").expect("Unable to fetch RUST_MODFILE environmental variable"); modinfo.emit_only_builtin("file", &file); + let mut array_types_to_generate = Vec::new(); + if let Some(params) = info.params { + assert_eq!(params.delimiter(), Delimiter::Brace); + + let mut it = params.stream().into_iter(); + + loop { + let param_name = match it.next() { + Some(TokenTree::Ident(ident)) => ident.to_string(), + Some(_) => panic!("Expected Ident or end"), + None => break, + }; + + assert_eq!(expect_punct(&mut it), ':'); + let param_type = expect_type(&mut it); + let group = expect_group(&mut it); + assert_eq!(expect_punct(&mut it), ','); + + assert_eq!(group.delimiter(), Delimiter::Brace); + + let mut param_it = group.stream().into_iter(); + let param_default = get_default(¶m_type, &mut param_it); + let param_permissions = get_literal(&mut param_it, "permissions"); + let param_description = get_string(&mut param_it, "description"); + expect_end(&mut param_it); + + // TODO: More primitive types. + // TODO: Other kinds: unsafes, etc. + let (param_kernel_type, ops): (String, _) = match param_type { + ParamType::Ident(ref param_type) => ( + param_type.to_string(), + param_ops_path(param_type).to_string(), + ), + ParamType::Array { + ref vals, + max_length, + } => { + array_types_to_generate.push((vals.clone(), max_length)); + ( + format!("__rust_array_param_{}_{}", vals, max_length), + generated_array_ops_name(vals, max_length), + ) + } + }; + + modinfo.emit_param("parmtype", ¶m_name, ¶m_kernel_type); + modinfo.emit_param("parm", ¶m_name, ¶m_description); + let param_type_internal = match param_type { + ParamType::Ident(ref param_type) => match param_type.as_ref() { + "str" => "kernel::module_param::StringParam".to_string(), + other => other.to_string(), + }, + ParamType::Array { + ref vals, + max_length, + } => format!( + "kernel::module_param::ArrayParam<{vals}, {max_length}>", + vals = vals, + max_length = max_length + ), + }; + let read_func = if permissions_are_readonly(¶m_permissions) { + format!( + " + fn read(&self) + -> &<{param_type_internal} as kernel::module_param::ModuleParam>::Value {{ + // SAFETY: Parameters do not need to be locked because they are + // read only or sysfs is not enabled. + unsafe {{ + <{param_type_internal} as kernel::module_param::ModuleParam>::value( + &__{name}_{param_name}_value + ) + }} + }} + ", + name = info.name, + param_name = param_name, + param_type_internal = param_type_internal, + ) + } else { + format!( + " + fn read<'lck>(&self, lock: &'lck kernel::KParamGuard) + -> &'lck <{param_type_internal} as kernel::module_param::ModuleParam>::Value {{ + // SAFETY: Parameters are locked by `KParamGuard`. 
+ unsafe {{ + <{param_type_internal} as kernel::module_param::ModuleParam>::value( + &__{name}_{param_name}_value + ) + }} + }} + ", + name = info.name, + param_name = param_name, + param_type_internal = param_type_internal, + ) + }; + let kparam = format!( + " + kernel::bindings::kernel_param__bindgen_ty_1 {{ + arg: unsafe {{ &__{name}_{param_name}_value }} + as *const _ as *mut core::ffi::c_void, + }}, + ", + name = info.name, + param_name = param_name, + ); + write!( + modinfo.buffer, + " + static mut __{name}_{param_name}_value: {param_type_internal} = {param_default}; + + struct __{name}_{param_name}; + + impl __{name}_{param_name} {{ {read_func} }} + + const {param_name}: __{name}_{param_name} = __{name}_{param_name}; + + // Note: the C macro that generates the static structs for the `__param` section + // asks for them to be `aligned(sizeof(void *))`. However, that was put in place + // in 2003 in commit 38d5b085d2a0 (\"[PATCH] Fix over-alignment problem on x86-64\") + // to undo GCC over-alignment of static structs of >32 bytes. It seems that is + // not the case anymore, so we simplify to a transparent representation here + // in the expectation that it is not needed anymore. + // TODO: Revisit this to confirm the above comment and remove it if it happened. + #[repr(transparent)] + struct __{name}_{param_name}_RacyKernelParam(kernel::bindings::kernel_param); + + unsafe impl Sync for __{name}_{param_name}_RacyKernelParam {{ + }} + + #[cfg(not(MODULE))] + const __{name}_{param_name}_name: *const core::ffi::c_char = + b\"{name}.{param_name}\\0\" as *const _ as *const core::ffi::c_char; + + #[cfg(MODULE)] + const __{name}_{param_name}_name: *const core::ffi::c_char = + b\"{param_name}\\0\" as *const _ as *const core::ffi::c_char; + + #[link_section = \"__param\"] + #[used] + static __{name}_{param_name}_struct: __{name}_{param_name}_RacyKernelParam = + __{name}_{param_name}_RacyKernelParam(kernel::bindings::kernel_param {{ + name: __{name}_{param_name}_name, + // SAFETY: `__this_module` is constructed by the kernel at load time + // and will not be freed until the module is unloaded. + #[cfg(MODULE)] + mod_: unsafe {{ &kernel::bindings::__this_module as *const _ as *mut _ }}, + #[cfg(not(MODULE))] + mod_: core::ptr::null_mut(), + ops: unsafe {{ &{ops} }} as *const kernel::bindings::kernel_param_ops, + perm: {permissions}, + level: -1, + flags: 0, + __bindgen_anon_1: {kparam} + }}); + ", + name = info.name, + param_type_internal = param_type_internal, + read_func = read_func, + param_default = param_default, + param_name = param_name, + ops = ops, + permissions = param_permissions, + kparam = kparam, + ) + .unwrap(); + } + } + + let mut generated_array_types = String::new(); + + for (vals, max_length) in array_types_to_generate { + let ops_name = generated_array_ops_name(&vals, max_length); + write!( + generated_array_types, + " + kernel::make_param_ops!( + {ops_name}, + kernel::module_param::ArrayParam<{vals}, {{ {max_length} }}> + ); + ", + ops_name = ops_name, + vals = vals, + max_length = max_length, + ) + .unwrap(); + } + format!( " /// The module name. 
@@ -250,7 +607,7 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream { }} fn __init() -> core::ffi::c_int {{ - match <{type_} as kernel::Module>::init(&THIS_MODULE) {{ + match <{type_} as kernel::Module>::init(kernel::c_str!(\"{name}\"), &THIS_MODULE) {{ Ok(m) => {{ unsafe {{ __MOD = Some(m); @@ -271,10 +628,13 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream { }} {modinfo} + + {generated_array_types} ", type_ = info.type_, name = info.name, modinfo = modinfo.buffer, + generated_array_types = generated_array_types, initcall_section = ".initcall6.init" ) .parse() diff --git a/rust/macros/versions.rs b/rust/macros/versions.rs new file mode 100644 index 000000000000..6dfdfd7d25c5 --- /dev/null +++ b/rust/macros/versions.rs @@ -0,0 +1,289 @@ +use proc_macro::{Group, Ident, Punct, Spacing, Span, TokenStream, TokenTree}; + +//use crate::helpers::expect_punct; + +fn expect_group(it: &mut impl Iterator<Item = TokenTree>) -> Group { + if let Some(TokenTree::Group(group)) = it.next() { + group + } else { + panic!("Expected Group") + } +} + +fn expect_punct(it: &mut impl Iterator<Item = TokenTree>) -> String { + if let Some(TokenTree::Punct(punct)) = it.next() { + punct.to_string() + } else { + panic!("Expected Group") + } +} + +fn drop_until_punct(it: &mut impl Iterator<Item = TokenTree>, delimiter: &str) { + let mut depth: isize = 0; + for token in it.by_ref() { + if let TokenTree::Punct(punct) = token { + match punct.as_char() { + '<' => { + depth += 1; + } + '>' => { + depth -= 1; + } + _ => { + if depth == 0 && delimiter.contains(&punct.to_string()) { + break; + } + } + } + } + } +} + +struct VersionConfig { + fields: &'static [&'static str], + enums: &'static [&'static [&'static str]], + versions: &'static [&'static [&'static str]], +} + +static AGX_VERSIONS: VersionConfig = VersionConfig { + fields: &["G", "V"], + enums: &[&["G13", "G14"], &["V12_3", "V12_4", "V13_0B4", "V13_2"]], + versions: &[ + &["G13", "V12_3"], + &["G14", "V12_4"], + // &["G13", "V13_0B4"], + // &["G14", "V13_0B4"], + &["G13", "V13_2"], + &["G14", "V13_2"], + ], +}; + +fn check_version( + config: &VersionConfig, + ver: &[usize], + it: &mut impl Iterator<Item = TokenTree>, +) -> bool { + let first = it.next().unwrap(); + let val: bool = match &first { + TokenTree::Group(group) => check_version(config, ver, &mut group.stream().into_iter()), + TokenTree::Ident(ident) => { + let key = config + .fields + .iter() + .position(|&r| r == ident.to_string()) + .unwrap_or_else(|| panic!("Unknown field {}", ident)); + let mut operator = expect_punct(it); + let mut rhs_token = it.next().unwrap(); + if let TokenTree::Punct(punct) = &rhs_token { + operator.extend(std::iter::once(punct.as_char())); + rhs_token = it.next().unwrap(); + } + let rhs_name = if let TokenTree::Ident(ident) = &rhs_token { + ident.to_string() + } else { + panic!("Unexpected token {}", ident) + }; + + let rhs = config.enums[key] + .iter() + .position(|&r| r == rhs_name) + .unwrap_or_else(|| panic!("Unknown value for {}:{}", ident, rhs_name)); + let lhs = ver[key]; + + match operator.as_str() { + "==" => lhs == rhs, + "!=" => lhs != rhs, + ">" => lhs > rhs, + ">=" => lhs >= rhs, + "<" => lhs < rhs, + "<=" => lhs <= rhs, + _ => panic!("Unknown operator {}", operator), + } + } + _ => { + panic!("Unknown token {}", first) + } + }; + + let boolop = it.next(); + match boolop { + Some(TokenTree::Punct(punct)) => { + let right = expect_punct(it); + if right != punct.to_string() { + panic!("Unexpected op {}{}", punct, right); + } + match punct.as_char() { + '&' 
=> val && check_version(config, ver, it), + '|' => val || check_version(config, ver, it), + _ => panic!("Unexpected op {}{}", right, right), + } + } + Some(a) => panic!("Unexpected op {}", a), + None => val, + } +} + +fn filter_versions( + config: &VersionConfig, + tag: &str, + ver: &[usize], + tree: impl IntoIterator<Item = TokenTree>, + is_struct: bool, +) -> Vec<TokenTree> { + let mut out = Vec::<TokenTree>::new(); + let mut it = tree.into_iter(); + + while let Some(token) = it.next() { + let mut tail: Option<TokenTree> = None; + match &token { + TokenTree::Punct(punct) if punct.to_string() == "#" => { + let group = expect_group(&mut it); + let mut grp_it = group.stream().into_iter(); + let attr = grp_it.next().unwrap(); + match attr { + TokenTree::Ident(ident) if ident.to_string() == "ver" => { + if check_version(config, ver, &mut grp_it) { + } else if is_struct { + drop_until_punct(&mut it, ","); + } else { + let first = it.next().unwrap(); + match &first { + TokenTree::Group(_) => (), + _ => { + drop_until_punct(&mut it, ",;"); + } + } + } + } + _ => { + out.push(token.clone()); + out.push(TokenTree::Group(group.clone())); + } + } + continue; + } + TokenTree::Punct(punct) if punct.to_string() == ":" => { + let next = it.next(); + match next { + Some(TokenTree::Punct(punct)) if punct.to_string() == ":" => { + let next = it.next(); + match next { + Some(TokenTree::Ident(idtag)) if idtag.to_string() == "ver" => { + let ident = match out.pop() { + Some(TokenTree::Ident(ident)) => ident, + a => panic!("$ver not following ident: {:?}", a), + }; + let name = ident.to_string() + tag; + let new_ident = Ident::new(name.as_str(), ident.span()); + out.push(TokenTree::Ident(new_ident)); + continue; + } + Some(a) => { + out.push(token.clone()); + out.push(token.clone()); + tail = Some(a); + } + None => { + out.push(token.clone()); + out.push(token.clone()); + } + } + } + Some(a) => { + out.push(token.clone()); + tail = Some(a); + } + None => { + out.push(token.clone()); + continue; + } + } + } + _ => { + tail = Some(token); + } + } + match &tail { + Some(TokenTree::Group(group)) => { + let new_body = + filter_versions(config, tag, ver, &mut group.stream().into_iter(), is_struct); + let mut stream = TokenStream::new(); + stream.extend(new_body); + let mut filtered_group = Group::new(group.delimiter(), stream); + filtered_group.set_span(group.span()); + out.push(TokenTree::Group(filtered_group)); + } + Some(token) => { + out.push(token.clone()); + } + None => {} + } + } + + out +} + +pub(crate) fn versions(attr: TokenStream, item: TokenStream) -> TokenStream { + let config = match attr.to_string().as_str() { + "AGX" => &AGX_VERSIONS, + _ => panic!("Unknown version group {}", attr), + }; + + let mut it = item.into_iter(); + let mut out = TokenStream::new(); + let mut body: Vec<TokenTree> = Vec::new(); + let mut is_struct = false; + + while let Some(token) = it.next() { + match token { + TokenTree::Punct(punct) if punct.to_string() == "#" => { + body.push(TokenTree::Punct(punct)); + body.push(it.next().unwrap()); + } + TokenTree::Ident(ident) + if ["struct", "enum", "union", "const", "type"] + .contains(&ident.to_string().as_str()) => + { + is_struct = ident.to_string() != "const"; + body.push(TokenTree::Ident(ident)); + body.push(it.next().unwrap()); + // This isn't valid syntax in a struct definition, so add it for the user + body.push(TokenTree::Punct(Punct::new(':', Spacing::Joint))); + body.push(TokenTree::Punct(Punct::new(':', Spacing::Alone))); + body.push(TokenTree::Ident(Ident::new("ver", 
Span::call_site()))); + break; + } + TokenTree::Ident(ident) if ident.to_string() == "impl" => { + body.push(TokenTree::Ident(ident)); + break; + } + TokenTree::Ident(ident) if ident.to_string() == "fn" => { + body.push(TokenTree::Ident(ident)); + break; + } + _ => { + body.push(token); + } + } + } + + body.extend(it); + + for ver in config.versions { + let tag = ver.join(""); + let mut ver_num = Vec::<usize>::new(); + for (i, comp) in ver.iter().enumerate() { + let idx = config.enums[i].iter().position(|&r| r == *comp).unwrap(); + ver_num.push(idx); + } + out.extend(filter_versions( + config, + &tag, + &ver_num, + body.clone(), + is_struct, + )); + } + + out +} diff --git a/samples/rust/rust_print.rs b/samples/rust/rust_print.rs index 8b39d9cef6d1..165a8d7b1c07 100644 --- a/samples/rust/rust_print.rs +++ b/samples/rust/rust_print.rs @@ -15,6 +15,30 @@ module! { struct RustPrint; +fn arc_print() -> Result { + use kernel::sync::*; + + let a = Arc::try_new(1)?; + let b = UniqueArc::try_new("hello, world")?; + + // Prints the value of data in `a`. + pr_info!("{}", a); + + // Uses ":?" to print debug fmt of `b`. + pr_info!("{:?}", b); + + let a: Arc<&str> = b.into(); + let c = a.clone(); + + // Uses `dbg` to print, will move `c`. + dbg!(c); + + // Prints debug fmt with pretty-print "#" and number-in-hex "x". + pr_info!("{:#x?}", a); + + Ok(()) +} + impl kernel::Module for RustPrint { fn init(_module: &'static ThisModule) -> Result<Self> { pr_info!("Rust printing macros sample (init)\n"); @@ -43,6 +67,8 @@ impl kernel::Module for RustPrint { pr_cont!(" is {}", "continued"); pr_cont!(" with {}\n", "args"); + arc_print()?; + Ok(RustPrint) } } diff --git a/scripts/Makefile.build b/scripts/Makefile.build index a0d5c6cca76d..d43fbd12570a 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -277,7 +277,7 @@ $(obj)/%.lst: $(src)/%.c FORCE # Compile Rust sources (.rs) # --------------------------------------------------------------------------- -rust_allowed_features := core_ffi_c +rust_allowed_features := allocator_api,const_refs_to_cell,new_uninit rust_common_cmd = \ RUST_MODFILE=$(modfile) $(RUSTC_OR_CLIPPY) $(rust_flags) \ diff --git a/scripts/generate_rust_target.rs b/scripts/generate_rust_target.rs index 3c6cbe2b278d..fe0e4ba54492 100644 --- a/scripts/generate_rust_target.rs +++ b/scripts/generate_rust_target.rs @@ -148,7 +148,18 @@ fn main() { let mut ts = TargetSpec::new(); // `llvm-target`s are taken from `scripts/Makefile.clang`. - if cfg.has("X86_64") { + if cfg.has("ARM64") { + ts.push("arch", "aarch64"); + ts.push( + "data-layout", + "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", + ); + ts.push("disable-redzone", true); + ts.push("features", "+strict-align,+neon,+fp-armv8"); + ts.push("llvm-target", "aarch64-linux-gnu"); + ts.push("max-atomic-width", 128); + ts.push("target-pointer-width", "64"); + } else if cfg.has("X86_64") { ts.push("arch", "x86_64"); ts.push( "data-layout", diff --git a/scripts/min-tool-version.sh b/scripts/min-tool-version.sh index a814f1efb39d..a368a3e56bca 100755 --- a/scripts/min-tool-version.sh +++ b/scripts/min-tool-version.sh @@ -31,7 +31,7 @@ llvm) fi ;; rustc) - echo 1.62.0 + echo 1.66.0 ;; bindgen) echo 0.56.0 |
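
For readers skimming the diff, a minimal usage sketch of the new `kernel::xarray::XArray` abstraction follows. It is illustrative only and not part of the series: the module path, the `flags::ALLOC` constant (assumed to mirror `XA_FLAGS_ALLOC` and defined earlier in `rust/kernel/xarray.rs`, outside the hunk shown) and the `MyData` type are assumptions, while the `alloc()`, `get()`, `reserve()`, `store()` and `remove()` calls follow the API visible above.

    use kernel::prelude::*;
    use kernel::sync::Arc;
    use kernel::xarray::{self, XArray};

    struct MyData {
        id: u32,
    }

    fn xarray_example() -> Result {
        // `flags::ALLOC` is assumed to correspond to `XA_FLAGS_ALLOC`, which is
        // required for `alloc()`/`reserve()` below.
        let arr: XArray<Arc<MyData>> = XArray::new(xarray::flags::ALLOC)?;

        // Allocate an index and store a value into it in one step.
        let idx = arr.alloc(Some(Arc::try_new(MyData { id: 1 })?))?;

        // Look up the entry. The `Guard` holds the internal xa_lock, so keep
        // its scope short.
        if let Some(entry) = arr.get(idx) {
            pr_info!("entry {} has id {}\n", idx, entry.id);
        }

        // Reserve an index now and fill it later; dropping an unused
        // `Reservation` releases the index again.
        let reservation = arr.reserve()?;
        let idx2 = reservation.store(Arc::try_new(MyData { id: 2 })?)?;

        // Remove an entry, taking ownership of it back.
        let _removed: Option<Arc<MyData>> = arr.remove(idx2);

        Ok(())
    }

Note that `get()` returns a `Guard` that keeps the array locked, which is why the sketch only reads what it needs and lets the guard drop at the end of the `if let` scope.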
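
The `concat_idents!` change above lets the first argument be a path rather than a bare identifier: leading `ident::` segments are passed through unchanged and only the final identifier is concatenated. A short sketch, with invented constants and an assumed re-export path for the macro:

    use kernel::macros::concat_idents; // exact re-export path assumed for this sketch

    const FOO_BAR: u32 = 42;

    mod nested {
        pub const FOO_BAZ: u32 = 7;
    }

    fn concat_example() -> u32 {
        // Plain identifier concatenation, as before: expands to `FOO_BAR`.
        let a = concat_idents!(FOO_, BAR);
        // New in this change: the leading path is kept and only the last
        // identifier is concatenated, expanding to `nested::FOO_BAZ`.
        let b = concat_idents!(nested::FOO_, BAZ);
        a + b
    }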
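
The `module!` additions (array `alias` values and the `params` key) could be exercised as in the following hypothetical module. The parameter syntax mirrors what `expect_type`, `get_default`, `get_literal` and `get_string` parse, and the lock-free `read()` accessor is the one generated by `read_func` for read-only permissions; the extra name argument to `kernel::Module::init` is inferred from the `__init()` change above and is an assumption outside this series, as are the module and parameter names.

    use kernel::prelude::*;
    use kernel::str::CStr;

    module! {
        type: RustParams,
        name: "rust_params",
        author: "Example Author",
        description: "Module parameter example",
        license: "GPL",
        alias: ["rust_params_alt", "rust_params_old"],
        params: {
            nr_devs: u32 {
                default: 1,
                permissions: 0o444,
                description: "Number of virtual devices to create",
            },
            ids: ArrayParam<i64, 4> {
                default: [0, 1],
                permissions: 0,
                description: "Up to four device ids",
            },
        },
    }

    struct RustParams;

    impl kernel::Module for RustParams {
        // The first parameter follows the `kernel::c_str!("{name}")` argument
        // now passed by the generated `__init()`.
        fn init(_name: &'static CStr, _module: &'static ThisModule) -> Result<Self> {
            // Read-only parameters get a lock-free `read()` accessor.
            pr_info!("nr_devs = {}\n", *nr_devs.read());
            Ok(RustParams)
        }
    }

Both `permissions: 0o444` and `permissions: 0` satisfy `permissions_are_readonly()`, so no `KParamGuard` is needed to read them.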
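
Finally, the new `#[versions]` attribute macro stamps out one copy of an annotated item per row of the `AGX_VERSIONS` table, appending the version tag (for example `G13V12_3`) to identifiers marked with `::ver` and keeping or dropping fields and statements behind `#[ver(...)]` conditions. A hypothetical sketch of how driver code would use it; the type and field names are invented and the import path for the macro is assumed:

    use kernel::macros::versions; // import path assumed for this sketch

    #[versions(AGX)]
    #[derive(Debug)]
    struct HwConfig {
        // Present in every generated variant.
        num_clusters: u32,

        // Only emitted for firmware 13.2 and later variants.
        #[ver(V >= V13_2)]
        num_dies: u32,
    }

    #[versions(AGX)]
    impl HwConfig::ver {
        fn new(num_clusters: u32) -> HwConfig::ver {
            // Exactly one of these statements survives in each generated copy.
            #[ver(V >= V13_2)]
            let cfg = HwConfig::ver {
                num_clusters,
                num_dies: 1,
            };
            #[ver(V < V13_2)]
            let cfg = HwConfig::ver { num_clusters };
            cfg
        }
    }

With the version list in `AGX_VERSIONS`, this expands to `HwConfigG13V12_3`, `HwConfigG14V12_4`, `HwConfigG13V13_2` and `HwConfigG14V13_2` (plus the matching impl blocks), with `num_dies` present only in the `V13_2` variants.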