summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/process/changes.rst2
-rw-r--r--Documentation/rust/arch-support.rst1
-rw-r--r--MAINTAINERS1
-rw-r--r--Makefile2
-rw-r--r--arch/arm64/Kconfig1
-rw-r--r--arch/arm64/Makefile4
-rw-r--r--drivers/gpu/drm/Kconfig2
-rw-r--r--drivers/gpu/drm/Makefile1
-rw-r--r--drivers/gpu/drm/asahi/Kconfig39
-rw-r--r--drivers/gpu/drm/asahi/Makefile3
-rw-r--r--drivers/gpu/drm/asahi/alloc.rs1046
-rw-r--r--drivers/gpu/drm/asahi/asahi.rs53
-rw-r--r--drivers/gpu/drm/asahi/buffer.rs694
-rw-r--r--drivers/gpu/drm/asahi/channel.rs542
-rw-r--r--drivers/gpu/drm/asahi/debug.rs129
-rw-r--r--drivers/gpu/drm/asahi/driver.rs166
-rw-r--r--drivers/gpu/drm/asahi/event.rs229
-rw-r--r--drivers/gpu/drm/asahi/file.rs718
-rw-r--r--drivers/gpu/drm/asahi/float.rs381
-rw-r--r--drivers/gpu/drm/asahi/fw/buffer.rs170
-rw-r--r--drivers/gpu/drm/asahi/fw/channels.rs385
-rw-r--r--drivers/gpu/drm/asahi/fw/compute.rs107
-rw-r--r--drivers/gpu/drm/asahi/fw/event.rs100
-rw-r--r--drivers/gpu/drm/asahi/fw/fragment.rs276
-rw-r--r--drivers/gpu/drm/asahi/fw/initdata.rs1264
-rw-r--r--drivers/gpu/drm/asahi/fw/job.rs56
-rw-r--r--drivers/gpu/drm/asahi/fw/microseq.rs384
-rw-r--r--drivers/gpu/drm/asahi/fw/mod.rs15
-rw-r--r--drivers/gpu/drm/asahi/fw/types.rs233
-rw-r--r--drivers/gpu/drm/asahi/fw/vertex.rs177
-rw-r--r--drivers/gpu/drm/asahi/fw/workqueue.rs168
-rw-r--r--drivers/gpu/drm/asahi/gem.rs301
-rw-r--r--drivers/gpu/drm/asahi/gpu.rs1088
-rw-r--r--drivers/gpu/drm/asahi/hw/mod.rs522
-rw-r--r--drivers/gpu/drm/asahi/hw/t600x.rs140
-rw-r--r--drivers/gpu/drm/asahi/hw/t8103.rs80
-rw-r--r--drivers/gpu/drm/asahi/hw/t8112.rs82
-rw-r--r--drivers/gpu/drm/asahi/initdata.rs777
-rw-r--r--drivers/gpu/drm/asahi/mem.rs133
-rw-r--r--drivers/gpu/drm/asahi/microseq.rs61
-rw-r--r--drivers/gpu/drm/asahi/mmu.rs1249
-rw-r--r--drivers/gpu/drm/asahi/object.rs704
-rw-r--r--drivers/gpu/drm/asahi/place.rs343
-rw-r--r--drivers/gpu/drm/asahi/queue/common.rs52
-rw-r--r--drivers/gpu/drm/asahi/queue/compute.rs371
-rw-r--r--drivers/gpu/drm/asahi/queue/mod.rs725
-rw-r--r--drivers/gpu/drm/asahi/queue/render.rs1173
-rw-r--r--drivers/gpu/drm/asahi/regs.rs387
-rw-r--r--drivers/gpu/drm/asahi/slotalloc.rs292
-rw-r--r--drivers/gpu/drm/asahi/util.rs44
-rw-r--r--drivers/gpu/drm/asahi/workqueue.rs880
-rw-r--r--drivers/gpu/drm/drm_gem.c1
-rw-r--r--drivers/gpu/drm/drm_gem_shmem_helper.c9
-rw-r--r--drivers/gpu/drm/drm_prime.c5
-rw-r--r--drivers/gpu/drm/scheduler/sched_main.c37
-rw-r--r--drivers/iommu/io-pgtable-arm.c101
-rw-r--r--drivers/iommu/io-pgtable.c1
-rw-r--r--include/drm/drm_gem.h8
-rw-r--r--include/drm/drm_gem_shmem_helper.h3
-rw-r--r--include/drm/gpu_scheduler.h8
-rw-r--r--include/linux/io-pgtable.h6
-rw-r--r--include/uapi/drm/asahi_drm.h560
-rw-r--r--rust/Makefile18
-rw-r--r--rust/alloc/alloc.rs26
-rw-r--r--rust/alloc/borrow.rs498
-rw-r--r--rust/alloc/boxed.rs435
-rw-r--r--rust/alloc/collections/mod.rs3
-rw-r--r--rust/alloc/lib.rs44
-rw-r--r--rust/alloc/raw_vec.rs14
-rw-r--r--rust/alloc/slice.rs100
-rw-r--r--rust/alloc/vec/drain.rs75
-rw-r--r--rust/alloc/vec/drain_filter.rs60
-rw-r--r--rust/alloc/vec/into_iter.rs78
-rw-r--r--rust/alloc/vec/is_zero.rs73
-rw-r--r--rust/alloc/vec/mod.rs491
-rw-r--r--rust/alloc/vec/set_len_on_drop.rs28
-rw-r--r--rust/alloc/vec/spec_extend.rs170
-rw-r--r--rust/bindings/bindings_helper.h51
-rw-r--r--rust/bindings/lib.rs2
-rw-r--r--rust/compiler_builtins.rs5
-rw-r--r--rust/helpers.c515
-rw-r--r--rust/kernel/build_assert.rs1
-rw-r--r--rust/kernel/delay.rs104
-rw-r--r--rust/kernel/device.rs536
-rw-r--r--rust/kernel/dma_fence.rs532
-rw-r--r--rust/kernel/driver.rs475
-rw-r--r--rust/kernel/drm/device.rs76
-rw-r--r--rust/kernel/drm/drv.rs342
-rw-r--r--rust/kernel/drm/file.rs113
-rw-r--r--rust/kernel/drm/gem/mod.rs387
-rw-r--r--rust/kernel/drm/gem/shmem.rs385
-rw-r--r--rust/kernel/drm/ioctl.rs147
-rw-r--r--rust/kernel/drm/mm.rs309
-rw-r--r--rust/kernel/drm/mod.rs12
-rw-r--r--rust/kernel/drm/sched.rs358
-rw-r--r--rust/kernel/drm/syncobj.rs77
-rw-r--r--rust/kernel/error.rs281
-rw-r--r--rust/kernel/io_buffer.rs153
-rw-r--r--rust/kernel/io_mem.rs292
-rw-r--r--rust/kernel/io_pgtable.rs353
-rw-r--r--rust/kernel/ioctl.rs64
-rw-r--r--rust/kernel/lib.rs144
-rw-r--r--rust/kernel/module_param.rs501
-rw-r--r--rust/kernel/of.rs546
-rw-r--r--rust/kernel/platform.rs286
-rw-r--r--rust/kernel/prelude.rs10
-rw-r--r--rust/kernel/revocable.rs425
-rw-r--r--rust/kernel/soc/apple/mod.rs6
-rw-r--r--rust/kernel/soc/apple/rtkit.rs277
-rw-r--r--rust/kernel/soc/mod.rs5
-rw-r--r--rust/kernel/std_vendor.rs1
-rw-r--r--rust/kernel/sync.rs75
-rw-r--r--rust/kernel/sync/arc.rs577
-rw-r--r--rust/kernel/sync/condvar.rs142
-rw-r--r--rust/kernel/sync/guard.rs162
-rw-r--r--rust/kernel/sync/mutex.rs149
-rw-r--r--rust/kernel/sync/rcu.rs52
-rw-r--r--rust/kernel/sync/revocable.rs246
-rw-r--r--rust/kernel/sync/smutex.rs290
-rw-r--r--rust/kernel/time.rs23
-rw-r--r--rust/kernel/types.rs296
-rw-r--r--rust/kernel/user_ptr.rs175
-rw-r--r--rust/kernel/xarray.rs299
-rw-r--r--rust/macros/concat_idents.rs24
-rw-r--r--rust/macros/helpers.rs34
-rw-r--r--rust/macros/lib.rs7
-rw-r--r--rust/macros/module.rs380
-rw-r--r--rust/macros/versions.rs289
-rw-r--r--samples/rust/rust_print.rs26
-rw-r--r--scripts/Makefile.build2
-rw-r--r--scripts/generate_rust_target.rs13
-rwxr-xr-xscripts/min-tool-version.sh2
132 files changed, 29854 insertions, 785 deletions
diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst
index ef540865ad22..685964798479 100644
--- a/Documentation/process/changes.rst
+++ b/Documentation/process/changes.rst
@@ -31,7 +31,7 @@ you probably needn't concern yourself with pcmciautils.
====================== =============== ========================================
GNU C 5.1 gcc --version
Clang/LLVM (optional) 11.0.0 clang --version
-Rust (optional) 1.62.0 rustc --version
+Rust (optional) 1.66.0 rustc --version
bindgen (optional) 0.56.0 bindgen --version
GNU make 3.82 make --version
bash 4.2 bash --version
diff --git a/Documentation/rust/arch-support.rst b/Documentation/rust/arch-support.rst
index 6982b63775da..3776059a385a 100644
--- a/Documentation/rust/arch-support.rst
+++ b/Documentation/rust/arch-support.rst
@@ -15,5 +15,6 @@ support corresponds to ``S`` values in the ``MAINTAINERS`` file.
============ ================ ==============================================
Architecture Level of support Constraints
============ ================ ==============================================
+``arm64`` Maintained None.
``x86`` Maintained ``x86_64`` only.
============ ================ ==============================================
diff --git a/MAINTAINERS b/MAINTAINERS
index f65def435395..f6d9a4c7d22a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -18246,6 +18246,7 @@ L: rust-for-linux@vger.kernel.org
S: Supported
W: https://github.com/Rust-for-Linux/linux
B: https://github.com/Rust-for-Linux/linux/issues
+C: zulip://rust-for-linux.zulipchat.com
T: git https://github.com/Rust-for-Linux/linux.git rust-next
F: Documentation/rust/
F: rust/
diff --git a/Makefile b/Makefile
index 3f6628780eb2..8e5621ddadb2 100644
--- a/Makefile
+++ b/Makefile
@@ -1602,7 +1602,7 @@ endif # CONFIG_MODULES
CLEAN_FILES += include/ksym vmlinux.symvers modules-only.symvers \
modules.builtin modules.builtin.modinfo modules.nsdeps \
compile_commands.json .thinlto-cache rust/test rust/doc \
- .vmlinux.objs .vmlinux.export.c
+ rust-project.json .vmlinux.objs .vmlinux.export.c
# Directories & files removed with 'make mrproper'
MRPROPER_FILES += include/config include/generated \
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index c5ccca26a408..b0a60403cda2 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -207,6 +207,7 @@ config ARM64
select HAVE_FUNCTION_ARG_ACCESS_API
select MMU_GATHER_RCU_TABLE_FREE
select HAVE_RSEQ
+ select HAVE_RUST
select HAVE_STACKPROTECTOR
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_KPROBES
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index d62bd221828f..33ae20fc3f56 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -41,6 +41,8 @@ KBUILD_CFLAGS += -mgeneral-regs-only \
KBUILD_CFLAGS += $(call cc-disable-warning, psabi)
KBUILD_AFLAGS += $(compat_vdso)
+KBUILD_RUSTFLAGS += -C target-feature="-neon,-fp-armv8"
+
KBUILD_CFLAGS += $(call cc-option,-mabi=lp64)
KBUILD_AFLAGS += $(call cc-option,-mabi=lp64)
@@ -85,8 +87,10 @@ PACRET-$(CONFIG_UNWIND_PATCH_PAC_INTO_SCS) := pac-ret
ifeq ($(CONFIG_ARM64_BTI_KERNEL),y)
branch-prot-flags-$(CONFIG_CC_HAS_BRANCH_PROT_PAC_RET_BTI) := -mbranch-protection=$(PACRET-y)+bti
+KBUILD_RUSTFLAGS += -Z branch-protection=bti,pac-ret,leaf
else
branch-prot-flags-$(CONFIG_CC_HAS_BRANCH_PROT_PAC_RET) := -mbranch-protection=$(PACRET-y)
+KBUILD_RUSTFLAGS += -Z branch-protection=pac-ret,leaf
endif
# -march=armv8.3-a enables the non-nops instructions for PAC, to avoid the
# compiler to generate them and consequently to break the single image contract
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 264f7f3b395b..fa539047e09e 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -355,6 +355,8 @@ source "drivers/gpu/drm/tidss/Kconfig"
source "drivers/gpu/drm/xlnx/Kconfig"
+source "drivers/gpu/drm/asahi/Kconfig"
+
source "drivers/gpu/drm/gud/Kconfig"
source "drivers/gpu/drm/solomon/Kconfig"
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 981696e99943..38c45ba7ac88 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -198,3 +198,4 @@ obj-y += gud/
obj-$(CONFIG_DRM_HYPERV) += hyperv/
obj-y += solomon/
obj-$(CONFIG_DRM_SPRD) += sprd/
+obj-$(CONFIG_DRM_ASAHI) += asahi/
diff --git a/drivers/gpu/drm/asahi/Kconfig b/drivers/gpu/drm/asahi/Kconfig
new file mode 100644
index 000000000000..633ccd0d35ac
--- /dev/null
+++ b/drivers/gpu/drm/asahi/Kconfig
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: GPL-2.0
+
+config RUST_DRM_SCHED
+ bool
+ select DRM_SCHED
+
+config RUST_DRM_GEM_SHMEM_HELPER
+ bool
+ select DRM_GEM_SHMEM_HELPER
+
+config RUST_APPLE_RTKIT
+ bool
+ select APPLE_RTKIT
+
+config DRM_ASAHI
+ tristate "Asahi (DRM support for Apple AGX GPUs)"
+ depends on RUST
+ depends on DRM
+ depends on (ARM64 && ARCH_APPLE) || (COMPILE_TEST && !GENERIC_ATOMIC64)
+ depends on MMU
+ select RUST_DRM_SCHED
+ select IOMMU_SUPPORT
+ select IOMMU_IO_PGTABLE_LPAE
+ select RUST_DRM_GEM_SHMEM_HELPER
+ select RUST_APPLE_RTKIT
+ help
+ DRM driver for Apple AGX GPUs (G13x, found in the M1 SoC family)
+
+config DRM_ASAHI_DEBUG_ALLOCATOR
+ bool "Use debug allocator"
+ depends on DRM_ASAHI
+ help
+ Use an alternate, simpler allocator which significantly reduces
+ performance, but can help find firmware- or GPU-side memory safety
+ issues. However, it can also trigger firmware bugs more easily,
+ so expect GPU crashes.
+
+ Say N unless you are debugging firmware structures or porting to a
+ new firmware version.
diff --git a/drivers/gpu/drm/asahi/Makefile b/drivers/gpu/drm/asahi/Makefile
new file mode 100644
index 000000000000..e67248667987
--- /dev/null
+++ b/drivers/gpu/drm/asahi/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_DRM_ASAHI) += asahi.o
diff --git a/drivers/gpu/drm/asahi/alloc.rs b/drivers/gpu/drm/asahi/alloc.rs
new file mode 100644
index 000000000000..d918b19e9721
--- /dev/null
+++ b/drivers/gpu/drm/asahi/alloc.rs
@@ -0,0 +1,1046 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+//! GPU kernel object allocator.
+//!
+//! This kernel driver needs to manage a large number of GPU objects, in both firmware/kernel
+//! address space and user address space. This module implements a simple grow-only heap allocator
+//! based on the DRM MM range allocator, and a debug allocator that allocates each object as a
+//! separate GEM object.
+//!
+//! Allocations may optionally have debugging enabled, which adds preambles that store metadata
+//! about the allocation. This is useful for live debugging using the hypervisor or postmortem
+//! debugging with a GPU memory snapshot, since it makes it easier to identify use-after-free and
+//! caching issues.
+
+use kernel::{c_str, drm::mm, error::Result, prelude::*, str::CString, sync::LockClassKey};
+
+use crate::debug::*;
+use crate::driver::AsahiDevice;
+use crate::fw::types::Zeroed;
+use crate::mmu;
+use crate::object::{GpuArray, GpuObject, GpuOnlyArray, GpuStruct, GpuWeakPointer};
+
+use core::cmp::Ordering;
+use core::fmt;
+use core::fmt::{Debug, Formatter};
+use core::marker::PhantomData;
+use core::mem;
+use core::mem::MaybeUninit;
+use core::ptr::NonNull;
+
+const DEBUG_CLASS: DebugFlags = DebugFlags::Alloc; // Debug class of this module; presumably picked up by the `mod_dev_dbg!` macro - confirm in debug.rs
+
+#[cfg(not(CONFIG_DRM_ASAHI_DEBUG_ALLOCATOR))]
+/// The driver-global allocator type: the heap allocator, used when the debug allocator is not configured.
+pub(crate) type DefaultAllocator = HeapAllocator;
+
+#[cfg(not(CONFIG_DRM_ASAHI_DEBUG_ALLOCATOR))]
+/// The driver-global allocation type matching [`DefaultAllocator`] (heap allocator build).
+pub(crate) type DefaultAllocation = HeapAllocation;
+
+#[cfg(CONFIG_DRM_ASAHI_DEBUG_ALLOCATOR)]
+/// The driver-global allocator type: the simple one-GEM-object-per-allocation debug allocator.
+pub(crate) type DefaultAllocator = SimpleAllocator;
+
+#[cfg(CONFIG_DRM_ASAHI_DEBUG_ALLOCATOR)]
+/// The driver-global allocation type matching [`DefaultAllocator`] (debug allocator build).
+pub(crate) type DefaultAllocation = SimpleAllocation;
+
+/// Represents a raw allocation (without any type information), as handed out by an [`Allocator`].
+pub(crate) trait RawAllocation {
+ /// Returns the CPU-side pointer to the start of the allocation as a non-null byte pointer, or `None` when there is no CPU mapping.
+ fn ptr(&self) -> Option<NonNull<u8>>;
+ /// Returns the GPU VA of the start of the allocation as a u64.
+ fn gpu_ptr(&self) -> u64;
+ /// Returns the size of the allocation in bytes.
+ fn size(&self) -> usize;
+ /// Returns the AsahiDevice that owns this allocation.
+ fn device(&self) -> &AsahiDevice;
+}
+
+/// Represents a typed allocation, i.e. memory intended to hold a value (or values) of type `T`.
+pub(crate) trait Allocation<T>: Debug {
+ /// Returns the typed CPU-side pointer to the object, or `None` when there is no CPU mapping.
+ fn ptr(&self) -> Option<NonNull<T>>;
+ /// Returns the GPU VA of the object as a u64.
+ fn gpu_ptr(&self) -> u64;
+ /// Returns the size of the allocation in bytes.
+ fn size(&self) -> usize;
+ /// Returns the AsahiDevice that owns this allocation.
+ fn device(&self) -> &AsahiDevice;
+}
+
+/// A generic typed allocation wrapping a RawAllocation.
+///
+/// This is currently the only Allocation implementation, since it is shared by all allocators.
+pub(crate) struct GenericAlloc<T, U: RawAllocation> {
+ alloc: U, // The underlying raw allocation (debug preamble + object + guard padding).
+ alloc_size: usize, // Size in bytes of the object itself, excluding preamble and padding.
+ debug_offset: usize, // Bytes between the start of `alloc` and the object; 0 when no debug preamble is present.
+ padding: usize, // Number of guard bytes following the object; 0 when overflow detection is off.
+ _p: PhantomData<T>, // Ties the allocation to the object type without storing a `T`.
+}
+
+impl<T, U: RawAllocation> Allocation<T> for GenericAlloc<T, U> {
+ /// Typed CPU pointer to the object, i.e. the raw CPU pointer advanced past the debug preamble.
+ fn ptr(&self) -> Option<NonNull<T>> {
+ let base = self.alloc.ptr()?;
+ // SAFETY: As built by `Allocator::alloc_generic`, the raw allocation is at least
+ // `debug_offset` bytes long, so the offset pointer stays within the same allocation
+ // and, being derived from a non-null base, is itself non-null.
+ let object = unsafe { base.as_ptr().add(self.debug_offset) } as *mut T;
+ // SAFETY: See above; `object` is non-null.
+ Some(unsafe { NonNull::new_unchecked(object) })
+ }
+ /// GPU VA of the object, i.e. the raw GPU VA advanced past the debug preamble.
+ fn gpu_ptr(&self) -> u64 {
+ let preamble = self.debug_offset as u64;
+ self.alloc.gpu_ptr() + preamble
+ }
+ /// Size of the object in bytes (excluding debug preamble and guard padding).
+ fn size(&self) -> usize {
+ self.alloc_size
+ }
+ /// The device owning the underlying raw allocation.
+ fn device(&self) -> &AsahiDevice {
+ self.alloc.device()
+ }
+}
+
+impl<T, U: RawAllocation> Debug for GenericAlloc<T, U> {
+ /// Formats the allocation as a struct named after its full type, showing the CPU pointer,
+ /// the GPU VA and the object size (the latter two in hexadecimal).
+ fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+ let type_name = core::any::type_name::<GenericAlloc<T, U>>();
+ let mut out = f.debug_struct(type_name);
+ out.field("ptr", &format_args!("{:?}", self.ptr()));
+ out.field("gpu_ptr", &format_args!("{:#X?}", self.gpu_ptr()));
+ out.field("size", &format_args!("{:#X?}", self.size()));
+ out.finish()
+ }
+}
+
+/// Debugging data associated with an allocation, when debugging is enabled. Stored in memory immediately before the object.
+#[repr(C)]
+struct AllocDebugData {
+ state: u32, // STATE_LIVE while allocated; overwritten with STATE_DEAD when the allocation is dropped.
+ _pad: u32, // Explicit padding so that `size` is 8-byte aligned.
+ size: u64, // Size of the object in bytes (excluding preamble and guard padding).
+ base_gpuva: u64, // GPU VA of the start of the raw allocation (including the preamble).
+ obj_gpuva: u64, // GPU VA of the object itself.
+ name: [u8; 0x20], // Type name of the object, truncated to 0x1f bytes and zero-padded.
+}
+
+/// Magic flag indicating a live allocation (reads "LIVE" as little-endian ASCII bytes).
+const STATE_LIVE: u32 = 0x4556494c;
+/// Magic flag indicating a freed allocation (reads "DEAD" as little-endian ASCII bytes).
+const STATE_DEAD: u32 = 0x44414544;
+
+/// Marker byte to identify when firmware/GPU write beyond the end of an allocation. The guard padding after an object is filled with it.
+const GUARD_MARKER: u8 = 0x93;
+
+impl<T, U: RawAllocation> Drop for GenericAlloc<T, U> {
+ fn drop(&mut self) { // Marks the debug preamble dead, optionally poisons the object, and checks the guard padding for corruption.
+ let debug_len = mem::size_of::<AllocDebugData>();
+ if self.debug_offset >= debug_len { // A debug preamble is present directly before the object.
+ if let Some(p) = self.alloc.ptr() {
+ unsafe { // SAFETY (as constructed by `alloc_generic`): the preamble lies within the raw allocation, directly before the object.
+ let p = p.as_ptr().add(self.debug_offset - debug_len);
+ (p as *mut u32).write(STATE_DEAD); // `state` is the first field of the repr(C) AllocDebugData.
+ }
+ }
+ }
+ if debug_enabled(DebugFlags::FillAllocations) {
+ if let Some(p) = self.ptr() {
+ unsafe { (p.as_ptr() as *mut u8).write_bytes(0xde, self.size()) }; // Poison only the object bytes; the guard padding is left intact for the check below.
+ }
+ }
+ if self.padding != 0 {
+ if let Some(p) = self.ptr() {
+ let guard = unsafe { // SAFETY (as constructed by `alloc_generic`): `padding` bytes were allocated right after the object.
+ core::slice::from_raw_parts(
+ (p.as_ptr() as *mut u8 as *const u8).add(self.size()),
+ self.padding,
+ )
+ };
+ if let Some(first_err) = guard.iter().position(|&r| r != GUARD_MARKER) { // Offset of the first corrupted guard byte, if any.
+ let last_err = guard
+ .iter()
+ .rev()
+ .position(|&r| r != GUARD_MARKER)
+ .unwrap_or(0); // Distance of the last corrupted byte from the end of the guard area.
+ dev_warn!(
+ self.device(),
+ "Allocator: Corruption after object of type {} at {:#x}:{:#x} + {:#x}..={:#x}\n",
+ core::any::type_name::<T>(),
+ self.gpu_ptr(),
+ self.size(),
+ first_err,
+ self.padding - last_err - 1 // Convert the from-the-end distance into an offset from the start of the guard area.
+ );
+ }
+ }
+ }
+ }
+}
+
+static_assert!(mem::size_of::<AllocDebugData>() == 0x40); // The preamble layout is fixed at 0x40 bytes.
+
+/// A trait representing an allocator.
+///
+/// Implementors provide the raw [`Allocator::alloc`] primitive plus a few properties; the
+/// provided methods build typed objects and arrays on top of it, optionally adding a debug
+/// preamble and guard padding depending on the enabled debug flags.
+pub(crate) trait Allocator {
+ /// The raw allocation type used by this allocator.
+ type Raw: RawAllocation;
+ // TODO: Needs associated_type_defaults
+ // type Allocation<T> = GenericAlloc<T, Self::Raw>;
+
+ /// Returns the `AsahiDevice` associated with this allocator.
+ fn device(&self) -> &AsahiDevice;
+ /// Returns whether CPU-side mapping is enabled.
+ fn cpu_maps(&self) -> bool;
+ /// Returns the minimum alignment for allocations.
+ fn min_align(&self) -> usize;
+ /// Allocate an object of the given size in bytes with the given alignment.
+ fn alloc(&mut self, size: usize, align: usize) -> Result<Self::Raw>;
+
+ /// Returns a tuple of (count, size) of how much garbage (freed but not yet reusable objects)
+ /// exists in this allocator. Optional.
+ fn garbage(&self) -> (usize, usize) {
+ (0, 0)
+ }
+ /// Collect garbage for this allocator, up to the given object count. Optional.
+ fn collect_garbage(&mut self, _count: usize) {}
+
+ /// Allocate a new GpuStruct object. See [`GpuObject::new`].
+ #[inline(never)]
+ fn new_object<T: GpuStruct>(
+ &mut self,
+ inner: T,
+ callback: impl for<'a> FnOnce(&'a T) -> T::Raw<'a>,
+ ) -> Result<GpuObject<T, GenericAlloc<T, Self::Raw>>> {
+ GpuObject::<T, GenericAlloc<T, Self::Raw>>::new(self.alloc_object()?, inner, callback)
+ }
+
+ /// Allocate a new GpuStruct object. See [`GpuObject::new_boxed`].
+ #[inline(never)]
+ fn new_boxed<T: GpuStruct>(
+ &mut self,
+ inner: Box<T>,
+ callback: impl for<'a> FnOnce(
+ &'a T,
+ &'a mut MaybeUninit<T::Raw<'a>>,
+ ) -> Result<&'a mut T::Raw<'a>>,
+ ) -> Result<GpuObject<T, GenericAlloc<T, Self::Raw>>> {
+ GpuObject::<T, GenericAlloc<T, Self::Raw>>::new_boxed(self.alloc_object()?, inner, callback)
+ }
+
+ /// Allocate a new GpuStruct object. See [`GpuObject::new_inplace`].
+ #[inline(never)]
+ fn new_inplace<T: GpuStruct>(
+ &mut self,
+ inner: T,
+ callback: impl for<'a> FnOnce(
+ &'a T,
+ &'a mut MaybeUninit<T::Raw<'a>>,
+ ) -> Result<&'a mut T::Raw<'a>>,
+ ) -> Result<GpuObject<T, GenericAlloc<T, Self::Raw>>> {
+ GpuObject::<T, GenericAlloc<T, Self::Raw>>::new_inplace(
+ self.alloc_object()?,
+ inner,
+ callback,
+ )
+ }
+
+ /// Allocate a new GpuStruct object. See [`GpuObject::new_default`].
+ #[inline(never)]
+ fn new_default<T: GpuStruct + Default>(
+ &mut self,
+ ) -> Result<GpuObject<T, GenericAlloc<T, Self::Raw>>>
+ where
+ for<'a> <T as GpuStruct>::Raw<'a>: Default + Zeroed,
+ {
+ GpuObject::<T, GenericAlloc<T, Self::Raw>>::new_default(self.alloc_object()?)
+ }
+
+ /// Allocate a new GpuStruct object. See [`GpuObject::new_prealloc`].
+ #[inline(never)]
+ fn new_prealloc<T: GpuStruct>(
+ &mut self,
+ inner_cb: impl FnOnce(GpuWeakPointer<T>) -> Result<Box<T>>,
+ raw_cb: impl for<'a> FnOnce(
+ &'a T,
+ &'a mut MaybeUninit<T::Raw<'a>>,
+ ) -> Result<&'a mut T::Raw<'a>>,
+ ) -> Result<GpuObject<T, GenericAlloc<T, Self::Raw>>> {
+ GpuObject::<T, GenericAlloc<T, Self::Raw>>::new_prealloc(
+ self.alloc_object()?,
+ inner_cb,
+ raw_cb,
+ )
+ }
+
+ /// Allocate a generic buffer of the given size and alignment, applying the debug features if
+ /// enabled to tag it and detect overflows.
+ ///
+ /// Layout of the raw allocation, from low to high addresses:
+ /// optional debug preamble (`debug_offset` bytes, ending in this allocation's
+ /// `AllocDebugData`), the object (`size` bytes), optional guard padding (`size` bytes of
+ /// `GUARD_MARKER` when `DetectOverflows` is enabled).
+ ///
+ /// Returns `EINVAL` if the total size does not fit in a `usize`, or whatever error the
+ /// underlying [`Allocator::alloc`] returns.
+ fn alloc_generic<T>(
+ &mut self,
+ size: usize,
+ align: usize,
+ ) -> Result<GenericAlloc<T, Self::Raw>> {
+ let padding = if debug_enabled(DebugFlags::DetectOverflows) {
+ size
+ } else {
+ 0
+ };
+
+ let ret: GenericAlloc<T, Self::Raw> =
+ if self.cpu_maps() && debug_enabled(DebugFlags::DebugAllocations) {
+ let debug_align = self.min_align().max(align);
+ let debug_len = mem::size_of::<AllocDebugData>();
+ // Room for two debug records, rounded up so the object stays aligned.
+ let debug_offset = (debug_len * 2 + debug_align - 1) & !(debug_align - 1);
+
+ // Reject sizes whose total would wrap around instead of under-allocating.
+ let total = size
+ .checked_add(debug_offset)
+ .and_then(|s| s.checked_add(padding))
+ .ok_or(EINVAL)?;
+ let alloc = self.alloc(total, align)?;
+
+ let mut debug = AllocDebugData {
+ state: STATE_LIVE,
+ _pad: 0,
+ size: size as u64,
+ base_gpuva: alloc.gpu_ptr(),
+ obj_gpuva: alloc.gpu_ptr() + debug_offset as u64,
+ name: [0; 0x20],
+ };
+
+ // Store the (possibly truncated) type name, always leaving a trailing NUL.
+ let name = core::any::type_name::<T>().as_bytes();
+ let len = name.len().min(debug.name.len() - 1);
+ debug.name[..len].copy_from_slice(&name[..len]);
+
+ if let Some(p) = alloc.ptr() {
+ // SAFETY: The raw allocation is at least `debug_offset` bytes long, and
+ // `debug_offset >= 2 * debug_len`, so all accesses below are in bounds.
+ unsafe {
+ let p = p.as_ptr();
+ p.write_bytes(0x42, debug_offset - 2 * debug_len);
+ let cur = p.add(debug_offset - debug_len) as *mut AllocDebugData;
+ let prev = p.add(debug_offset - 2 * debug_len) as *mut AllocDebugData;
+ // Keep whatever record was previously in the "current" slot as the
+ // "previous" record, then write the new one.
+ prev.copy_from(cur, 1);
+ cur.copy_from(&debug, 1);
+ };
+ }
+
+ GenericAlloc {
+ alloc,
+ alloc_size: size,
+ debug_offset,
+ padding,
+ _p: PhantomData,
+ }
+ } else {
+ // Reject sizes whose total would wrap around instead of under-allocating.
+ let total = size.checked_add(padding).ok_or(EINVAL)?;
+
+ GenericAlloc {
+ alloc: self.alloc(total, align)?,
+ alloc_size: size,
+ debug_offset: 0,
+ padding,
+ _p: PhantomData,
+ }
+ };
+
+ if debug_enabled(DebugFlags::FillAllocations) {
+ if let Some(p) = ret.ptr() {
+ // SAFETY: The object spans `ret.size()` bytes starting at `ret.ptr()`.
+ unsafe { (p.as_ptr() as *mut u8).write_bytes(0xaa, ret.size()) };
+ }
+ }
+
+ if padding != 0 {
+ if let Some(p) = ret.ptr() {
+ // SAFETY: `padding` extra bytes were allocated directly after the object.
+ unsafe {
+ (p.as_ptr() as *mut u8)
+ .add(ret.size())
+ .write_bytes(GUARD_MARKER, padding);
+ }
+ }
+ }
+
+ Ok(ret)
+ }
+
+ /// Allocate an object of a given type, without actually initializing the allocation.
+ ///
+ /// This is useful to directly call [`GpuObject::new_*`], without borrowing a reference to the
+ /// allocator for the entire duration (e.g. if further allocations need to happen inside the
+ /// callbacks).
+ fn alloc_object<T: GpuStruct>(&mut self) -> Result<GenericAlloc<T, Self::Raw>> {
+ let size = mem::size_of::<T::Raw<'static>>();
+ let align = mem::align_of::<T::Raw<'static>>();
+
+ self.alloc_generic(size, align)
+ }
+
+ /// Allocate an empty `GpuArray` of a given type and length.
+ ///
+ /// Returns `EINVAL` if the array size in bytes does not fit in a `usize`.
+ fn array_empty<T: Sized + Default>(
+ &mut self,
+ count: usize,
+ ) -> Result<GpuArray<T, GenericAlloc<T, Self::Raw>>> {
+ let size = mem::size_of::<T>().checked_mul(count).ok_or(EINVAL)?;
+ let align = mem::align_of::<T>();
+
+ let alloc = self.alloc_generic(size, align)?;
+ GpuArray::<T, GenericAlloc<T, Self::Raw>>::empty(alloc, count)
+ }
+
+ /// Allocate an empty `GpuOnlyArray` of a given type and length.
+ ///
+ /// Returns `EINVAL` if the array size in bytes does not fit in a `usize`.
+ fn array_gpuonly<T: Sized + Default>(
+ &mut self,
+ count: usize,
+ ) -> Result<GpuOnlyArray<T, GenericAlloc<T, Self::Raw>>> {
+ let size = mem::size_of::<T>().checked_mul(count).ok_or(EINVAL)?;
+ let align = mem::align_of::<T>();
+
+ let alloc = self.alloc_generic(size, align)?;
+ GpuOnlyArray::<T, GenericAlloc<T, Self::Raw>>::new(alloc, count)
+ }
+}
+
+/// A simple allocation backed by a separate GEM object.
+///
+/// # Invariants
+/// `ptr` is either None or a valid, non-null pointer to the CPU view of the object.
+/// `gpu_ptr` is the GPU-side VA of the object.
+pub(crate) struct SimpleAllocation {
+ dev: AsahiDevice, // Device this allocation belongs to.
+ ptr: Option<NonNull<u8>>, // CPU pointer to the allocation inside the GEM object's mapping.
+ gpu_ptr: u64, // GPU VA of the allocation.
+ size: usize, // Requested size in bytes (before rounding up to whole pages).
+ vm: mmu::Vm, // The Vm the object is mapped into; its id is used to drop the mappings on drop.
+ obj: crate::gem::ObjectRef, // The GEM object backing this allocation.
+}
+
+/// SAFETY: `SimpleAllocation` just points to raw memory and should be safe to send across threads.
+unsafe impl Send for SimpleAllocation {}
+unsafe impl Sync for SimpleAllocation {} // SAFETY: presumably the same argument as for `Send` above - TODO confirm.
+
+impl Drop for SimpleAllocation {
+ /// Logs the release, optionally poisons the whole backing object with 0x42 (when
+ /// `FillAllocations` is enabled and the object can be CPU-mapped), then removes the
+ /// object's mappings from this allocation's Vm.
+ fn drop(&mut self) {
+ mod_dev_dbg!(
+ self.device(),
+ "SimpleAllocator: drop object @ {:#x}\n",
+ self.gpu_ptr()
+ );
+ if debug_enabled(DebugFlags::FillAllocations) {
+ // Poisoning is best-effort: a failed vmap is silently ignored.
+ match self.obj.vmap() {
+ Ok(vmap) => vmap.as_mut_slice().fill(0x42),
+ Err(_) => {}
+ }
+ }
+ let vm_id = self.vm.id();
+ self.obj.drop_vm_mappings(vm_id);
+ }
+}
+
+impl RawAllocation for SimpleAllocation {
+ fn ptr(&self) -> Option<NonNull<u8>> { // CPU pointer to the allocation, as stored at creation time.
+ self.ptr
+ }
+ fn gpu_ptr(&self) -> u64 { // GPU VA of the allocation.
+ self.gpu_ptr
+ }
+ fn size(&self) -> usize { // Requested size in bytes, not the page-rounded size of the backing object.
+ self.size
+ }
+
+ fn device(&self) -> &AsahiDevice { // Device this allocation belongs to.
+ &self.dev
+ }
+}
+
+/// A simple allocator that allocates each object as its own GEM object, aligned to the end of a
+/// page.
+///
+/// This is very slow, but it has the advantage that over-reads by the firmware or GPU will fault on
+/// the guard page after the allocation, which can be useful to validate that the firmware's or
+/// GPU's idea of object size is what we expect.
+pub(crate) struct SimpleAllocator {
+ dev: AsahiDevice, // Device that allocations are created for.
+ start: u64, // Start of the GPU VA range that objects are mapped into.
+ end: u64, // End of the GPU VA range that objects are mapped into.
+ prot: u32, // Protection flags passed when mapping objects into the Vm.
+ vm: mmu::Vm, // The Vm that objects are mapped into.
+ min_align: usize, // Minimum alignment applied to every allocation.
+ cpu_maps: bool, // Whether CPU-side mapping is reported as enabled.
+}
+
+impl SimpleAllocator {
+ /// Create a new `SimpleAllocator` for a given address range and `Vm`.
+ ///
+ /// The signature mirrors `HeapAllocator::new` so that both can be used through the
+ /// `DefaultAllocator` alias; `_block_size`, `_name` and `_keep_garbage` are accepted but
+ /// unused by this allocator.
+ ///
+ /// Returns `EINVAL` if `min_align` is not a power of two, since `alloc()` uses it as an
+ /// alignment mask (this matches the validation done by `HeapAllocator::new`).
+ #[allow(dead_code)]
+ #[allow(clippy::too_many_arguments)]
+ pub(crate) fn new(
+ dev: &AsahiDevice,
+ vm: &mmu::Vm,
+ start: u64,
+ end: u64,
+ min_align: usize,
+ prot: u32,
+ _block_size: usize,
+ mut cpu_maps: bool,
+ _name: fmt::Arguments<'_>,
+ _keep_garbage: bool,
+ ) -> Result<SimpleAllocator> {
+ if !min_align.is_power_of_two() {
+ return Err(EINVAL);
+ }
+ // The ForceCPUMaps debug flag overrides the caller's choice.
+ if debug_enabled(DebugFlags::ForceCPUMaps) {
+ cpu_maps = true;
+ }
+ Ok(SimpleAllocator {
+ dev: dev.clone(),
+ vm: vm.clone(),
+ start,
+ end,
+ prot,
+ min_align,
+ cpu_maps,
+ })
+ }
+}
+
+impl Allocator for SimpleAllocator {
+ type Raw = SimpleAllocation;
+
+ fn device(&self) -> &AsahiDevice {
+ &self.dev
+ }
+
+ fn cpu_maps(&self) -> bool {
+ self.cpu_maps
+ }
+
+ fn min_align(&self) -> usize {
+ self.min_align
+ }
+
+ #[inline(never)]
+ fn alloc(&mut self, size: usize, align: usize) -> Result<SimpleAllocation> { // Creates a dedicated GEM object for this allocation and maps it into the Vm.
+ let size_aligned = (size + mmu::UAT_PGSZ - 1) & !mmu::UAT_PGMSK; // Round the size up to whole pages.
+ let align = self.min_align.max(align);
+ let offset = (size_aligned - size) & !(align - 1); // Push the object towards the end of the last page, rounded down to keep alignment (assumes `align` is a power of two).
+
+ mod_dev_dbg!(
+ &self.dev,
+ "SimpleAllocator::new: size={:#x} size_al={:#x} al={:#x} off={:#x}\n",
+ size,
+ size_aligned,
+ align,
+ offset
+ );
+
+ let mut obj = crate::gem::new_kernel_object(&self.dev, size_aligned)?;
+ let p = obj.vmap()?.as_mut_ptr() as *mut u8; // The object is always CPU-mapped here, regardless of `cpu_maps`.
+ if debug_enabled(DebugFlags::FillAllocations) {
+ obj.vmap()?.as_mut_slice().fill(0xde); // Poison the whole backing object.
+ }
+ let iova = obj.map_into_range(
+ &self.vm,
+ self.start,
+ self.end,
+ self.min_align.max(mmu::UAT_PGSZ) as u64,
+ self.prot,
+ true, // NOTE(review): meaning of this flag is defined by `map_into_range` in gem.rs - confirm there.
+ )?;
+
+ let ptr = unsafe { p.add(offset) } as *mut u8; // SAFETY: `offset < size_aligned`, so this stays within the mapping of the backing object.
+ let gpu_ptr = (iova + offset) as u64;
+
+ mod_dev_dbg!(
+ &self.dev,
+ "SimpleAllocator::new -> {:#?} / {:#?} | {:#x} / {:#x}\n",
+ p,
+ ptr,
+ iova,
+ gpu_ptr
+ );
+
+ Ok(SimpleAllocation {
+ dev: self.dev.clone(),
+ ptr: NonNull::new(ptr),
+ gpu_ptr,
+ size, // The requested size, not the page-rounded one.
+ vm: self.vm.clone(),
+ obj,
+ })
+ }
+}
+
+/// Inner data for an allocation from the heap allocator.
+///
+/// This is wrapped in an `mm::Node`.
+pub(crate) struct HeapAllocationInner {
+ dev: AsahiDevice, // Device this allocation belongs to.
+ ptr: Option<NonNull<u8>>, // CPU pointer to the allocation, if any; `None` for guard nodes.
+ real_size: usize, // Size in bytes accounted against the allocator; 0 marks a guard node.
+}
+
+/// SAFETY: `HeapAllocationInner` just points to raw memory and should be safe to send across threads.
+unsafe impl Send for HeapAllocationInner {}
+unsafe impl Sync for HeapAllocationInner {} // SAFETY: presumably the same argument as for `Send` above - TODO confirm.
+
+/// Outer view of a heap allocation.
+///
+/// This uses an Option<> so we can move the internal `Node` into the garbage pool when it gets
+/// dropped.
+///
+/// # Invariants
+/// The `Option` must always be `Some(...)` while this object is alive; the accessors and `Drop` unwrap it and would panic otherwise.
+pub(crate) struct HeapAllocation(Option<mm::Node<HeapAllocatorInner, HeapAllocationInner>>);
+
+impl Drop for HeapAllocation {
+ /// Releases the allocation.
+ ///
+ /// If the owning allocator keeps garbage, the node is moved into its garbage list (to be
+ /// freed later) and its size is added to `total_garbage`. Otherwise, or if the garbage list
+ /// cannot grow, the node is handed back out of the locked section and freed immediately.
+ fn drop(&mut self) {
+ let node = self.0.take().unwrap();
+ let size = node.size();
+ let alloc = node.alloc_ref();
+
+ alloc.with(|a| {
+ if let Some(garbage) = a.garbage.as_mut() {
+ // Reserve space before handing over the node: a failing `try_push` would
+ // consume and drop the node right here, while the mm lock is held, which
+ // deadlocks (see below).
+ if garbage.try_reserve(1).is_err() {
+ dev_err!(
+ &a.dev,
+ "HeapAllocation[{}]::drop: Failed to keep garbage\n",
+ &*a.name,
+ );
+ // Free the node immediately instead, outside of this scope. It never
+ // became garbage, so `total_garbage` is left untouched.
+ return Some(node);
+ }
+ // Cannot fail: capacity for one more element was reserved above.
+ let _ = garbage.try_push(node);
+ a.total_garbage += size as usize;
+ None
+ } else {
+ // We need to ensure node survives this scope, since dropping it
+ // will try to take the mm lock and deadlock us
+ Some(node)
+ }
+ });
+ }
+}
+
+impl mm::AllocInner<HeapAllocationInner> for HeapAllocatorInner {
+ fn drop_object( // Callback from the `mm` abstraction; presumably invoked when a node is removed from the range allocator - confirm in rust/kernel/drm/mm.rs.
+ &mut self,
+ start: u64,
+ _size: u64,
+ _color: usize,
+ obj: &mut HeapAllocationInner,
+ ) {
+ /* real_size == 0 means it's a guard node; guard nodes are not counted in `allocated` */
+ if obj.real_size > 0 {
+ mod_dev_dbg!(
+ obj.dev,
+ "HeapAllocator[{}]: drop object @ {:#x} ({} bytes)\n",
+ &*self.name,
+ start,
+ obj.real_size,
+ );
+ self.allocated -= obj.real_size; // Update the allocator's count of live bytes.
+ }
+ }
+}
+
+impl RawAllocation for HeapAllocation {
+ // SAFETY: This function must always return a valid pointer.
+ // Since the HeapAllocation contains a reference to the
+ // backing_objects array that contains the object backing this pointer,
+ // and objects are only ever added to it, this pointer is guaranteed to
+ // remain valid for the lifetime of the HeapAllocation.
+ fn ptr(&self) -> Option<NonNull<u8>> {
+ self.0.as_ref().unwrap().ptr // The unwrap relies on the type invariant that the Option is `Some` while alive.
+ }
+ // SAFETY: This function must always return a valid GPU pointer.
+ // See the explanation in ptr().
+ fn gpu_ptr(&self) -> u64 {
+ self.0.as_ref().unwrap().start() // The GPU VA is the start address of the range allocator node.
+ }
+ fn size(&self) -> usize {
+ self.0.as_ref().unwrap().size() as usize // Size of the range allocator node (not `real_size`).
+ }
+ fn device(&self) -> &AsahiDevice {
+ &self.0.as_ref().unwrap().dev
+ }
+}
+
+/// Inner data for a heap allocator which uses the DRM MM range allocator to manage the heap.
+///
+/// This is wrapped by an `mm::Allocator`.
+struct HeapAllocatorInner {
+ dev: AsahiDevice, // Device used for log messages.
+ allocated: usize, // Bytes currently allocated; decremented by `real_size` when an object is dropped.
+ backing_objects: Vec<(crate::gem::ObjectRef, u64)>, // Backing GEM objects, each paired with the GPU VA it is mapped at.
+ garbage: Option<Vec<mm::Node<HeapAllocatorInner, HeapAllocationInner>>>, // Freed nodes kept for later collection; `None` when garbage keeping is disabled.
+ total_garbage: usize, // Total size in bytes of the nodes added to `garbage`.
+ name: CString, // Allocator name used in log messages.
+ vm_id: u64, // Id of the Vm this heap lives in.
+}
+
+/// A heap allocator which uses the DRM MM range allocator to manage its objects.
+///
+/// The heap is composed of a series of GEM objects. This implementation only ever grows the heap,
+/// never shrinks it.
+pub(crate) struct HeapAllocator {
+ dev: AsahiDevice, // Device that allocations are created for.
+ start: u64, // First GPU VA of the heap's address range.
+ end: u64, // Last GPU VA of the heap's address range (the range size is `end - start + 1`).
+ top: u64, // GPU VA at which the next backing block will be mapped.
+ prot: u32, // Protection flags passed when mapping backing blocks.
+ vm: mmu::Vm, // The Vm that backing blocks are mapped into.
+ min_align: usize, // Minimum alignment for allocations; validated to be a power of two.
+ block_size: usize, // Configured block size, never smaller than `min_align`.
+ cpu_maps: bool, // Whether backing blocks are CPU-mapped.
+ guard_nodes: Vec<mm::Node<HeapAllocatorInner, HeapAllocationInner>>, // Guard nodes reserved after each backing block, kept alive here.
+ mm: mm::Allocator<HeapAllocatorInner, HeapAllocationInner>, // The range allocator managing the heap's address space.
+ name: CString, // Allocator name used in log messages.
+}
+
+static LOCK_KEY: LockClassKey = LockClassKey::new(); // Lock class key passed to `mm::Allocator::new`, shared by all heap allocators.
+
+impl HeapAllocator {
+ /// Create a new HeapAllocator for a given `Vm` and address range. Returns `EINVAL` if `min_align` is not a power of two. The heap starts out empty, with no backing blocks.
+ #[allow(dead_code)]
+ #[allow(clippy::too_many_arguments)]
+ pub(crate) fn new(
+ dev: &AsahiDevice,
+ vm: &mmu::Vm,
+ start: u64,
+ end: u64,
+ min_align: usize,
+ prot: u32,
+ block_size: usize,
+ mut cpu_maps: bool,
+ name: fmt::Arguments<'_>,
+ keep_garbage: bool,
+ ) -> Result<HeapAllocator> {
+ if !min_align.is_power_of_two() {
+ return Err(EINVAL);
+ }
+ if debug_enabled(DebugFlags::ForceCPUMaps) { // The debug flag overrides the caller's choice.
+ cpu_maps = true;
+ }
+
+ let name = CString::try_from_fmt(name)?;
+
+ let inner = HeapAllocatorInner {
+ dev: dev.clone(),
+ allocated: 0,
+ backing_objects: Vec::new(),
+ // TODO: This clearly needs a try_clone() or similar
+ name: CString::try_from_fmt(fmt!("{}", &*name))?,
+ vm_id: vm.id(),
+ garbage: if keep_garbage { Some(Vec::new()) } else { None },
+ total_garbage: 0,
+ };
+
+ let mm = mm::Allocator::new(
+ start,
+ end - start + 1, // `end` is inclusive. NOTE(review): assumes `end >= start`; this is not checked here.
+ inner,
+ c_str!("HeapAllocator"),
+ &LOCK_KEY,
+ )?;
+
+ Ok(HeapAllocator {
+ dev: dev.clone(),
+ vm: vm.clone(),
+ start,
+ end,
+ top: start, // No backing blocks yet, so the first one goes at `start`.
+ prot,
+ min_align,
+ block_size: block_size.max(min_align),
+ cpu_maps,
+ guard_nodes: Vec::new(),
+ mm,
+ name,
+ })
+ }
+
+ /// Add a new backing block of the given size to this heap.
+ ///
+ /// If CPU mapping is enabled, this also adds a guard node to the range allocator to ensure that
+ /// objects cannot straddle backing block boundaries, since we cannot easily create a contiguous
+ /// CPU VA mapping for them. This can create some fragmentation. If CPU mapping is disabled, we
+ /// skip the guard blocks, since the GPU view of the heap is always contiguous.
+ ///
+ /// The size is rounded up to whole pages. Returns `ENOMEM` if the heap's VA range is
+ /// exhausted, `EIO` if the guard node cannot be reserved, or the error from creating or
+ /// mapping the backing object.
+ fn add_block(&mut self, size: usize) -> Result {
+ let size_aligned = (size + mmu::UAT_PGSZ - 1) & !mmu::UAT_PGMSK;
+
+ mod_dev_dbg!(
+ &self.dev,
+ "HeapAllocator[{}]::add_block: size={:#x} size_al={:#x}\n",
+ &*self.name,
+ size,
+ size_aligned,
+ );
+
+ if self.top.saturating_add(size_aligned as u64) >= self.end {
+ dev_err!(
+ &self.dev,
+ "HeapAllocator[{}]::add_block: Exhausted VA space\n",
+ &*self.name,
+ );
+ // Do not map a block that would extend past the end of the heap's range.
+ return Err(ENOMEM);
+ }
+
+ let mut obj = crate::gem::new_kernel_object(&self.dev, size_aligned)?;
+ if self.cpu_maps && debug_enabled(DebugFlags::FillAllocations) {
+ obj.vmap()?.as_mut_slice().fill(0xde);
+ }
+
+ let gpu_ptr = self.top;
+ if let Err(e) = obj.map_at(&self.vm, gpu_ptr, self.prot, self.cpu_maps) {
+ dev_err!(
+ &self.dev,
+ "HeapAllocator[{}]::add_block: Failed to map at {:#x} ({:?})\n",
+ &*self.name,
+ gpu_ptr,
+ e
+ );
+ return Err(e);
+ }
+
+ // Reserve the slot up front so the push at the end cannot fail for lack of memory
+ // after the guard node has already been set up.
+ self.mm
+ .with_inner(|inner| inner.backing_objects.try_reserve(1))?;
+
+ let mut new_top = self.top + size_aligned as u64;
+ if self.cpu_maps {
+ let guard = self.min_align.max(mmu::UAT_PGSZ);
+ mod_dev_dbg!(
+ &self.dev,
+ "HeapAllocator[{}]::add_block: Adding guard node {:#x}:{:#x}\n",
+ &*self.name,
+ new_top,
+ guard
+ );
+
+ // Guard nodes carry no CPU pointer and a zero `real_size`.
+ let inner = HeapAllocationInner {
+ dev: self.dev.clone(),
+ ptr: None,
+ real_size: 0,
+ };
+
+ let node = match self.mm.reserve_node(inner, new_top, guard as u64, 0) {
+ Ok(a) => a,
+ Err(a) => {
+ // Printed as start:size, matching the debug message above.
+ dev_err!(
+ &self.dev,
+ "HeapAllocator[{}]::add_block: Failed to reserve guard node {:#x}:{:#x}: {:?}\n",
+ &*self.name,
+ new_top,
+ guard,
+ a
+ );
+ return Err(EIO);
+ }
+ };
+
+ self.guard_nodes.try_push(node)?;
+
+ new_top += guard as u64;
+ }
+ mod_dev_dbg!(
+ &self.dev,
+ "HeapAllocator[{}]::add_block: top={:#x}\n",
+ &*self.name,
+ new_top
+ );
+
+ self.mm
+ .with_inner(|inner| inner.backing_objects.try_push((obj, gpu_ptr)))?;
+
+ self.top = new_top;
+
+ cls_dev_dbg!(
+ MemStats,
+ &self.dev,
+ "{} Heap: grow to {} bytes\n",
+ &*self.name,
+ self.top - self.start
+ );
+
+ Ok(())
+ }
+
+    /// Look up which backing object contains the GPU address `addr`.
+    ///
+    /// Returns the index into `backing_objects`, or `ENOENT` if no backing object covers `addr`.
+    /// The lookup is a binary search over the `(object, GPU address)` pairs.
+    fn find_obj(&mut self, addr: u64) -> Result<usize> {
+        self.mm.with_inner(|inner| {
+            let located = inner.backing_objects.binary_search_by(|entry| {
+                let base = entry.1;
+                let limit = entry.1 + entry.0.size() as u64;
+                if base > addr {
+                    // This block starts above the address.
+                    Ordering::Greater
+                } else if limit <= addr {
+                    // This block ends at or below the address.
+                    Ordering::Less
+                } else {
+                    Ordering::Equal
+                }
+            });
+            located.map_err(|_| ENOENT)
+        })
+    }
+}
+
+impl Allocator for HeapAllocator {
+ type Raw = HeapAllocation;
+
+ /// Returns the device stored in this allocator.
+ fn device(&self) -> &AsahiDevice {
+ &self.dev
+ }
+
+ /// Returns whether this heap's backing blocks are mapped for CPU access.
+ fn cpu_maps(&self) -> bool {
+ self.cpu_maps
+ }
+
+ /// Returns the minimum alignment applied to every allocation from this heap.
+ fn min_align(&self) -> usize {
+ self.min_align
+ }
+
+ /// Allocate `size` bytes from the heap.
+ ///
+ /// `align` must be zero or a power of two (otherwise `EINVAL` is returned); the effective
+ /// alignment is the larger of `align` and the heap's minimum alignment, and the size is
+ /// rounded up to that alignment. If the chosen range extends past the currently backed
+ /// region, a new backing block is added first. When CPU mapping is enabled, the returned
+ /// allocation also carries a CPU pointer into the backing object's mapping.
+ fn alloc(&mut self, size: usize, align: usize) -> Result<HeapAllocation> {
+ if align != 0 && !align.is_power_of_two() {
+ return Err(EINVAL);
+ }
+ let align = self.min_align.max(align);
+ let size_aligned = (size + align - 1) & !(align - 1);
+
+ mod_dev_dbg!(
+ &self.dev,
+ "HeapAllocator[{}]::new: size={:#x} size_al={:#x}\n",
+ &*self.name,
+ size,
+ size_aligned,
+ );
+
+ // The CPU pointer is filled in further down, once the backing object is known.
+ let inner = HeapAllocationInner {
+ dev: self.dev.clone(),
+ ptr: None,
+ real_size: size,
+ };
+
+ // Pick a VA range from the range allocator. The range may lie beyond `self.top`, i.e. in
+ // a region that has no backing block yet; that case is handled below.
+ let mut node = match self.mm.insert_node_generic(
+ inner,
+ size_aligned as u64,
+ align as u64,
+ 0,
+ mm::InsertMode::Best,
+ ) {
+ Ok(a) => a,
+ Err(a) => {
+ dev_err!(
+ &self.dev,
+ "HeapAllocator[{}]::new: Failed to insert node of size {:#x} / align {:#x}: {:?}\n",
+ &*self.name, size_aligned, align, a
+ );
+ return Err(a);
+ }
+ };
+
+ // Account for the requested (unaligned) size.
+ self.mm.with_inner(|inner| inner.allocated += size);
+
+ let mut new_object = false;
+ let start = node.start();
+ let end = start + node.size();
+ if end > self.top {
+ // A node starting beyond `top` would leave a gap between the backed region and the
+ // node; this is only warned about, not treated as an error.
+ if start > self.top {
+ dev_warn!(
+ self.dev,
+ "HeapAllocator[{}]::alloc: top={:#x}, start={:#x}\n",
+ &*self.name,
+ self.top,
+ start
+ );
+ }
+ // Grow by at least the configured block size, or more if this allocation needs it.
+ let block_size = self.block_size.max((end - self.top) as usize);
+ self.add_block(block_size)?;
+ new_object = true;
+ }
+ assert!(end <= self.top);
+
+ if self.cpu_maps {
+ mod_dev_dbg!(
+ self.dev,
+ "HeapAllocator[{}]::alloc: mapping to CPU\n",
+ &*self.name
+ );
+
+ // If a block was just added, it is the last entry in `backing_objects` (`idx` is
+ // `None`); otherwise search for the block that contains `start`.
+ let idx = if new_object {
+ None
+ } else {
+ Some(match self.find_obj(start) {
+ Ok(a) => a,
+ Err(_) => {
+ dev_warn!(
+ self.dev,
+ "HeapAllocator[{}]::alloc: Failed to find object at {:#x}\n",
+ &*self.name,
+ start
+ );
+ return Err(EIO);
+ }
+ })
+ };
+ let (obj_start, obj_size, p) = self.mm.with_inner(|inner| -> Result<_> {
+ let idx = idx.unwrap_or(inner.backing_objects.len() - 1);
+ let obj = &mut inner.backing_objects[idx];
+ let p = obj.0.vmap()?.as_mut_ptr() as *mut u8;
+ Ok((obj.1, obj.0.size(), p))
+ })?;
+ // The allocation must lie entirely within the backing object found above.
+ assert!(obj_start <= start);
+ assert!(obj_start + obj_size as u64 >= end);
+ // NOTE(review): the soundness of the pointer offset below rests on the two asserts
+ // above, assuming `vmap()` maps the whole object starting at `obj_start` - confirm.
+ node.as_mut().inner_mut().ptr =
+ NonNull::new(unsafe { p.add((start - obj_start) as usize) });
+ mod_dev_dbg!(
+ self.dev,
+ "HeapAllocator[{}]::alloc: CPU pointer = {:?}\n",
+ &*self.name,
+ node.ptr
+ );
+ }
+
+ mod_dev_dbg!(
+ self.dev,
+ "HeapAllocator[{}]::alloc: Allocated {:#x} bytes @ {:#x}\n",
+ &*self.name,
+ end - start,
+ start
+ );
+
+ Ok(HeapAllocation(Some(node)))
+ }
+
+    /// Report the garbage currently held by this allocator as a pair of the number of queued
+    /// nodes and the running `total_garbage` counter.
+    ///
+    /// Yields `(0, 0)` when the allocator has no garbage list.
+    fn garbage(&self) -> (usize, usize) {
+        self.mm.with_inner(|inner| match inner.garbage.as_ref() {
+            Some(pending) => (pending.len(), inner.total_garbage),
+            None => (0, 0),
+        })
+    }
+
+    /// Release up to `count` nodes from the front of the garbage list.
+    ///
+    /// `count` is clamped to the number of nodes actually queued, so asking for more than is
+    /// available releases everything instead of panicking. Does nothing if the allocator has no
+    /// garbage list. If the temporary vector cannot be allocated, the failure is logged and no
+    /// garbage is collected.
+    fn collect_garbage(&mut self, count: usize) {
+        // Take the garbage out of the inner block, so we can safely drop it without deadlocking
+        let mut garbage = Vec::new();
+
+        if garbage.try_reserve(count).is_err() {
+            dev_crit!(
+                self.dev,
+                "HeapAllocator[{}]:collect_garbage: failed to reserve space\n",
+                &*self.name,
+            );
+            return;
+        }
+
+        self.mm.with_inner(|inner| {
+            if let Some(g) = inner.garbage.as_mut() {
+                // `Vec::drain()` panics if the end of the range exceeds the length, so never
+                // ask for more nodes than are queued.
+                let count = count.min(g.len());
+                for node in g.drain(0..count) {
+                    inner.total_garbage -= node.size() as usize;
+                    garbage
+                        .try_push(node)
+                        .expect("try_push() failed after reserve()");
+                }
+            }
+        });
+        // `garbage` is dropped here, after the `with_inner()` closure has returned.
+    }
+}
+
+impl Drop for HeapAllocatorInner {
+    /// Tears down the heap's backing objects, provided nothing is still allocated from it.
+    ///
+    /// If allocations are still outstanding, this only logs a critical message and leaves the
+    /// VM mappings of the backing objects untouched.
+    fn drop(&mut self) {
+        mod_dev_dbg!(
+            self.dev,
+            "HeapAllocator[{}]: dropping allocator\n",
+            &*self.name
+        );
+
+        if self.allocated > 0 {
+            // This should never happen
+            dev_crit!(
+                self.dev,
+                "HeapAllocator[{}]: dropping with {} bytes allocated\n",
+                &*self.name,
+                self.allocated
+            );
+            return;
+        }
+
+        // Nothing is live any more: remove each backing object's mappings for this VM.
+        for mut backing in self.backing_objects.drain(..) {
+            backing.0.drop_vm_mappings(self.vm_id);
+        }
+    }
+}
diff --git a/drivers/gpu/drm/asahi/asahi.rs b/drivers/gpu/drm/asahi/asahi.rs
new file mode 100644
index 000000000000..e511d83f4cd1
--- /dev/null
+++ b/drivers/gpu/drm/asahi/asahi.rs
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+#![recursion_limit = "1024"]
+
+//! Driver for the Apple AGX GPUs found in Apple Silicon SoCs.
+
+mod alloc;
+mod buffer;
+mod channel;
+mod debug;
+mod driver;
+mod event;
+mod file;
+mod float;
+mod fw;
+mod gem;
+mod gpu;
+mod hw;
+mod initdata;
+mod mem;
+mod microseq;
+mod mmu;
+mod object;
+mod place;
+mod queue;
+mod regs;
+mod slotalloc;
+mod util;
+mod workqueue;
+
+use kernel::module_platform_driver;
+
+// Declares the "asahi" platform driver module, backed by `driver::AsahiDriver`, together with
+// its module parameters (`debug_flags`, `fault_control` and `initial_tvb_size`).
+module_platform_driver! {
+ type: driver::AsahiDriver,
+ name: "asahi",
+ license: "Dual MIT/GPL",
+ params: {
+ debug_flags: u64 {
+ default: 0,
+ permissions: 0o644,
+ description: "Debug flags",
+ },
+ fault_control: u32 {
+ default: 0,
+ permissions: 0,
+ description: "Fault control (0x0: hard faults, 0xb: macOS default)",
+ },
+ initial_tvb_size: usize {
+ default: 0x8,
+ permissions: 0o644,
+ description: "Initial TVB size in blocks",
+ },
+ },
+}
diff --git a/drivers/gpu/drm/asahi/buffer.rs b/drivers/gpu/drm/asahi/buffer.rs
new file mode 100644
index 000000000000..767ea161176f
--- /dev/null
+++ b/drivers/gpu/drm/asahi/buffer.rs
@@ -0,0 +1,694 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+//! Tiled Vertex Buffer management
+//!
+//! This module manages the Tiled Vertex Buffer, also known as the Parameter Buffer (in imgtec
+//! parlance) or the tiler heap (on other architectures). This buffer holds transformed primitive
+//! data between the vertex/tiling stage and the fragment stage.
+//!
+//! On AGX, the buffer is a heap of 128K blocks split into 32K pages (which must be aligned to a
+//! multiple of 32K in VA space). The buffer can be shared between multiple render jobs, and each
+//! will allocate pages from it during vertex processing and return them during fragment processing.
+//!
+//! If the buffer runs out of free pages, the vertex pass stops and a partial fragment pass occurs,
+//! spilling the intermediate render target state to RAM (a partial render). This is all managed
+//! transparently by the firmware. Since partial renders are less efficient, the kernel must grow
+//! the heap in response to feedback from the firmware to avoid partial renders in the future.
+//! Currently, we only ever grow the heap, and never shrink it.
+//!
+//! AGX also supports memoryless render targets, which can be used for intermediate results within
+//! a render pass. To support partial renders, it seems the GPU/firmware has the ability to borrow
+//! pages from the TVB buffer as a temporary render target buffer. Since this happens during a
+//! partial render itself, if the buffer runs out of space, it requires synchronous growth in
+//! response to a firmware interrupt. This is not currently supported, but may be in the future,
+//! though it is unclear whether it is worth the effort.
+//!
+//! This module is also in charge of managing the temporary objects associated with a single render
+//! pass, which includes the top-level tile array, the tail pointer cache, preemption buffers, and
+//! other miscellaneous structures collectively managed as a "scene".
+//!
+//! To avoid runaway memory usage, there is a maximum size for buffers (at that point it's unlikely
+//! that partial renders will incur much overhead over the buffer data access itself). This is
+//! different depending on whether memoryless render targets are in use, and is currently hardcoded
+//! to the most common value used by macOS.
+
+use crate::debug::*;
+use crate::fw::buffer;
+use crate::fw::types::*;
+use crate::util::*;
+use crate::{alloc, fw, gpu, mmu, slotalloc};
+use crate::{box_in_place, place};
+use core::sync::atomic::Ordering;
+use kernel::prelude::*;
+use kernel::sync::{smutex::Mutex, Arc};
+
+/// Debug flag class for this module.
+// NOTE(review): presumably consumed by the `mod_pr_debug!` macro from `crate::debug` - confirm.
+const DEBUG_CLASS: DebugFlags = DebugFlags::Buffer;
+
+/// There are 127 GPU/firmware-side buffer manager slots (yes, 127, not 128).
+const NUM_BUFFERS: u32 = 127;
+
+/// Page size bits for buffer pages (32K). VAs must be aligned to this size.
+pub(crate) const PAGE_SHIFT: usize = 15;
+/// Page size for buffer pages.
+pub(crate) const PAGE_SIZE: usize = 1 << PAGE_SHIFT;
+/// Number of pages in a buffer block, which should be contiguous in VA space.
+pub(crate) const PAGES_PER_BLOCK: usize = 4;
+/// Size of a buffer block.
+pub(crate) const BLOCK_SIZE: usize = PAGE_SIZE * PAGES_PER_BLOCK;
+
+/// Metadata about the tiling configuration for a scene. This is computed in the `render` module
+/// based on dimensions, tile size, and other info.
+pub(crate) struct TileInfo {
+ /// Tile count in the X dimension. Tiles are always 32x32.
+ pub(crate) tiles_x: u32,
+ /// Tile count in the Y dimension. Tiles are always 32x32.
+ pub(crate) tiles_y: u32,
+ /// Total tile count.
+ pub(crate) tiles: u32,
+ /// Micro-tile width (16 or 32).
+ pub(crate) utile_width: u32,
+ /// Micro-tile height (16 or 32).
+ pub(crate) utile_height: u32,
+ // Macro-tiles in the X dimension. Always 4.
+ //pub(crate) mtiles_x: u32,
+ // Macro-tiles in the Y dimension. Always 4.
+ //pub(crate) mtiles_y: u32,
+ /// Tiles per macro-tile in the X dimension.
+ pub(crate) tiles_per_mtile_x: u32,
+ /// Tiles per macro-tile in the Y dimension.
+ pub(crate) tiles_per_mtile_y: u32,
+ // Total tiles per macro-tile.
+ //pub(crate) tiles_per_mtile: u32,
+ /// Micro-tiles per macro-tile in the X dimension.
+ pub(crate) utiles_per_mtile_x: u32,
+ /// Micro-tiles per macro-tile in the Y dimension.
+ pub(crate) utiles_per_mtile_y: u32,
+ // Total micro-tiles per macro-tile.
+ //pub(crate) utiles_per_mtile: u32,
+ /// Size of the top-level tilemap, in bytes (for all layers, one cluster).
+ pub(crate) tilemap_size: usize,
+ /// Size of the Tail Pointer Cache, in bytes (for all layers * clusters).
+ pub(crate) tpc_size: usize,
+ /// Number of blocks in the clustering meta buffer (for clustering).
+ pub(crate) meta1_blocks: u32,
+ /// Minimum number of TVB blocks for this render.
+ pub(crate) min_tvb_blocks: usize,
+ /// XXX: Allocation factor for cluster tilemaps and meta4. Always 2?
+ pub(crate) cluster_factor: usize,
+ /// Tiling parameter structure passed to firmware.
+ pub(crate) params: fw::vertex::raw::TilingParameters,
+}
+
+/// A single scene, representing a render pass and its required buffers.
+#[versions(AGX)]
+#[derive(Debug)]
+pub(crate) struct Scene {
+ // GPU object for the scene structure; it also owns the per-scene buffers and a handle to
+ // the parent buffer.
+ object: GpuObject<buffer::Scene::ver>,
+ // Buffer manager slot that was active when this scene was created.
+ slot: u32,
+ // True if creating this scene bound the buffer to a fresh manager slot.
+ rebind: bool,
+ // Byte offset of the second preemption scratch area within `preempt_buf`.
+ preempt2_off: usize,
+ // Byte offset of the third preemption scratch area within `preempt_buf`.
+ preempt3_off: usize,
+ // Note: these are dead code only on some version variants.
+ // It's easier to do this than to propagate the version conditionals everywhere.
+ // Byte offsets of the metadata 2/3/4 areas within the clustering `meta` buffer.
+ #[allow(dead_code)]
+ meta2_off: usize,
+ #[allow(dead_code)]
+ meta3_off: usize,
+ #[allow(dead_code)]
+ meta4_off: usize,
+}
+
+// Accessors for the GPU pointers and status counters of a scene.
+#[versions(AGX)]
+impl Scene::ver {
+ /// Returns true if the buffer was bound to a fresh manager slot, and therefore needs an init
+ /// command before a render.
+ pub(crate) fn rebind(&self) -> bool {
+ self.rebind
+ }
+
+ /// Returns the buffer manager slot this scene's buffer was bound to.
+ pub(crate) fn slot(&self) -> u32 {
+ self.slot
+ }
+
+ /// Returns the GPU pointer to the [`buffer::Scene::ver`].
+ pub(crate) fn gpu_pointer(&self) -> GpuPointer<'_, buffer::Scene::ver> {
+ self.object.gpu_pointer()
+ }
+
+ /// Returns the GPU weak pointer to the [`buffer::Scene::ver`].
+ pub(crate) fn weak_pointer(&self) -> GpuWeakPointer<buffer::Scene::ver> {
+ self.object.weak_pointer()
+ }
+
+ /// Returns the GPU weak pointer to the kernel-side temp buffer.
+ /// (purpose unknown...)
+ pub(crate) fn kernel_buffer_pointer(&self) -> GpuWeakPointer<[u8]> {
+ // Takes the parent buffer's lock for the duration of this expression.
+ self.object.buffer.inner.lock().kernel_buffer.weak_pointer()
+ }
+
+ /// Returns the GPU pointer to the `buffer::Info::ver` object associated with this Scene.
+ pub(crate) fn buffer_pointer(&self) -> GpuPointer<'_, buffer::Info::ver> {
+ // We can't return the strong pointer directly since its lifetime crosses a lock, but we know
+ // its lifetime will be valid as long as &self since we hold a reference to the buffer,
+ // so just construct the strong pointer with the right lifetime here.
+ // NOTE(review): the comment above is the safety argument for this `unsafe` block; the
+ // exact contract of `upgrade()` is not visible here - confirm.
+ unsafe { self.weak_buffer_pointer().upgrade() }
+ }
+
+ /// Returns the GPU weak pointer to the `buffer::Info::ver` object associated with this Scene.
+ pub(crate) fn weak_buffer_pointer(&self) -> GpuWeakPointer<buffer::Info::ver> {
+ // Takes the parent buffer's lock for the duration of this expression.
+ self.object.buffer.inner.lock().info.weak_pointer()
+ }
+
+ /// Returns the GPU pointer to the TVB heap metadata buffer.
+ pub(crate) fn tvb_heapmeta_pointer(&self) -> GpuPointer<'_, &'_ [u8]> {
+ self.object.tvb_heapmeta.gpu_pointer()
+ }
+
+ /// Returns the GPU pointer to the top-level TVB tilemap buffer.
+ pub(crate) fn tvb_tilemap_pointer(&self) -> GpuPointer<'_, &'_ [u8]> {
+ self.object.tvb_tilemap.gpu_pointer()
+ }
+
+ /// Returns the GPU pointer to the Tail Pointer Cache buffer.
+ pub(crate) fn tpc_pointer(&self) -> GpuPointer<'_, &'_ [u8]> {
+ self.object.tpc.gpu_pointer()
+ }
+
+ /// Returns the GPU pointer to the first preemption scratch buffer.
+ pub(crate) fn preempt_buf_1_pointer(&self) -> GpuPointer<'_, &'_ [u8]> {
+ // The first scratch area sits at the start of the shared `preempt_buf` allocation.
+ self.object.preempt_buf.gpu_pointer()
+ }
+
+ /// Returns the GPU pointer to the second preemption scratch buffer.
+ pub(crate) fn preempt_buf_2_pointer(&self) -> GpuPointer<'_, &'_ [u8]> {
+ self.object
+ .preempt_buf
+ .gpu_offset_pointer(self.preempt2_off)
+ }
+
+ /// Returns the GPU pointer to the third preemption scratch buffer.
+ pub(crate) fn preempt_buf_3_pointer(&self) -> GpuPointer<'_, &'_ [u8]> {
+ self.object
+ .preempt_buf
+ .gpu_offset_pointer(self.preempt3_off)
+ }
+
+ /// Returns the GPU pointer to the per-cluster tilemap buffer, if clustering is enabled.
+ #[allow(dead_code)]
+ pub(crate) fn cluster_tilemaps_pointer(&self) -> Option<GpuPointer<'_, &'_ [u8]>> {
+ self.object
+ .clustering
+ .as_ref()
+ .map(|c| c.tilemaps.gpu_pointer())
+ }
+
+ /// Returns the GPU pointer to the clustering metadata 1 buffer, if clustering is enabled.
+ #[allow(dead_code)]
+ pub(crate) fn meta_1_pointer(&self) -> Option<GpuPointer<'_, &'_ [u8]>> {
+ // Metadata 1 sits at the start of the shared clustering `meta` allocation.
+ self.object
+ .clustering
+ .as_ref()
+ .map(|c| c.meta.gpu_pointer())
+ }
+
+ /// Returns the GPU pointer to the clustering metadata 2 buffer, if clustering is enabled.
+ #[allow(dead_code)]
+ pub(crate) fn meta_2_pointer(&self) -> Option<GpuPointer<'_, &'_ [u8]>> {
+ self.object
+ .clustering
+ .as_ref()
+ .map(|c| c.meta.gpu_offset_pointer(self.meta2_off))
+ }
+
+ /// Returns the GPU pointer to the clustering metadata 3 buffer, if clustering is enabled.
+ #[allow(dead_code)]
+ pub(crate) fn meta_3_pointer(&self) -> Option<GpuPointer<'_, &'_ [u8]>> {
+ self.object
+ .clustering
+ .as_ref()
+ .map(|c| c.meta.gpu_offset_pointer(self.meta3_off))
+ }
+
+ /// Returns the GPU pointer to the clustering metadata 4 buffer, if clustering is enabled.
+ #[allow(dead_code)]
+ pub(crate) fn meta_4_pointer(&self) -> Option<GpuPointer<'_, &'_ [u8]>> {
+ self.object
+ .clustering
+ .as_ref()
+ .map(|c| c.meta.gpu_offset_pointer(self.meta4_off))
+ }
+
+ /// Returns the GPU pointer to an unknown buffer with incrementing numbers.
+ pub(crate) fn seq_buf_pointer(&self) -> GpuPointer<'_, &'_ [u64]> {
+ self.object.seq_buf.gpu_pointer()
+ }
+
+ /// Returns the number of TVB bytes used for this scene.
+ pub(crate) fn used_bytes(&self) -> usize {
+ // `total_page_count` counts buffer pages, so scale it by the page size.
+ self.object
+ .with(|raw, _inner| raw.total_page_count.load(Ordering::Relaxed) as usize * PAGE_SIZE)
+ }
+
+ /// Returns whether the TVB overflowed while rendering this scene.
+ pub(crate) fn overflowed(&self) -> bool {
+ // Overflow is reported when the total page count exceeds the pass page count.
+ self.object.with(|raw, _inner| {
+ raw.total_page_count.load(Ordering::Relaxed)
+ > raw.pass_page_count.load(Ordering::Relaxed)
+ })
+ }
+}
+
+#[versions(AGX)]
+impl Drop for Scene::ver {
+    /// Drops this scene's reference on the parent buffer and, when it was the last active
+    /// scene, releases the buffer manager slot held by the buffer.
+    ///
+    /// Panics if the buffer's active scene count is already zero.
+    fn drop(&mut self) {
+        let mut inner = self.object.buffer.inner.lock();
+        assert_ne!(inner.active_scenes, 0);
+        inner.active_scenes -= 1;
+
+        if inner.active_scenes == 0 {
+            // Take the slot out explicitly rather than as a side effect of evaluating the debug
+            // message arguments, so the release does not depend on how the logging macro
+            // treats its arguments.
+            if let Some(slot) = inner.active_slot.take() {
+                mod_pr_debug!("Buffer: no scenes left, dropping slot {}", slot.slot());
+            }
+        }
+    }
+}
+
+/// Inner data for a single TVB buffer object.
+#[versions(AGX)]
+struct BufferInner {
+ // GPU-side buffer info structure, including the page and block lists.
+ info: GpuObject<buffer::Info::ver>,
+ // Allocator used for the TVB blocks and most per-scene buffers.
+ ualloc: Arc<Mutex<alloc::DefaultAllocator>>,
+ // Allocator used for the page list, the block list and the Tail Pointer Cache.
+ ualloc_priv: Arc<Mutex<alloc::DefaultAllocator>>,
+ // TVB blocks allocated so far, each `BLOCK_SIZE` bytes.
+ blocks: Vec<GpuOnlyArray<u8>>,
+ // Upper bound on the block count, enforced by `ensure_blocks()`.
+ max_blocks: usize,
+ // Upper bound on the block count that `auto_grow()` will grow to.
+ max_blocks_nomemless: usize,
+ // Buffer manager from which slots are obtained.
+ mgr: BufferManager,
+ // Number of live scenes created from this buffer.
+ active_scenes: usize,
+ // Slot held while at least one scene is live.
+ active_slot: Option<slotalloc::Guard<()>>,
+ // Token of the most recently used slot, passed back to the slot allocator on the next bind.
+ last_token: Option<slotalloc::SlotToken>,
+ // Cached Tail Pointer Cache buffer, reused by new scenes when it is large enough.
+ tpc: Option<Arc<GpuArray<u8>>>,
+ // Kernel-side temp buffer exposed through `Scene::kernel_buffer_pointer()`.
+ kernel_buffer: GpuArray<u8>,
+ // Statistics object; `auto_grow()` reads `max_pages` from it and sets its `reset` field.
+ stats: GpuObject<buffer::Stats>,
+ // Sizes in bytes of the three preemption scratch areas.
+ preempt1_size: usize,
+ preempt2_size: usize,
+ preempt3_size: usize,
+ // Cluster count taken from the GPU's dynamic configuration.
+ num_clusters: usize,
+}
+
+/// Locked and reference counted TVB buffer.
+#[versions(AGX)]
+pub(crate) struct Buffer {
+ // Shared state; clones of a `Buffer` share the same `BufferInner` through the `Arc`.
+ inner: Arc<Mutex<BufferInner::ver>>,
+}
+
+#[versions(AGX)]
+impl Buffer::ver {
+ /// Create a new Buffer for a given VM, given the per-VM allocators.
+ ///
+ /// The buffer starts out with no TVB blocks; use `ensure_blocks()` or `auto_grow()` to add
+ /// them. The GPU-side info structure, page list and block list are sized for the maximum
+ /// buffer size up front.
+ pub(crate) fn new(
+ gpu: &dyn gpu::GpuManager,
+ alloc: &mut gpu::KernelAllocators,
+ ualloc: Arc<Mutex<alloc::DefaultAllocator>>,
+ ualloc_priv: Arc<Mutex<alloc::DefaultAllocator>>,
+ mgr: &BufferManager,
+ ) -> Result<Buffer::ver> {
+ // These are the typical max numbers on macOS.
+ // 8GB machines have this halved.
+ let max_size: usize = 862_322_688; // bytes
+ let max_size_nomemless = max_size / 3;
+
+ let max_blocks = max_size / BLOCK_SIZE;
+ let max_blocks_nomemless = max_size_nomemless / BLOCK_SIZE;
+ let max_pages = max_blocks * PAGES_PER_BLOCK;
+ let max_pages_nomemless = max_blocks_nomemless * PAGES_PER_BLOCK;
+
+ // The preemption buffers are scaled by the cluster count, plus one extra unit on
+ // multi-cluster GPUs.
+ let num_clusters = gpu.get_dyncfg().id.num_clusters as usize;
+ let num_clusters_adj = if num_clusters > 1 {
+ num_clusters + 1
+ } else {
+ 1
+ };
+
+ let preempt1_size = num_clusters_adj * gpu.get_cfg().preempt1_size;
+ let preempt2_size = num_clusters_adj * gpu.get_cfg().preempt2_size;
+ let preempt3_size = num_clusters_adj * gpu.get_cfg().preempt3_size;
+
+ // The block list gets two entries per block; `ensure_blocks()` only fills the even ones.
+ let inner = box_in_place!(buffer::Info::ver {
+ block_ctl: alloc.shared.new_default::<buffer::BlockControl>()?,
+ counter: alloc.shared.new_default::<buffer::Counter>()?,
+ page_list: ualloc_priv.lock().array_empty(max_pages)?,
+ block_list: ualloc_priv.lock().array_empty(max_blocks * 2)?,
+ })?;
+
+ // `page_list_size` below is 4 * max_pages; presumably 4 bytes per page list entry, to
+ // be confirmed against the firmware structure definition.
+ let info = alloc.private.new_boxed(inner, |inner, ptr| {
+ Ok(place!(
+ ptr,
+ buffer::raw::Info::ver {
+ gpu_counter: 0x0,
+ unk_4: 0,
+ last_id: 0x0,
+ cur_id: -1,
+ unk_10: 0x0,
+ gpu_counter2: 0x0,
+ unk_18: 0x0,
+ #[ver(V < V13_0B4)]
+ unk_1c: 0x0,
+ page_list: inner.page_list.gpu_pointer(),
+ page_list_size: (4 * max_pages).try_into()?,
+ page_count: AtomicU32::new(0),
+ max_blocks: max_blocks.try_into()?,
+ block_count: AtomicU32::new(0),
+ unk_38: 0x0,
+ block_list: inner.block_list.gpu_pointer(),
+ block_ctl: inner.block_ctl.gpu_pointer(),
+ last_page: AtomicU32::new(0),
+ gpu_page_ptr1: 0x0,
+ gpu_page_ptr2: 0x0,
+ unk_58: 0x0,
+ block_size: BLOCK_SIZE as u32,
+ unk_60: U64(0x0),
+ counter: inner.counter.gpu_pointer(),
+ unk_70: 0x0,
+ unk_74: 0x0,
+ unk_78: 0x0,
+ unk_7c: 0x0,
+ unk_80: 0x1,
+ max_pages: max_pages.try_into()?,
+ max_pages_nomemless: max_pages_nomemless.try_into()?,
+ unk_8c: 0x0,
+ unk_90: Default::default(),
+ }
+ ))
+ })?;
+
+ // Technically similar to Scene below, let's play it safe.
+ let kernel_buffer = alloc.shared.array_empty(0x40)?;
+ // The statistics object starts with its `reset` field set to 1.
+ let stats = alloc
+ .shared
+ .new_object(Default::default(), |_inner| buffer::raw::Stats {
+ reset: AtomicU32::from(1),
+ ..Default::default()
+ })?;
+
+ Ok(Buffer::ver {
+ inner: Arc::try_new(Mutex::new(BufferInner::ver {
+ info,
+ ualloc,
+ ualloc_priv,
+ blocks: Vec::new(),
+ max_blocks,
+ max_blocks_nomemless,
+ mgr: mgr.clone(),
+ active_scenes: 0,
+ active_slot: None,
+ last_token: None,
+ tpc: None,
+ kernel_buffer,
+ stats,
+ preempt1_size,
+ preempt2_size,
+ preempt3_size,
+ num_clusters,
+ }))?,
+ })
+ }
+
+ /// Returns the total block count allocated to this Buffer.
+ ///
+ /// Takes the buffer lock for the duration of the call.
+ pub(crate) fn block_count(&self) -> u32 {
+ self.inner.lock().blocks.len() as u32
+ }
+
+ /// Returns the total size in bytes allocated to this Buffer.
+ ///
+ /// This is the current block count multiplied by `BLOCK_SIZE`.
+ pub(crate) fn size(&self) -> usize {
+ self.block_count() as usize * BLOCK_SIZE
+ }
+
+    /// Grow the Buffer automatically, driven by the page usage recorded in the statistics object.
+    ///
+    /// Reads the recorded `max_pages` value and sets the statistics `reset` field. If the buffer
+    /// holds fewer blocks than twice that usage, it is grown to three times the usage. Both
+    /// figures are capped at `max_blocks_nomemless`.
+    ///
+    /// Returns `Ok(true)` if growth was requested and `Ok(false)` if the buffer was already
+    /// large enough.
+    pub(crate) fn auto_grow(&self) -> Result<bool> {
+        let inner = self.inner.lock();
+
+        let used_pages = inner.stats.with(|raw, _inner| {
+            let peak = raw.max_pages.load(Ordering::Relaxed);
+            raw.reset.store(1, Ordering::Release);
+            peak as usize
+        });
+
+        let cap = inner.max_blocks_nomemless;
+        let need_blocks = div_ceil(used_pages * 2, PAGES_PER_BLOCK).min(cap);
+        let want_blocks = div_ceil(used_pages * 3, PAGES_PER_BLOCK).min(cap);
+
+        if need_blocks <= inner.blocks.len() {
+            return Ok(false);
+        }
+
+        // Grow to 3x requested size (same logic as macOS)
+        // The lock must be released first, since `ensure_blocks()` takes it again.
+        core::mem::drop(inner);
+        self.ensure_blocks(want_blocks)?;
+        Ok(true)
+    }
+
+ /// Ensure that the buffer has at least a certain minimum size in blocks.
+ ///
+ /// Returns `Ok(false)` if the buffer already holds at least `min_blocks` blocks, `Ok(true)`
+ /// after growing it, and `ENOMEM` if `min_blocks` exceeds the buffer's maximum block count.
+ /// Allocation failures are propagated; in that case no new block is committed.
+ pub(crate) fn ensure_blocks(&self, min_blocks: usize) -> Result<bool> {
+ let mut inner = self.inner.lock();
+
+ let cur_count = inner.blocks.len();
+ if cur_count >= min_blocks {
+ return Ok(false);
+ }
+ if min_blocks > inner.max_blocks {
+ return Err(ENOMEM);
+ }
+
+ let add_blocks = min_blocks - cur_count;
+ let new_count = min_blocks;
+
+ let mut new_blocks: Vec<GpuOnlyArray<u8>> = Vec::new();
+
+ // Allocate the new blocks first, so if it fails they will be dropped
+ let mut ualloc = inner.ualloc.lock();
+ for _i in 0..add_blocks {
+ new_blocks.try_push(ualloc.array_gpuonly(BLOCK_SIZE)?)?;
+ }
+ core::mem::drop(ualloc);
+
+ // Then actually commit them
+ inner.blocks.try_reserve(add_blocks)?;
+
+ for (i, block) in new_blocks.into_iter().enumerate() {
+ // Blocks are recorded by page number: the GPU VA shifted down by the page size bits.
+ let page_num = (block.gpu_va().get() >> PAGE_SHIFT) as u32;
+
+ inner
+ .blocks
+ .try_push(block)
+ .expect("try_push() failed after try_reserve()");
+ // Each block owns two block list entries; only the first one is written here.
+ inner.info.block_list[2 * (cur_count + i)] = page_num;
+ // The pages of a block are consecutive, so list them one by one.
+ for j in 0..PAGES_PER_BLOCK {
+ inner.info.page_list[(cur_count + i) * PAGES_PER_BLOCK + j] = page_num + j as u32;
+ }
+ }
+
+ // Publish the new block count in the block control structure...
+ inner.info.block_ctl.with(|raw, _inner| {
+ raw.total.store(new_count as u32, Ordering::SeqCst);
+ raw.wptr.store(new_count as u32, Ordering::SeqCst);
+ });
+
+ // ...and the new page/block counts in the buffer info structure.
+ let page_count = (new_count * PAGES_PER_BLOCK) as u32;
+ inner.info.with(|raw, _inner| {
+ raw.page_count.store(page_count, Ordering::Relaxed);
+ raw.block_count.store(new_count as u32, Ordering::Relaxed);
+ raw.last_page.store(page_count - 1, Ordering::Relaxed);
+ });
+
+ Ok(true)
+ }
+
+ /// Create a new [`Scene::ver`] (render pass) using this buffer.
+ ///
+ /// Allocates the per-scene buffers (sized from `tile_info` and the buffer's preemption and
+ /// cluster configuration), builds the GPU-side scene structure, and binds the buffer to a
+ /// buffer manager slot if no scene is currently active. The buffer lock is held for the
+ /// whole call.
+ pub(crate) fn new_scene(
+ &self,
+ alloc: &mut gpu::KernelAllocators,
+ tile_info: &TileInfo,
+ ) -> Result<Scene::ver> {
+ let mut inner = self.inner.lock();
+
+ let tilemap_size = tile_info.tilemap_size;
+ let tpc_size = tile_info.tpc_size;
+
+ // TODO: what is this exactly?
+ mod_pr_debug!("Buffer: Allocating TVB buffers\n");
+
+ // This seems to be a list, with 4x2 bytes of headers and 8 bytes per entry.
+ // On single-cluster devices, the used length always seems to be 1.
+ // On M1 Ultra, it can grow and usually doesn't exceed 8 * cluster_factor
+ // entries. macOS allocates a whole 64K * 0x80 for this, so let's go with
+ // that to be safe...
+ let user_buffer = inner.ualloc.lock().array_empty(if inner.num_clusters > 1 {
+ 0x10080
+ } else {
+ 0x80
+ })?;
+
+ let tvb_heapmeta = inner.ualloc.lock().array_empty(0x200)?;
+ let tvb_tilemap = inner.ualloc.lock().array_empty(tilemap_size)?;
+
+ mod_pr_debug!("Buffer: Allocating misc buffers\n");
+ // One allocation holds all three preemption scratch areas back to back.
+ let preempt_buf = inner
+ .ualloc
+ .lock()
+ .array_empty(inner.preempt1_size + inner.preempt2_size + inner.preempt3_size)?;
+
+ // Entries 1..0x400 are set to their index plus one; entry 0 and the upper half of the
+ // buffer keep whatever `array_empty()` initialized them to.
+ let mut seq_buf = inner.ualloc.lock().array_empty(0x800)?;
+ for i in 1..0x400 {
+ seq_buf[i] = (i + 1) as u64;
+ }
+
+ // Reuse the cached Tail Pointer Cache if it is big enough, otherwise allocate a new one
+ // (rounded up to the UAT page size) and cache it.
+ let tpc = match inner.tpc.as_ref() {
+ Some(buf) if buf.len() >= tpc_size => buf.clone(),
+ _ => {
+ // MacOS allocates this as shared GPU+FW, but
+ // priv seems to work and might be faster?
+ // Needs to be FW-writable anyway, so ualloc
+ // won't work.
+ let buf = Arc::try_new(
+ inner
+ .ualloc_priv
+ .lock()
+ .array_empty((tpc_size + mmu::UAT_PGMSK) & !mmu::UAT_PGMSK)?,
+ )?;
+ inner.tpc = Some(buf.clone());
+ buf
+ }
+ };
+
+ // Maybe: (4x4 macro tiles + 1 global page)*n, 32bit each (17*4*n)
+ let meta1_size = align(tile_info.meta1_blocks as usize * 0x44, 0x80);
+ // check
+ let meta2_size = align(0x190 * inner.num_clusters, 0x80);
+ let meta3_size = align(0x280 * inner.num_clusters, 0x80);
+ // Like user_buffer for single-cluster modes, 0x30 per cluster * the cluster
+ // factor.
+ let meta4_size = align(0x30 * inner.num_clusters * tile_info.cluster_factor, 0x80);
+ let meta_size = meta1_size + meta2_size + meta3_size + meta4_size;
+
+ // The clustering buffers are only allocated on multi-cluster GPUs.
+ let clustering = if inner.num_clusters > 1 {
+ mod_pr_debug!("Buffer: Allocating clustering buffers\n");
+ let tilemaps = inner
+ .ualloc
+ .lock()
+ .array_empty(inner.num_clusters * tilemap_size * tile_info.cluster_factor)?;
+ let meta = inner.ualloc.lock().array_empty(meta_size)?;
+ Some(buffer::ClusterBuffers { tilemaps, meta })
+ } else {
+ None
+ };
+
+ let scene_inner = box_in_place!(buffer::Scene::ver {
+ user_buffer: user_buffer,
+ buffer: self.clone(),
+ tvb_heapmeta: tvb_heapmeta,
+ tvb_tilemap: tvb_tilemap,
+ tpc: tpc,
+ clustering: clustering,
+ preempt_buf: preempt_buf,
+ seq_buf: seq_buf,
+ })?;
+
+ // Could be made strong, but we wind up with a deadlock if we try to grab the
+ // pointer through the inner.buffer path inside the closure.
+ let stats_pointer = inner.stats.weak_pointer();
+
+ // macOS allocates this as private. However, the firmware does not
+ // DC CIVAC this before reading it (like it does most other things),
+ // which causes odd cache incoherency bugs when combined with
+ // speculation on the firmware side (maybe). This doesn't happen
+ // on macOS because these structs are a circular pool that is mapped
+ // already initialized. Just mark this shared for now.
+ let scene = alloc.shared.new_boxed(scene_inner, |inner, ptr| {
+ Ok(place!(
+ ptr,
+ buffer::raw::Scene {
+ pass_page_count: AtomicU32::new(0),
+ unk_4: 0,
+ unk_8: U64(0),
+ unk_10: U64(0),
+ user_buffer: inner.user_buffer.gpu_pointer(),
+ unk_20: 0,
+ stats: stats_pointer,
+ total_page_count: AtomicU32::new(0),
+ unk_30: U64(0),
+ unk_38: U64(0),
+ }
+ ))
+ })?;
+
+ let mut rebind = false;
+
+ // The first scene on an idle buffer acquires a manager slot, passing in the token of the
+ // previously used slot; `rebind` records whether the slot allocator reports a change.
+ if inner.active_slot.is_none() {
+ assert_eq!(inner.active_scenes, 0);
+
+ let slot = inner.mgr.0.get(inner.last_token)?;
+ rebind = slot.changed();
+
+ mod_pr_debug!("Buffer: assigning slot {} (rebind={})", slot.slot(), rebind);
+
+ inner.last_token = Some(slot.token());
+ inner.active_slot = Some(slot);
+ }
+
+ inner.active_scenes += 1;
+
+ // The offsets locate the sub-areas within the shared preemption and clustering meta
+ // buffers, in the same order as their sizes were summed above.
+ Ok(Scene::ver {
+ object: scene,
+ slot: inner.active_slot.as_ref().unwrap().slot(),
+ rebind,
+ preempt2_off: inner.preempt1_size,
+ preempt3_off: inner.preempt1_size + inner.preempt2_size,
+ meta2_off: meta1_size,
+ meta3_off: meta1_size + meta2_size,
+ meta4_off: meta1_size + meta2_size + meta3_size,
+ })
+ }
+
+ /// Increment the buffer manager usage count. Should be done once we know the Scene is ready
+ /// to be committed and used in commands submitted to the GPU.
+ pub(crate) fn increment(&self) {
+ let inner = self.inner.lock();
+ inner.info.counter.with(|raw, _inner| {
+ // We could use fetch_add, but the non-LSE atomic
+ // sequence Rust produces confuses the hypervisor.
+ // We have inner locked anyway, so this is not racy.
+ let v = raw.count.load(Ordering::Relaxed);
+ raw.count.store(v + 1, Ordering::Relaxed);
+ });
+ }
+}
+
+#[versions(AGX)]
+impl Clone for Buffer::ver {
+    /// Returns another handle that shares the same reference-counted inner state.
+    fn clone(&self) -> Self {
+        let inner = self.inner.clone();
+        Buffer::ver { inner }
+    }
+}
+
+/// The GPU-global buffer manager, used to allocate and release buffer slots from the pool.
+pub(crate) struct BufferManager(slotalloc::SlotAllocator<()>);
+
+impl BufferManager {
+ /// Creates a buffer manager backed by a slot allocator with `NUM_BUFFERS` slots.
+ ///
+ /// The slots carry no per-slot data (the slot type is `()`).
+ pub(crate) fn new() -> Result<BufferManager> {
+ Ok(BufferManager(slotalloc::SlotAllocator::new(
+ NUM_BUFFERS,
+ (),
+ |_inner, _slot| (),
+ )?))
+ }
+}
+
+impl Clone for BufferManager {
+    /// Clones the wrapped `SlotAllocator` into a new `BufferManager`.
+    fn clone(&self) -> Self {
+        let slots = self.0.clone();
+        BufferManager(slots)
+    }
+}
diff --git a/drivers/gpu/drm/asahi/channel.rs b/drivers/gpu/drm/asahi/channel.rs
new file mode 100644
index 000000000000..0b3c3b65c279
--- /dev/null
+++ b/drivers/gpu/drm/asahi/channel.rs
@@ -0,0 +1,542 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+//! GPU ring buffer channels
+//!
+ //! The GPU firmware uses a set of ring buffer channels to receive commands from the driver and
+ //! to send it notifications and status messages.
+//!
+//! These ring buffers mostly follow uniform conventions, so they share the same base
+//! implementation.
+
+use crate::debug::*;
+use crate::driver::AsahiDevice;
+use crate::fw::channels::*;
+use crate::fw::initdata::{raw, ChannelRing};
+use crate::fw::types::*;
+use crate::{event, gpu, mem};
+use core::time::Duration;
+use kernel::{c_str, delay::coarse_sleep, prelude::*, sync::Arc, time};
+
+pub(crate) use crate::fw::channels::PipeType;
+
+ /// A receive (FW->driver) channel.
+ ///
+ /// `T` is the channel state type (it provides the read/write pointer accessors and the number
+ /// of sub-channels), and `U` is the message type stored in the ring.
+ pub(crate) struct RxChannel<T: RxChannelState, U: Copy + Default>
+ where
+ for<'a> <T as GpuStruct>::Raw<'a>: Debug + Default + Zeroed,
+ {
+ /// The channel state object together with the message array.
+ ring: ChannelRing<T, U>,
+ // FIXME: needs feature(generic_const_exprs)
+ //rptr: [u32; T::SUB_CHANNELS],
+ /// Driver-side read index for each sub-channel (fixed at 6 entries, see the FIXME above).
+ rptr: [u32; 6],
+ /// Number of message slots per sub-channel.
+ count: u32,
+ }
+
+ impl<T: RxChannelState, U: Copy + Default> RxChannel<T, U>
+ where
+ for<'a> <T as GpuStruct>::Raw<'a>: Debug + Default + Zeroed,
+ {
+ /// Allocates a new receive channel with a given message count.
+ ///
+ /// The ring holds `T::SUB_CHANNELS * count` messages, i.e. `count` slots per sub-channel.
+ /// Both the state object and the ring are taken from the `shared` allocator.
+ pub(crate) fn new(alloc: &mut gpu::KernelAllocators, count: usize) -> Result<RxChannel<T, U>> {
+ Ok(RxChannel {
+ ring: ChannelRing {
+ state: alloc.shared.new_default()?,
+ ring: alloc.shared.array_empty(T::SUB_CHANNELS * count)?,
+ },
+ // All read indices start out at zero.
+ rptr: Default::default(),
+ count: count as u32,
+ })
+ }
+
+ /// Receives a message on the specified sub-channel index, optionally leaving in the ring
+ /// buffer.
+ ///
+ /// Returns None if the channel is empty.
+ ///
+ /// NOTE(review): `index` is used to index the 6-entry `rptr` array directly, so callers
+ /// presumably must keep it below `T::SUB_CHANNELS` (and below 6) -- confirm.
+ fn get_or_peek(&mut self, index: usize, peek: bool) -> Option<U> {
+ self.ring.state.with(|raw, _inner| {
+ // Write index for this sub-channel, as read from the channel state.
+ let wptr = T::wptr(raw, index);
+ let rptr = &mut self.rptr[index];
+ // Equal read and write indices mean there is nothing pending.
+ if wptr == *rptr {
+ None
+ } else {
+ // Each sub-channel owns a contiguous run of `count` slots in the ring array.
+ let off = self.count as usize * index;
+ let msg = self.ring.ring[off + *rptr as usize];
+ if !peek {
+ // Consume the message: advance the read index with wraparound and write it
+ // back to the channel state.
+ *rptr = (*rptr + 1) % self.count;
+ T::set_rptr(raw, index, *rptr);
+ }
+ Some(msg)
+ }
+ })
+ }
+
+ /// Receives a message on the specified sub-channel index, and dequeues it from the ring buffer.
+ ///
+ /// Returns None if the channel is empty.
+ pub(crate) fn get(&mut self, index: usize) -> Option<U> {
+ self.get_or_peek(index, false)
+ }
+
+ /// Peeks a message on the specified sub-channel index, leaving it in the ring buffer.
+ ///
+ /// Returns None if the channel is empty.
+ pub(crate) fn peek(&mut self, index: usize) -> Option<U> {
+ self.get_or_peek(index, true)
+ }
+ }
+
+ /// A transmit (driver->FW) channel.
+ ///
+ /// `T` is the channel state type (it provides the read/write pointer accessors), and `U` is the
+ /// message type stored in the ring.
+ pub(crate) struct TxChannel<T: TxChannelState, U: Copy + Default>
+ where
+ for<'a> <T as GpuStruct>::Raw<'a>: Debug + Default + Zeroed,
+ {
+ /// The channel state object together with the message array.
+ ring: ChannelRing<T, U>,
+ /// Driver-side write index: the slot the next message will be written to.
+ wptr: u32,
+ /// Number of message slots in the ring.
+ count: u32,
+ }
+
+ impl<T: TxChannelState, U: Copy + Default> TxChannel<T, U>
+ where
+     for<'a> <T as GpuStruct>::Raw<'a>: Debug + Default + Zeroed,
+ {
+     /// Allocates a new cached transmit channel with a given message count.
+     ///
+     /// The channel state is taken from the `shared` allocator and the message ring from the
+     /// `private` allocator.
+     pub(crate) fn new(alloc: &mut gpu::KernelAllocators, count: usize) -> Result<TxChannel<T, U>> {
+         Ok(TxChannel {
+             ring: ChannelRing {
+                 state: alloc.shared.new_default()?,
+                 ring: alloc.private.array_empty(count)?,
+             },
+             wptr: 0,
+             count: count as u32,
+         })
+     }
+
+     /// Allocates a new uncached transmit channel with a given message count.
+     ///
+     /// Identical to [`Self::new`], except that the message ring is also taken from the `shared`
+     /// allocator.
+     pub(crate) fn new_uncached(
+         alloc: &mut gpu::KernelAllocators,
+         count: usize,
+     ) -> Result<TxChannel<T, U>> {
+         Ok(TxChannel {
+             ring: ChannelRing {
+                 state: alloc.shared.new_default()?,
+                 ring: alloc.shared.array_empty(count)?,
+             },
+             wptr: 0,
+             count: count as u32,
+         })
+     }
+
+     /// Send a message to the ring, returning a cookie with the ring buffer position.
+     ///
+     /// The returned cookie is the write index *after* this message, which is the value the
+     /// read pointer takes once the message has been consumed (see [`Self::wait_for`]).
+     ///
+     /// This will poll/block if the ring is full, which we don't really expect to happen.
+     pub(crate) fn put(&mut self, msg: &U) -> u32 {
+         self.ring.state.with(|raw, _inner| {
+             let next_wptr = (self.wptr + 1) % self.count;
+             let mut rptr = T::rptr(raw);
+             // The ring is full when advancing the write index would catch up with the reader.
+             if next_wptr == rptr {
+                 pr_err!(
+                     "TX ring buffer is full! Waiting... ({}, {})\n",
+                     next_wptr,
+                     rptr
+                 );
+                 // TODO: block properly on incoming messages?
+                 while next_wptr == rptr {
+                     coarse_sleep(Duration::from_millis(8));
+                     rptr = T::rptr(raw);
+                 }
+             }
+             // Write the message first, then publish the new write index after the sync.
+             self.ring.ring[self.wptr as usize] = *msg;
+             mem::sync();
+             T::set_wptr(raw, next_wptr);
+             self.wptr = next_wptr;
+         });
+         self.wptr
+     }
+
+     /// Wait for a previously submitted message to be popped off of the ring by the GPU firmware.
+     ///
+     /// `wptr` is the cookie returned by [`Self::put`], and `timeout_ms` is the total time to
+     /// wait. Returns `Ok(())` once the read pointer equals `wptr`, or `ETIMEDOUT` otherwise.
+     ///
+     /// This busy-loops, and is intended to be used for rare cases when we need to block for
+     /// completion of a cache management or invalidation operation synchronously (which
+     /// the firmware normally completes fast enough not to be worth sleeping for).
+     /// If the poll takes longer than 10ms, this switches to sleeping between polls.
+     pub(crate) fn wait_for(&mut self, wptr: u32, timeout_ms: u64) -> Result {
+         const MAX_FAST_POLL: u64 = 10;
+         let start = time::ktime_get();
+         let timeout_fast = start + Duration::from_millis(timeout_ms.min(MAX_FAST_POLL));
+         let timeout_slow = start + Duration::from_millis(timeout_ms);
+         self.ring.state.with(|raw, _inner| {
+             // Phase 1: busy-poll for at most MAX_FAST_POLL milliseconds.
+             while time::ktime_get() < timeout_fast {
+                 if T::rptr(raw) == wptr {
+                     return Ok(());
+                 }
+                 mem::sync();
+             }
+             // Phase 2: sleep between polls until the full timeout expires.
+             while time::ktime_get() < timeout_slow {
+                 if T::rptr(raw) == wptr {
+                     return Ok(());
+                 }
+                 coarse_sleep(Duration::from_millis(5));
+                 mem::sync();
+             }
+             // Check one last time after the deadline: the final sleep (or scheduling delays)
+             // may have overshot it while the firmware completed in the meantime, and with
+             // `timeout_ms == 0` neither loop above ever looks at the read pointer.
+             if T::rptr(raw) == wptr {
+                 Ok(())
+             } else {
+                 Err(ETIMEDOUT)
+             }
+         })
+     }
+ }
+
+ /// Device Control channel for global device management commands.
+ #[versions(AGX)]
+ pub(crate) struct DeviceControlChannel {
+     dev: AsahiDevice,
+     ch: TxChannel<ChannelState, DeviceControlMsg::ver>,
+ }
+
+ #[versions(AGX)]
+ impl DeviceControlChannel::ver {
+     /// Timeout used by `wait_for()`, in milliseconds.
+     const COMMAND_TIMEOUT_MS: u64 = 1000;
+
+     /// Allocate a new Device Control channel with a 0x100-entry transmit ring.
+     pub(crate) fn new(
+         dev: &AsahiDevice,
+         alloc: &mut gpu::KernelAllocators,
+     ) -> Result<DeviceControlChannel::ver> {
+         let ch = TxChannel::<ChannelState, DeviceControlMsg::ver>::new(alloc, 0x100)?;
+         Ok(DeviceControlChannel::ver {
+             dev: dev.clone(),
+             ch,
+         })
+     }
+
+     /// Returns the raw `ChannelRing` structure to pass to firmware.
+     pub(crate) fn to_raw(&self) -> raw::ChannelRing<ChannelState, DeviceControlMsg::ver> {
+         let ring = &self.ch.ring;
+         ring.to_raw()
+     }
+
+     /// Submits a Device Control command and returns the ring position cookie from `put()`.
+     pub(crate) fn send(&mut self, msg: &DeviceControlMsg::ver) -> u32 {
+         cls_dev_dbg!(DeviceControlCh, self.dev, "DeviceControl: {:?}\n", msg);
+         self.ch.put(msg)
+     }
+
+     /// Waits for a previously submitted Device Control command to complete, giving up after
+     /// `COMMAND_TIMEOUT_MS`.
+     pub(crate) fn wait_for(&mut self, wptr: u32) -> Result {
+         let timeout_ms = Self::COMMAND_TIMEOUT_MS;
+         self.ch.wait_for(wptr, timeout_ms)
+     }
+ }
+
+ /// Pipe channel to submit WorkQueue execution requests.
+ #[versions(AGX)]
+ pub(crate) struct PipeChannel {
+     dev: AsahiDevice,
+     ch: TxChannel<ChannelState, PipeMsg::ver>,
+ }
+
+ #[versions(AGX)]
+ impl PipeChannel::ver {
+     /// Allocate a new Pipe submission channel with a 0x100-entry transmit ring.
+     pub(crate) fn new(
+         dev: &AsahiDevice,
+         alloc: &mut gpu::KernelAllocators,
+     ) -> Result<PipeChannel::ver> {
+         let ch = TxChannel::<ChannelState, PipeMsg::ver>::new(alloc, 0x100)?;
+         Ok(PipeChannel::ver {
+             dev: dev.clone(),
+             ch,
+         })
+     }
+
+     /// Returns the raw `ChannelRing` structure to pass to firmware.
+     pub(crate) fn to_raw(&self) -> raw::ChannelRing<ChannelState, PipeMsg::ver> {
+         let ring = &self.ch.ring;
+         ring.to_raw()
+     }
+
+     /// Submits a Pipe kick command to the firmware.
+     ///
+     /// The ring position cookie returned by `put()` is discarded.
+     pub(crate) fn send(&mut self, msg: &PipeMsg::ver) {
+         cls_dev_dbg!(PipeCh, self.dev, "Pipe: {:?}\n", msg);
+         let _cookie = self.ch.put(msg);
+     }
+ }
+
+ /// Firmware Control channel, used for secure cache flush requests.
+ pub(crate) struct FwCtlChannel {
+     dev: AsahiDevice,
+     ch: TxChannel<FwCtlChannelState, FwCtlMsg>,
+ }
+
+ impl FwCtlChannel {
+     /// Timeout used by `wait_for()`, in milliseconds.
+     const COMMAND_TIMEOUT_MS: u64 = 1000;
+
+     /// Allocate a new Firmware Control channel with a 0x100-entry uncached transmit ring.
+     pub(crate) fn new(
+         dev: &AsahiDevice,
+         alloc: &mut gpu::KernelAllocators,
+     ) -> Result<FwCtlChannel> {
+         let ch = TxChannel::<FwCtlChannelState, FwCtlMsg>::new_uncached(alloc, 0x100)?;
+         Ok(FwCtlChannel {
+             dev: dev.clone(),
+             ch,
+         })
+     }
+
+     /// Returns the raw `ChannelRing` structure to pass to firmware.
+     pub(crate) fn to_raw(&self) -> raw::ChannelRing<FwCtlChannelState, FwCtlMsg> {
+         let ring = &self.ch.ring;
+         ring.to_raw()
+     }
+
+     /// Submits a Firmware Control command and returns the ring position cookie from `put()`.
+     pub(crate) fn send(&mut self, msg: &FwCtlMsg) -> u32 {
+         cls_dev_dbg!(FwCtlCh, self.dev, "FwCtl: {:?}\n", msg);
+         self.ch.put(msg)
+     }
+
+     /// Waits for a previously submitted Firmware Control command to complete, giving up after
+     /// `COMMAND_TIMEOUT_MS`.
+     pub(crate) fn wait_for(&mut self, wptr: u32) -> Result {
+         let timeout_ms = Self::COMMAND_TIMEOUT_MS;
+         self.ch.wait_for(wptr, timeout_ms)
+     }
+ }
+
+ /// Event channel, used to notify the driver of command completions, GPU faults and errors, and
+ /// other events.
+ pub(crate) struct EventChannel {
+ dev: AsahiDevice,
+ ch: RxChannel<ChannelState, RawEventMsg>,
+ /// Event manager that gets signaled for each event slot reported in a `Flag` message.
+ mgr: Arc<event::EventManager>,
+ /// GPU manager that handles faults and timeouts; `None` until `set_manager()` is called.
+ gpu: Option<Arc<dyn gpu::GpuManager>>,
+ }
+
+ impl EventChannel {
+ /// Allocate a new Event channel.
+ ///
+ /// The receive ring has 0x100 entries, and no GPU manager is registered initially.
+ pub(crate) fn new(
+ dev: &AsahiDevice,
+ alloc: &mut gpu::KernelAllocators,
+ mgr: Arc<event::EventManager>,
+ ) -> Result<EventChannel> {
+ Ok(EventChannel {
+ dev: dev.clone(),
+ ch: RxChannel::<ChannelState, RawEventMsg>::new(alloc, 0x100)?,
+ mgr,
+ gpu: None,
+ })
+ }
+
+ /// Registers the managing `Gpu` instance that will handle events on this channel.
+ pub(crate) fn set_manager(&mut self, gpu: Arc<dyn gpu::GpuManager>) {
+ self.gpu = Some(gpu);
+ }
+
+ /// Returns the raw `ChannelRing` structure to pass to firmware.
+ pub(crate) fn to_raw(&self) -> raw::ChannelRing<ChannelState, RawEventMsg> {
+ self.ch.ring.to_raw()
+ }
+
+ /// Polls for new Event messages on this ring.
+ ///
+ /// Drains sub-channel 0 until it is empty, dispatching each message: faults and timeouts go
+ /// to the registered GPU manager, flag messages signal the matching event slots, and anything
+ /// else is logged.
+ pub(crate) fn poll(&mut self) {
+ while let Some(msg) = self.ch.get(0) {
+ // NOTE(review): `msg` is accessed through two views (`raw` and `msg`), presumably
+ // union fields of `RawEventMsg`; the tag is read through the raw view first so that
+ // the typed view is only used for tags in the known range -- confirm against the
+ // definition in fw::channels.
+ let tag = unsafe { msg.raw.0 };
+ match tag {
+ 0..=EVENT_MAX => {
+ let msg = unsafe { msg.msg };
+
+ cls_dev_dbg!(EventCh, self.dev, "Event: {:?}\n", msg);
+ match msg {
+ EventMsg::Fault => match self.gpu.as_ref() {
+ Some(gpu) => gpu.handle_fault(),
+ None => {
+ dev_crit!(self.dev, "EventChannel: No GPU manager available!\n")
+ }
+ },
+ EventMsg::Timeout {
+ counter,
+ event_slot,
+ ..
+ } => match self.gpu.as_ref() {
+ Some(gpu) => gpu.handle_timeout(counter, event_slot),
+ None => {
+ dev_crit!(self.dev, "EventChannel: No GPU manager available!\n")
+ }
+ },
+ EventMsg::Flag { firing, .. } => {
+ // `firing` is a bitmap of event slots: bit `j` of word `i` stands for
+ // slot `i * 32 + j`.
+ for (i, flags) in firing.iter().enumerate() {
+ for j in 0..32 {
+ if flags & (1u32 << j) != 0 {
+ self.mgr.signal((i * 32 + j) as u32);
+ }
+ }
+ }
+ }
+ // Tag is within the known range, but this variant is not handled here.
+ msg => {
+ dev_crit!(self.dev, "Unknown event message: {:?}\n", msg);
+ }
+ }
+ }
+ // Tag is outside the known range: only the raw view is printed.
+ _ => {
+ dev_warn!(self.dev, "Unknown event message: {:?}\n", unsafe {
+ msg.raw
+ });
+ }
+ }
+ }
+ }
+ }
+
+ /// Firmware Log channel. This one is pretty special, since it has 6 sub-channels (for different log
+ /// levels), and it also uses a side buffer to actually hold the log messages, only passing around
+ /// pointers in the main buffer.
+ pub(crate) struct FwLogChannel {
+     dev: AsahiDevice,
+     ch: RxChannel<FwLogChannelState, RawFwLogMsg>,
+     /// Side buffer holding the log payloads, `BUF_SIZE` entries per sub-channel.
+     payload_buf: GpuArray<RawFwLogPayloadMsg>,
+ }
+
+ impl FwLogChannel {
+     /// Number of message slots in each sub-channel ring.
+     const RING_SIZE: usize = 0x100;
+     /// Number of payload entries per sub-channel in the side buffer.
+     const BUF_SIZE: usize = 0x100;
+
+     /// Allocate a new Firmware Log channel.
+     ///
+     /// Allocates the receive ring plus a payload side buffer with `BUF_SIZE` entries for each
+     /// of the `FwLogChannelState::SUB_CHANNELS` sub-channels.
+     pub(crate) fn new(
+         dev: &AsahiDevice,
+         alloc: &mut gpu::KernelAllocators,
+     ) -> Result<FwLogChannel> {
+         Ok(FwLogChannel {
+             dev: dev.clone(),
+             ch: RxChannel::<FwLogChannelState, RawFwLogMsg>::new(alloc, Self::RING_SIZE)?,
+             payload_buf: alloc
+                 .shared
+                 .array_empty(Self::BUF_SIZE * FwLogChannelState::SUB_CHANNELS)?,
+         })
+     }
+
+     /// Returns the raw `ChannelRing` structure to pass to firmware.
+     pub(crate) fn to_raw(&self) -> raw::ChannelRing<FwLogChannelState, RawFwLogMsg> {
+         self.ch.ring.to_raw()
+     }
+
+     /// Returns the GPU pointers to the firmware log payload buffer.
+     pub(crate) fn get_buf(&self) -> GpuWeakPointer<[RawFwLogPayloadMsg]> {
+         self.payload_buf.weak_pointer()
+     }
+
+     /// Polls for new log messages on all sub-rings.
+     ///
+     /// Each ring message is peeked first and only dequeued once its payload has been
+     /// processed (or rejected). Malformed messages are reported with a warning and dropped.
+     /// Valid messages are printed at a kernel log level selected by the sub-channel index.
+     pub(crate) fn poll(&mut self) {
+         // Half-open range: equivalent to `0..=SUB_CHANNELS - 1`, but cannot underflow.
+         for i in 0..FwLogChannelState::SUB_CHANNELS {
+             while let Some(msg) = self.ch.peek(i) {
+                 cls_dev_dbg!(FwLogCh, self.dev, "FwLog{}: {:?}\n", i, msg);
+                 if msg.msg_type != 2 {
+                     dev_warn!(self.dev, "Unknown FWLog{} message: {:?}\n", i, msg);
+                     self.ch.get(i);
+                     continue;
+                 }
+                 // Reject indices that would point outside this sub-channel's payload region.
+                 if msg.msg_index.0 as usize >= Self::BUF_SIZE {
+                     dev_warn!(
+                         self.dev,
+                         "FWLog{} message index out of bounds: {:?}\n",
+                         i,
+                         msg
+                     );
+                     self.ch.get(i);
+                     continue;
+                 }
+                 // Sub-channel `i` owns payload entries `[BUF_SIZE * i, BUF_SIZE * (i + 1))`.
+                 let index = Self::BUF_SIZE * i + msg.msg_index.0 as usize;
+                 let payload = &self.payload_buf.as_slice()[index];
+                 if payload.msg_type != 3 {
+                     dev_warn!(self.dev, "Unknown FWLog{} payload: {:?}\n", i, payload);
+                     self.ch.get(i);
+                     continue;
+                 }
+                 // The text runs up to and including the first NUL byte in the payload.
+                 let msg = if let Some(end) = payload.msg.iter().position(|&r| r == 0) {
+                     CStr::from_bytes_with_nul(&(*payload.msg)[..end + 1])
+                         .unwrap_or(c_str!("cstr_err"))
+                 } else {
+                     dev_warn!(
+                         self.dev,
+                         "FWLog{} payload not NUL-terminated: {:?}\n",
+                         i,
+                         payload
+                     );
+                     self.ch.get(i);
+                     continue;
+                 };
+                 // The sub-channel index selects the kernel log level.
+                 match i {
+                     0 => dev_dbg!(self.dev, "FWLog: {}\n", msg),
+                     1 => dev_info!(self.dev, "FWLog: {}\n", msg),
+                     2 => dev_notice!(self.dev, "FWLog: {}\n", msg),
+                     3 => dev_warn!(self.dev, "FWLog: {}\n", msg),
+                     4 => dev_err!(self.dev, "FWLog: {}\n", msg),
+                     5 => dev_crit!(self.dev, "FWLog: {}\n", msg),
+                     _ => (),
+                 };
+                 // Done with this entry: dequeue it so that the next one can be peeked.
+                 self.ch.get(i);
+             }
+         }
+     }
+ }
+
+ /// KTrace channel, used to receive detailed execution trace markers from the firmware.
+ /// We currently disable this in initdata, so no messages are expected here at this time.
+ pub(crate) struct KTraceChannel {
+     dev: AsahiDevice,
+     ch: RxChannel<ChannelState, RawKTraceMsg>,
+ }
+
+ impl KTraceChannel {
+     /// Allocate a new KTrace channel with a 0x200-entry receive ring.
+     pub(crate) fn new(
+         dev: &AsahiDevice,
+         alloc: &mut gpu::KernelAllocators,
+     ) -> Result<KTraceChannel> {
+         let ch = RxChannel::<ChannelState, RawKTraceMsg>::new(alloc, 0x200)?;
+         Ok(KTraceChannel {
+             dev: dev.clone(),
+             ch,
+         })
+     }
+
+     /// Returns the raw `ChannelRing` structure to pass to firmware.
+     pub(crate) fn to_raw(&self) -> raw::ChannelRing<ChannelState, RawKTraceMsg> {
+         let ring = &self.ch.ring;
+         ring.to_raw()
+     }
+
+     /// Polls for new KTrace messages on this ring.
+     ///
+     /// Every pending message on sub-channel 0 is dequeued and only emitted as a debug log.
+     pub(crate) fn poll(&mut self) {
+         while let Some(trace) = self.ch.get(0) {
+             cls_dev_dbg!(KTraceCh, self.dev, "KTrace: {:?}\n", trace);
+         }
+     }
+ }
+
+ /// Statistics channel, reporting power-related statistics to the driver.
+ /// Not really implemented other than debug logs yet...
+ #[versions(AGX)]
+ pub(crate) struct StatsChannel {
+     dev: AsahiDevice,
+     ch: RxChannel<ChannelState, RawStatsMsg::ver>,
+ }
+
+ #[versions(AGX)]
+ impl StatsChannel::ver {
+     /// Allocate a new Statistics channel with a 0x100-entry receive ring.
+     pub(crate) fn new(
+         dev: &AsahiDevice,
+         alloc: &mut gpu::KernelAllocators,
+     ) -> Result<StatsChannel::ver> {
+         Ok(StatsChannel::ver {
+             dev: dev.clone(),
+             ch: RxChannel::<ChannelState, RawStatsMsg::ver>::new(alloc, 0x100)?,
+         })
+     }
+
+     /// Returns the raw `ChannelRing` structure to pass to firmware.
+     pub(crate) fn to_raw(&self) -> raw::ChannelRing<ChannelState, RawStatsMsg::ver> {
+         self.ch.ring.to_raw()
+     }
+
+     /// Polls for new statistics messages on this ring.
+     ///
+     /// Messages with a known tag are only emitted as debug logs; anything else is reported
+     /// with a warning against this device.
+     pub(crate) fn poll(&mut self) {
+         while let Some(msg) = self.ch.get(0) {
+             // NOTE(review): `raw` and `msg` are presumably two union views of the same
+             // message; the tag is read through the raw view before the typed view is
+             // trusted -- confirm against the definition in fw::channels.
+             let tag = unsafe { msg.raw.0 };
+             match tag {
+                 0..=STATS_MAX::ver => {
+                     let msg = unsafe { msg.msg };
+                     cls_dev_dbg!(StatsCh, self.dev, "Stats: {:?}\n", msg);
+                 }
+                 _ => {
+                     // Use the device-aware logger like every other channel, so the warning
+                     // can be attributed to this GPU.
+                     dev_warn!(self.dev, "Unknown stats message: {:?}\n", unsafe {
+                         msg.raw
+                     });
+                 }
+             }
+         }
+     }
+ }
diff --git a/drivers/gpu/drm/asahi/debug.rs b/drivers/gpu/drm/asahi/debug.rs
new file mode 100644
index 000000000000..2f3a70e04cfd
--- /dev/null
+++ b/drivers/gpu/drm/asahi/debug.rs
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+#![allow(dead_code)]
+
+//! Debug enable/disable flags and convenience macros
+
+#[allow(unused_imports)]
+pub(crate) use super::{cls_dev_dbg, cls_pr_debug, debug, mod_dev_dbg, mod_pr_debug};
+use core::sync::atomic::{AtomicU64, Ordering};
+
+ /// Cached copy of the debug flag bits, written by `update_debug_flags()` and tested by
+ /// `debug_enabled()`. Starts out with all flags cleared.
+ static DEBUG_FLAGS: AtomicU64 = AtomicU64::new(0);
+
+ /// Debug flag bit indices
+ ///
+ /// Each variant's discriminant is the bit position that `debug_enabled()` tests in the 64-bit
+ /// debug flags word, so a flag is enabled by setting bit `1 << index` in that word.
+ pub(crate) enum DebugFlags {
+ // 0-3: Memory-related debug
+ Mmu = 0,
+ Alloc = 1,
+ Gem = 2,
+ Object = 3,
+
+ // 4-7: Firmware objects and resources
+ Event = 4,
+ Buffer = 5,
+ WorkQueue = 6,
+
+ // 8-13: DRM interface, rendering, compute, GPU globals
+ Gpu = 8,
+ File = 9,
+ Queue = 10,
+ Render = 11,
+ Compute = 12,
+
+ // 14-15: Misc stats
+ MemStats = 14,
+ TVBStats = 15,
+
+ // 16-22: Channels
+ FwLogCh = 16,
+ KTraceCh = 17,
+ StatsCh = 18,
+ EventCh = 19,
+ PipeCh = 20,
+ DeviceControlCh = 21,
+ FwCtlCh = 22,
+
+ // 32-35: Allocator debugging
+ FillAllocations = 32,
+ DebugAllocations = 33,
+ DetectOverflows = 34,
+ ForceCPUMaps = 35,
+
+ // 36-: Behavior flags
+ ConservativeTlbi = 36,
+ KeepGpuPowered = 37,
+ WaitForPowerOff = 38,
+ NoGpuRecovery = 39,
+ DisableClustering = 40,
+
+ // 48-: Misc
+ Debug0 = 48,
+ Debug1 = 49,
+ Debug2 = 50,
+ Debug3 = 51,
+ Debug4 = 52,
+ Debug5 = 53,
+ Debug6 = 54,
+ Debug7 = 55,
+ }
+
+ /// Update the cached global debug flags from the module parameter
+ ///
+ /// The parameter is read while holding the kernel parameter lock, and the lock is released
+ /// again before the value is stored into `DEBUG_FLAGS`.
+ pub(crate) fn update_debug_flags() {
+     let lock = crate::THIS_MODULE.kernel_param_lock();
+     let flags = *crate::debug_flags.read(&lock);
+     drop(lock);
+
+     DEBUG_FLAGS.store(flags, Ordering::Relaxed);
+ }
+
+ /// Check whether debug is enabled for a given flag
+ ///
+ /// Returns `true` if the bit at the flag's index is set in the cached `DEBUG_FLAGS` word.
+ #[inline(always)]
+ pub(crate) fn debug_enabled(flag: DebugFlags) -> bool {
+     let mask = 1u64 << (flag as usize);
+     (DEBUG_FLAGS.load(Ordering::Relaxed) & mask) != 0
+ }
+
+ /// Run some code only if debug is enabled for the calling module
+ ///
+ /// The check uses an unqualified `DEBUG_CLASS`, so a `DEBUG_CLASS: DebugFlags` item must be in
+ /// scope wherever this macro is invoked.
+ #[macro_export]
+ macro_rules! debug {
+ ($($arg:tt)*) => {
+ if $crate::debug::debug_enabled(DEBUG_CLASS) {
+ $($arg)*
+ }
+ };
+ }
+
+ /// pr_info!() if debug is enabled for the calling module
+ ///
+ /// Expands through `debug!`, so it needs a `DEBUG_CLASS` item in scope at the call site.
+ #[macro_export]
+ macro_rules! mod_pr_debug (
+ ($($arg:tt)*) => (
+ $crate::debug! { ::kernel::pr_info! ( $($arg)* ); }
+ )
+ );
+
+ /// dev_info!() if debug is enabled for the calling module
+ ///
+ /// Expands through `debug!`, so it needs a `DEBUG_CLASS` item in scope at the call site.
+ /// The arguments are forwarded unchanged to `dev_info!`.
+ #[macro_export]
+ macro_rules! mod_dev_dbg (
+ ($($arg:tt)*) => (
+ $crate::debug! { ::kernel::dev_info! ( $($arg)* ); }
+ )
+ );
+
+ /// pr_info!() if debug is enabled for a specific module
+ ///
+ /// The first argument is the bare name of a `DebugFlags` variant; the remaining arguments are
+ /// forwarded unchanged to `pr_info!`.
+ #[macro_export]
+ macro_rules! cls_pr_debug (
+ ($cls:ident, $($arg:tt)*) => (
+ if $crate::debug::debug_enabled($crate::debug::DebugFlags::$cls) {
+ ::kernel::pr_info! ( $($arg)* );
+ }
+ )
+ );
+
+ /// dev_info!() if debug is enabled for a specific module
+ ///
+ /// The first argument is the bare name of a `DebugFlags` variant; the remaining arguments are
+ /// forwarded unchanged to `dev_info!`.
+ #[macro_export]
+ macro_rules! cls_dev_dbg (
+ ($cls:ident, $($arg:tt)*) => (
+ if $crate::debug::debug_enabled($crate::debug::DebugFlags::$cls) {
+ ::kernel::dev_info! ( $($arg)* );
+ }
+ )
+ );
diff --git a/drivers/gpu/drm/asahi/driver.rs b/drivers/gpu/drm/asahi/driver.rs
new file mode 100644
index 000000000000..d49d8b1934a4
--- /dev/null
+++ b/drivers/gpu/drm/asahi/driver.rs
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+//! Top-level GPU driver implementation.
+
+use kernel::{
+ c_str, device, drm, drm::drv, drm::ioctl, error::Result, of, platform, prelude::*, sync::Arc,
+};
+
+use crate::{debug, file, gem, gpu, hw, regs};
+
+use kernel::device::RawDevice;
+use kernel::macros::vtable;
+
+ /// Driver metadata
+ ///
+ /// Exposed through the `drv::Driver::INFO` associated constant of `AsahiDriver`. The version
+ /// is currently 0.0.0.
+ const INFO: drv::DriverInfo = drv::DriverInfo {
+ major: 0,
+ minor: 0,
+ patchlevel: 0,
+ name: c_str!("asahi"),
+ desc: c_str!("Apple AGX Graphics"),
+ date: c_str!("20220831"),
+ };
+
+ /// Device data for the driver registration.
+ ///
+ /// Holds a reference to the top-level `GpuManager` object.
+ pub(crate) struct AsahiData {
+ /// The device object created from the platform device during probe.
+ pub(crate) dev: device::Device,
+ /// The GPU manager selected during probe for the detected GPU/firmware combination.
+ pub(crate) gpu: Arc<dyn gpu::GpuManager>,
+ }
+
+/// Convenience type alias for the `device::Data` type for this driver.
+type DeviceData = device::Data<drv::Registration<AsahiDriver>, regs::Resources, AsahiData>;
+
+/// Empty struct representing this driver.
+pub(crate) struct AsahiDriver;
+
+/// Convenience type alias for the DRM device type for this driver.
+pub(crate) type AsahiDevice = kernel::drm::device::Device<AsahiDriver>;
+
+ /// DRM Driver implementation for `AsahiDriver`.
+ ///
+ /// Declares the driver's associated types, its feature flags and the table of ioctls, each of
+ /// which is routed to a handler on `file::File`.
+ #[vtable]
+ impl drv::Driver for AsahiDriver {
+ /// Our `DeviceData` type, reference-counted
+ type Data = Arc<DeviceData>;
+ /// Our `File` type.
+ type File = file::File;
+ /// Our `Object` type.
+ type Object = gem::Object;
+
+ const INFO: drv::DriverInfo = INFO;
+ // GEM objects, render nodes, and sync objects (including timeline sync objects).
+ const FEATURES: u32 =
+ drv::FEAT_GEM | drv::FEAT_RENDER | drv::FEAT_SYNCOBJ | drv::FEAT_SYNCOBJ_TIMELINE;
+
+ // Each entry is (ioctl name, argument struct, access flags, handler). Only GET_PARAMS is
+ // declared without `ioctl::AUTH`.
+ kernel::declare_drm_ioctls! {
+ (ASAHI_GET_PARAMS, drm_asahi_get_params,
+ ioctl::RENDER_ALLOW, file::File::get_params),
+ (ASAHI_VM_CREATE, drm_asahi_vm_create,
+ ioctl::AUTH | ioctl::RENDER_ALLOW, file::File::vm_create),
+ (ASAHI_VM_DESTROY, drm_asahi_vm_destroy,
+ ioctl::AUTH | ioctl::RENDER_ALLOW, file::File::vm_destroy),
+ (ASAHI_GEM_CREATE, drm_asahi_gem_create,
+ ioctl::AUTH | ioctl::RENDER_ALLOW, file::File::gem_create),
+ (ASAHI_GEM_MMAP_OFFSET, drm_asahi_gem_mmap_offset,
+ ioctl::AUTH | ioctl::RENDER_ALLOW, file::File::gem_mmap_offset),
+ (ASAHI_GEM_BIND, drm_asahi_gem_bind,
+ ioctl::AUTH | ioctl::RENDER_ALLOW, file::File::gem_bind),
+ (ASAHI_QUEUE_CREATE, drm_asahi_queue_create,
+ ioctl::AUTH | ioctl::RENDER_ALLOW, file::File::queue_create),
+ (ASAHI_QUEUE_DESTROY, drm_asahi_queue_destroy,
+ ioctl::AUTH | ioctl::RENDER_ALLOW, file::File::queue_destroy),
+ (ASAHI_SUBMIT, drm_asahi_submit,
+ ioctl::AUTH | ioctl::RENDER_ALLOW, file::File::submit),
+ }
+ }
+
+ // OF Device ID table: maps each supported device tree `compatible` string to the static
+ // hardware configuration that `probe()` receives as its `id_info`.
+ kernel::define_of_id_table! {ASAHI_ID_TABLE, &'static hw::HwConfig, [
+ (of::DeviceId::Compatible(b"apple,agx-t8103"), Some(&hw::t8103::HWCONFIG)),
+ (of::DeviceId::Compatible(b"apple,agx-t8112"), Some(&hw::t8112::HWCONFIG)),
+ (of::DeviceId::Compatible(b"apple,agx-t6000"), Some(&hw::t600x::HWCONFIG_T6000)),
+ (of::DeviceId::Compatible(b"apple,agx-t6001"), Some(&hw::t600x::HWCONFIG_T6001)),
+ (of::DeviceId::Compatible(b"apple,agx-t6002"), Some(&hw::t600x::HWCONFIG_T6002)),
+ ]}
+
+ /// Platform Driver implementation for `AsahiDriver`.
+ impl platform::Driver for AsahiDriver {
+ /// Our `DeviceData` type, reference-counted
+ type Data = Arc<DeviceData>;
+ /// Data associated with each hardware ID.
+ type IdInfo = &'static hw::HwConfig;
+
+ // Assign the above OF ID table to this driver.
+ kernel::driver_of_id_table!(ASAHI_ID_TABLE);
+
+ /// Device probe function.
+ ///
+ /// Sets up the DMA masks and MMIO resources, starts the coprocessor, picks the `GpuManager`
+ /// variant matching the GPU generation and the firmware version from the device tree,
+ /// initializes it and finally registers the DRM device.
+ ///
+ /// Returns `ENODEV` if no hardware configuration was matched or the GPU/firmware
+ /// combination is unsupported, `EIO` if the device has no OF node, `ENXIO` if the
+ /// registrations are not available, or any error propagated from the individual setup steps.
+ fn probe(
+ pdev: &mut platform::Device,
+ id_info: Option<&Self::IdInfo>,
+ ) -> Result<Arc<DeviceData>> {
+ debug::update_debug_flags();
+
+ let dev = device::Device::from_dev(pdev);
+
+ dev_info!(dev, "Probing...\n");
+
+ let cfg = id_info.ok_or(ENODEV)?;
+
+ // The DMA mask has the low `uat_oas` bits set.
+ pdev.set_dma_masks((1 << cfg.uat_oas) - 1)?;
+
+ let res = regs::Resources::new(pdev)?;
+
+ // Initialize misc MMIO
+ res.init_mmio()?;
+
+ // Start the coprocessor CPU, so UAT can initialize the handoff
+ res.start_cpu()?;
+
+ // The firmware compatibility version is read from the device tree as a list of u32s.
+ let node = dev.of_node().ok_or(EIO)?;
+ let compat: Vec<u32> = node.get_property(c_str!("apple,firmware-compat"))?;
+
+ let reg = drm::drv::Registration::<AsahiDriver>::new(&dev)?;
+ // Pick the versioned GpuManager for this (GPU generation, firmware version) pair.
+ let gpu = match (cfg.gpu_gen, compat.as_slice()) {
+ (hw::GpuGen::G13, &[12, 3, 0]) => {
+ gpu::GpuManagerG13V12_3::new(reg.device(), &res, cfg)? as Arc<dyn gpu::GpuManager>
+ }
+ (hw::GpuGen::G13, &[13, 2, 0]) => {
+ gpu::GpuManagerG13V13_2::new(reg.device(), &res, cfg)? as Arc<dyn gpu::GpuManager>
+ }
+ (hw::GpuGen::G14, &[12, 4, 0]) => {
+ gpu::GpuManagerG14V12_4::new(reg.device(), &res, cfg)? as Arc<dyn gpu::GpuManager>
+ }
+ (hw::GpuGen::G14, &[13, 2, 0]) => {
+ gpu::GpuManagerG14V13_2::new(reg.device(), &res, cfg)? as Arc<dyn gpu::GpuManager>
+ }
+ _ => {
+ dev_info!(
+ dev,
+ "Unsupported GPU/firmware combination ({:?}, {:?})\n",
+ cfg.gpu_gen,
+ compat
+ );
+ return Err(ENODEV);
+ }
+ };
+
+ let data =
+ kernel::new_device_data!(reg, res, AsahiData { dev, gpu }, "Asahi::Registrations")?;
+
+ let data = Arc::<DeviceData>::from(data);
+
+ // The GPU manager is initialized before the DRM device is registered.
+ data.gpu.init()?;
+
+ kernel::drm_device_register!(
+ data.registrations().ok_or(ENXIO)?.as_pinned_mut(),
+ data.clone(),
+ 0
+ )?;
+
+ dev_info!(data.dev, "Probed!\n");
+ Ok(data)
+ }
+ }
+
+// Export the OF ID table as a module ID table, to make modpost/autoloading work.
+kernel::module_of_id_table!(MOD_TABLE, ASAHI_ID_TABLE);
diff --git a/drivers/gpu/drm/asahi/event.rs b/drivers/gpu/drm/asahi/event.rs
new file mode 100644
index 000000000000..ccf00e4104be
--- /dev/null
+++ b/drivers/gpu/drm/asahi/event.rs
@@ -0,0 +1,229 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+//! GPU event manager
+//!
+//! The GPU firmware manages work completion by using event objects (Apple calls them "stamps"),
+//! which are monotonically incrementing counters. There are a fixed number of objects, and
+//! they are managed with a `SlotAllocator`.
+//!
+//! This module manages the set of available events and lets users compute expected values.
+//! It also manages signaling owners when the GPU firmware reports that an event fired.
+
+use crate::debug::*;
+use crate::fw::types::*;
+use crate::{gpu, slotalloc, workqueue};
+use core::cmp;
+use core::sync::atomic::Ordering;
+use kernel::prelude::*;
+use kernel::sync::Arc;
+
+const DEBUG_CLASS: DebugFlags = DebugFlags::Event;
+
+/// Number of events managed by the firmware.
+const NUM_EVENTS: u32 = 128;
+
+ /// Inner data associated with a given event slot.
+ pub(crate) struct EventInner {
+ /// CPU pointer to the driver notification event stamp
+ stamp: *const AtomicU32,
+ /// GPU pointer to the driver notification event stamp
+ gpu_stamp: GpuWeakPointer<Stamp>,
+ /// GPU pointer to the firmware-internal event stamp
+ gpu_fw_stamp: GpuWeakPointer<FwStamp>,
+ }
+
+ // SAFETY: The event slots are safe to send across threads.
+ // NOTE(review): the raw `stamp` pointer is what prevents an automatic `Send` impl; it is only
+ // ever dereferenced as a shared reference to an atomic (see `current()`), which presumably is
+ // what makes moving an `EventInner` to another thread sound -- confirm.
+ unsafe impl Send for EventInner {}
+
+/// Alias for an event token, which allows requesting the same event.
+pub(crate) type Token = slotalloc::SlotToken;
+/// Alias for an allocated `Event` that has a slot.
+pub(crate) type Event = slotalloc::Guard<EventInner>;
+
+ /// Represents a given stamp value for an event.
+ ///
+ /// Consecutive event values are 0x100 apart (see `next()`), and all arithmetic wraps around.
+ #[derive(Eq, PartialEq, Copy, Clone, Debug)]
+ #[repr(transparent)]
+ pub(crate) struct EventValue(u32);
+
+ impl EventValue {
+ /// Returns the `EventValue` that succeeds this one.
+ pub(crate) fn next(&self) -> EventValue {
+ EventValue(self.0.wrapping_add(0x100))
+ }
+
+ /// Increments this `EventValue` in place.
+ pub(crate) fn increment(&mut self) {
+ self.0 = self.0.wrapping_add(0x100);
+ }
+
+ /* Not used
+ /// Increments this `EventValue` in place by a certain count.
+ pub(crate) fn add(&mut self, val: u32) {
+ self.0 = self
+ .0
+ .wrapping_add(val.checked_mul(0x100).expect("Adding too many events"));
+ }
+ */
+
+ /// Decrements this `EventValue` in place by a certain count.
+ ///
+ /// Panics if `val * 0x100` overflows a `u32`.
+ pub(crate) fn sub(&mut self, val: u32) {
+ self.0 = self
+ .0
+ .wrapping_sub(val.checked_mul(0x100).expect("Subtracting too many events"));
+ }
+
+ /// Computes the delta between this event and another event.
+ ///
+ /// The result is the signed, wrapping difference `self - other` in units of events (the raw
+ /// difference shifted right by 8 bits).
+ pub(crate) fn delta(&self, other: &EventValue) -> i32 {
+ (self.0.wrapping_sub(other.0) as i32) >> 8
+ }
+ }
+
+ impl PartialOrd for EventValue {
+     /// Always `Some`: defers to the total order defined by `Ord`.
+     fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
+         let ordering = Ord::cmp(self, other);
+         Some(ordering)
+     }
+ }
+
+ impl Ord for EventValue {
+     /// Orders two event values by the sign of their wrapping `delta()`.
+     fn cmp(&self, other: &Self) -> cmp::Ordering {
+         match self.delta(other) {
+             d if d < 0 => cmp::Ordering::Less,
+             0 => cmp::Ordering::Equal,
+             _ => cmp::Ordering::Greater,
+         }
+     }
+ }
+
+ impl EventInner {
+     /// Returns the GPU pointer to the driver notification stamp
+     pub(crate) fn stamp_pointer(&self) -> GpuWeakPointer<Stamp> {
+         let EventInner { gpu_stamp, .. } = self;
+         *gpu_stamp
+     }
+
+     /// Returns the GPU pointer to the firmware internal stamp
+     pub(crate) fn fw_stamp_pointer(&self) -> GpuWeakPointer<FwStamp> {
+         let EventInner { gpu_fw_stamp, .. } = self;
+         *gpu_fw_stamp
+     }
+
+     /// Fetches the current event value from shared memory
+     ///
+     /// The stamp is read with `Acquire` ordering.
+     pub(crate) fn current(&self) -> EventValue {
+         // SAFETY: The pointer is always valid as constructed in
+         // EventManager below, and outside users cannot construct
+         // new EventInners, nor move or copy them, and Guards as
+         // returned by the SlotAllocator hold a reference to the
+         // SlotAllocator containing the EventManagerInner, which
+         // keeps the GpuObject the stamp is contained within alive.
+         let stamp: &AtomicU32 = unsafe { &*self.stamp };
+         EventValue(stamp.load(Ordering::Acquire))
+     }
+ }
+
+ impl slotalloc::SlotItem for EventInner {
+     type Data = EventManagerInner;
+
+     /// Clears the owner registered for `slot` when the slot is released.
+     fn release(&mut self, data: &mut Self::Data, slot: u32) {
+         mod_pr_debug!("EventManager: Released slot {}\n", slot);
+         let owner = &mut data.owners[slot as usize];
+         *owner = None;
+     }
+ }
+
+ /// Inner data for the event manager, to be protected by the SlotAllocator lock.
+ pub(crate) struct EventManagerInner {
+ /// Driver notification stamps, one per event slot.
+ stamps: GpuArray<Stamp>,
+ /// Firmware-internal stamps, one per event slot.
+ fw_stamps: GpuArray<FwStamp>,
+ // Note: Use dyn to avoid having to version this entire module.
+ /// The work queue currently registered as owner of each slot, or `None` if there is none.
+ owners: Vec<Option<Arc<dyn workqueue::WorkQueue + Send + Sync>>>,
+ }
+
+ /// Top-level EventManager object.
+ pub(crate) struct EventManager {
+ /// Slot allocator handing out `EventInner` slots and guarding the `EventManagerInner`.
+ alloc: slotalloc::SlotAllocator<EventInner>,
+ }
+
+ impl EventManager {
+     /// Create a new EventManager.
+     ///
+     /// Allocates `NUM_EVENTS` driver stamps (from the `shared` allocator) and firmware stamps
+     /// (from the `private` allocator), and starts with no owner registered on any slot.
+     #[inline(never)]
+     pub(crate) fn new(alloc: &mut gpu::KernelAllocators) -> Result<EventManager> {
+         let mut owners = Vec::new();
+         for _i in 0..(NUM_EVENTS as usize) {
+             owners.try_push(None)?;
+         }
+         let inner = EventManagerInner {
+             stamps: alloc.shared.array_empty(NUM_EVENTS as usize)?,
+             fw_stamps: alloc.private.array_empty(NUM_EVENTS as usize)?,
+             owners,
+         };
+
+         Ok(EventManager {
+             alloc: slotalloc::SlotAllocator::new(
+                 NUM_EVENTS,
+                 inner,
+                 // Each slot gets CPU and GPU pointers to its own entries in the stamp arrays.
+                 |inner: &mut EventManagerInner, slot| EventInner {
+                     stamp: &inner.stamps[slot as usize].0,
+                     gpu_stamp: inner.stamps.weak_item_pointer(slot as usize),
+                     gpu_fw_stamp: inner.fw_stamps.weak_item_pointer(slot as usize),
+                 },
+             )?,
+         })
+     }
+
+     /// Gets a free `Event`, optionally trying to reuse the last one allocated by this caller.
+     ///
+     /// `owner` is registered as the work queue to notify for the returned slot.
+     pub(crate) fn get(
+         &self,
+         token: Option<Token>,
+         owner: Arc<dyn workqueue::WorkQueue + Send + Sync>,
+     ) -> Result<Event> {
+         let ev = self.alloc.get_inner(token, |inner, ev| {
+             mod_pr_debug!(
+                 "EventManager: Registered owner {:p} on slot {}\n",
+                 &*owner,
+                 ev.slot()
+             );
+             inner.owners[ev.slot() as usize] = Some(owner);
+             Ok(())
+         })?;
+         Ok(ev)
+     }
+
+     /// Returns the owner currently registered for `slot`, if any.
+     ///
+     /// The slot number comes from firmware messages, so it is bounds-checked here: an
+     /// out-of-range slot yields `None` instead of panicking on the index.
+     fn owner(&self, slot: u32) -> Option<Arc<dyn workqueue::WorkQueue + Send + Sync>> {
+         self.alloc
+             .with_inner(|inner| inner.owners.get(slot as usize).and_then(|o| o.clone()))
+     }
+
+     /// Signals an event by slot, indicating completion (of one or more commands).
+     ///
+     /// Slots without a registered owner (including out-of-range slots) are only logged.
+     pub(crate) fn signal(&self, slot: u32) {
+         match self.owner(slot) {
+             Some(owner) => {
+                 owner.signal();
+             }
+             None => {
+                 mod_pr_debug!("EventManager: Received event for empty slot {}\n", slot);
+             }
+         }
+     }
+
+     /// Marks the owner of an event as having lost its work due to a GPU error.
+     ///
+     /// Slots without a registered owner (including out-of-range slots) are reported as an
+     /// error and otherwise ignored.
+     pub(crate) fn mark_error(&self, slot: u32, wait_value: u32, error: workqueue::WorkError) {
+         match self.owner(slot) {
+             Some(owner) => {
+                 owner.mark_error(EventValue(wait_value), error);
+             }
+             None => {
+                 pr_err!("Received error for empty slot {}\n", slot);
+             }
+         }
+     }
+
+     /// Fail all commands, used when the GPU crashes.
+     pub(crate) fn fail_all(&self, error: workqueue::WorkError) {
+         let mut owners: Vec<Arc<dyn workqueue::WorkQueue + Send + Sync>> = Vec::new();
+
+         // Collect the owners inside `with_inner`, and only call into the work queues once
+         // the closure has returned.
+         self.alloc.with_inner(|inner| {
+             for wq in inner.owners.iter().filter_map(|o| o.as_ref()).cloned() {
+                 if owners.try_push(wq).is_err() {
+                     pr_err!("Failed to signal failure to WorkQueue\n");
+                 }
+             }
+         });
+
+         for wq in owners {
+             wq.fail_all(error);
+         }
+     }
+ }
diff --git a/drivers/gpu/drm/asahi/file.rs b/drivers/gpu/drm/asahi/file.rs
new file mode 100644
index 000000000000..5d47feb30134
--- /dev/null
+++ b/drivers/gpu/drm/asahi/file.rs
@@ -0,0 +1,718 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+#![allow(clippy::unusual_byte_groupings)]
+
+//! File implementation, which represents a single DRM client.
+//!
+//! This is in charge of managing the resources associated with one GPU client, including an
+//! arbitrary number of submission queues and Vm objects, and reporting hardware/driver
+//! information to userspace and accepting submissions.
+
+use crate::debug::*;
+use crate::driver::AsahiDevice;
+use crate::{alloc, buffer, driver, gem, mmu, queue};
+use core::mem::MaybeUninit;
+use kernel::dma_fence::RawDmaFence;
+use kernel::drm::gem::BaseObject;
+use kernel::io_buffer::{IoBufferReader, IoBufferWriter};
+use kernel::prelude::*;
+use kernel::sync::{smutex::Mutex, Arc};
+use kernel::user_ptr::UserSlicePtr;
+use kernel::{bindings, dma_fence, drm, xarray};
+
+ /// Debug class for this file (presumably picked up by the `mod_*` debug macros; confirm in
+ /// `debug.rs`).
+ const DEBUG_CLASS: DebugFlags = DebugFlags::File;
+
+ /// Largest `in_sync_count` / `out_sync_count` accepted by `File::submit`; also reported to
+ /// userspace by `File::get_params`.
+ const MAX_SYNCS_PER_SUBMISSION: u32 = 64;
+ /// Largest `command_count` accepted by `File::submit`; also reported by `File::get_params`.
+ const MAX_COMMANDS_PER_SUBMISSION: u32 = 64;
+ /// Reported to userspace as `max_commands_in_flight` by `File::get_params`.
+ pub(crate) const MAX_COMMANDS_IN_FLIGHT: u32 = 1024;
+
+ /// A client instance of an `mmu::Vm` address space.
+ struct Vm {
+     /// Allocator created in `vm_create` over `VM_DRV_GPU_START..=VM_DRV_GPU_END`.
+     ualloc: Arc<Mutex<alloc::DefaultAllocator>>,
+     /// Allocator created in `vm_create` over `VM_DRV_GPUFW_START..=VM_DRV_GPUFW_END`.
+     ualloc_priv: Arc<Mutex<alloc::DefaultAllocator>>,
+     /// The underlying address space.
+     vm: mmu::Vm,
+     /// Zero-filled kernel object that `vm_create` maps at `VM_UNK_PAGE`.
+     dummy_obj: gem::ObjectRef,
+ }
+
+ impl Drop for Vm {
+     /// Removes the dummy object's mappings in this VM when the client VM goes away.
+     fn drop(&mut self) {
+         // Mappings create a reference loop, make sure to break it.
+         let vm_id = self.vm.id();
+         self.dummy_obj.drop_vm_mappings(vm_id);
+     }
+ }
+
+ /// Sync object from userspace.
+ pub(crate) struct SyncItem {
+     /// The sync object looked up from the userspace handle.
+     pub(crate) syncobj: drm::syncobj::SyncObj,
+     /// Fence to wait on; `parse_one` only fills this in for input (`out == false`) items.
+     pub(crate) fence: Option<dma_fence::Fence>,
+     /// Fence chain node; `parse_one` only allocates this for output timeline sync objects.
+     pub(crate) chain_fence: Option<dma_fence::FenceChain>,
+     /// Timeline value copied from the userspace descriptor (always 0 for plain sync objects).
+     pub(crate) timeline_value: u64,
+ }
+
+ impl SyncItem {
+     /// Converts a single userspace `drm_asahi_sync` descriptor into a `SyncItem`.
+     ///
+     /// `out` tells whether the item is an output (to be signaled) or an input (to be waited
+     /// on). Inputs get their current fence resolved; timeline outputs get a fresh fence chain
+     /// node. Returns `EINVAL` for nonzero `extensions`, an unknown sync type, a plain sync
+     /// object with a nonzero timeline value, or an input sync object without a fence.
+     fn parse_one(file: &DrmFile, data: bindings::drm_asahi_sync, out: bool) -> Result<SyncItem> {
+         if data.extensions != 0 {
+             return Err(EINVAL);
+         }
+
+         // Classify the descriptor before touching the handle table.
+         let is_timeline = match data.sync_type {
+             bindings::drm_asahi_sync_type_DRM_ASAHI_SYNC_SYNCOBJ => {
+                 if data.timeline_value != 0 {
+                     return Err(EINVAL);
+                 }
+                 false
+             }
+             bindings::drm_asahi_sync_type_DRM_ASAHI_SYNC_TIMELINE_SYNCOBJ => true,
+             _ => return Err(EINVAL),
+         };
+
+         let syncobj = drm::syncobj::SyncObj::lookup_handle(file, data.handle)?;
+
+         let fence = if out {
+             None
+         } else {
+             let current = syncobj.fence_get().ok_or(EINVAL)?;
+             if is_timeline {
+                 Some(current.chain_find_seqno(data.timeline_value)?)
+             } else {
+                 Some(current)
+             }
+         };
+
+         let chain_fence = if out && is_timeline {
+             Some(dma_fence::FenceChain::new()?)
+         } else {
+             None
+         };
+
+         Ok(SyncItem {
+             syncobj,
+             fence,
+             chain_fence,
+             timeline_value: data.timeline_value,
+         })
+     }
+
+     /// Reads `count` `drm_asahi_sync` descriptors from the user pointer `ptr` and parses each
+     /// of them with `parse_one`, stopping at the first error.
+     fn parse_array(file: &DrmFile, ptr: u64, count: u32, out: bool) -> Result<Vec<SyncItem>> {
+         const STRIDE: usize = core::mem::size_of::<bindings::drm_asahi_sync>();
+
+         let mut items = Vec::try_with_capacity(count as usize)?;
+         let total_bytes = STRIDE * count as usize;
+
+         // SAFETY: We only read this once, so there are no TOCTOU issues.
+         let mut reader =
+             unsafe { UserSlicePtr::new(ptr as usize as *mut _, total_bytes).reader() };
+
+         for _ in 0..count {
+             let mut raw: MaybeUninit<bindings::drm_asahi_sync> = MaybeUninit::uninit();
+
+             // SAFETY: The size of `raw` is STRIDE
+             unsafe { reader.read_raw(raw.as_mut_ptr() as *mut u8, STRIDE)? };
+
+             // SAFETY: All bit patterns in the struct are valid
+             let raw = unsafe { raw.assume_init() };
+
+             items.try_push(SyncItem::parse_one(file, raw, out)?)?;
+         }
+
+         Ok(items)
+     }
+ }
+
+ /// State associated with a client.
+ pub(crate) struct File {
+     /// Unique file ID, taken from the GPU's ID allocator in `open`.
+     id: u64,
+     /// VMs created by this client, keyed by the VM ID handed to userspace.
+     vms: xarray::XArray<Box<Vm>>,
+     /// Submission queues created by this client, keyed by the queue ID handed to userspace.
+     queues: xarray::XArray<Arc<Mutex<Box<dyn queue::Queue>>>>,
+ }
+
+ /// Convenience type alias for our DRM `File` type.
+ pub(crate) type DrmFile = drm::file::File<File>;
+
+ // All `*_END` addresses below are inclusive: they are used with `..=` ranges in `do_gem_bind`.
+
+ /// Start address of the 32-bit USC address space.
+ const VM_SHADER_START: u64 = 0x11_00000000;
+ /// End address of the 32-bit USC address space.
+ const VM_SHADER_END: u64 = 0x11_ffffffff;
+ /// Start address of the general user mapping region.
+ const VM_USER_START: u64 = 0x20_00000000;
+ /// End address of the general user mapping region.
+ const VM_USER_END: u64 = 0x5f_ffffffff;
+
+ /// Start address of the kernel-managed GPU-only mapping region.
+ const VM_DRV_GPU_START: u64 = 0x60_00000000;
+ /// End address of the kernel-managed GPU-only mapping region.
+ const VM_DRV_GPU_END: u64 = 0x60_ffffffff;
+ /// Start address of the kernel-managed GPU/FW shared mapping region.
+ const VM_DRV_GPUFW_START: u64 = 0x61_00000000;
+ /// End address of the kernel-managed GPU/FW shared mapping region.
+ const VM_DRV_GPUFW_END: u64 = 0x61_ffffffff;
+ /// Address of a special dummy page? (`vm_create` maps a zero-filled object here.)
+ const VM_UNK_PAGE: u64 = 0x6f_ffff8000;
+
+ impl drm::file::DriverFile for File {
+     type Driver = driver::AsahiDriver;
+
+     /// Builds the per-client state when the DRM device node is opened.
+     ///
+     /// Refreshes the debug flags, takes a new file ID from the GPU's ID allocator and creates
+     /// empty VM and queue tables.
+     fn open(device: &AsahiDevice) -> Result<Box<Self>> {
+         debug::update_debug_flags();
+
+         let gpu = &device.data().gpu;
+         let id = gpu.ids().file.next();
+
+         mod_dev_dbg!(device, "[File {}]: DRM device opened\n", id);
+
+         let vms = xarray::XArray::new(xarray::flags::ALLOC1)?;
+         let queues = xarray::XArray::new(xarray::flags::ALLOC1)?;
+         let file = Box::try_new(Self { id, vms, queues })?;
+
+         Ok(file)
+     }
+ }
+
+impl File {
+ /// IOCTL: get_params: Report global driver and hardware parameters to userspace.
+ ///
+ /// Fills in a `drm_asahi_params_global` and copies at most `data.size` bytes of it to the
+ /// user pointer `data.pointer`. Returns `EINVAL` if `extensions`, `param_group` or `pad` is
+ /// nonzero.
+ pub(crate) fn get_params(
+     device: &AsahiDevice,
+     data: &mut bindings::drm_asahi_get_params,
+     file: &DrmFile,
+ ) -> Result<u32> {
+     mod_dev_dbg!(device, "[File {}]: IOCTL: get_params\n", file.id);
+
+     let gpu = &device.data().gpu;
+
+     let request_ok = data.extensions == 0 && data.param_group == 0 && data.pad == 0;
+     if !request_ok {
+         return Err(EINVAL);
+     }
+
+     let cfg = gpu.get_cfg();
+     let dyncfg = gpu.get_dyncfg();
+
+     let mut params = bindings::drm_asahi_params_global {
+         unstable_uabi_version: bindings::DRM_ASAHI_UNSTABLE_UABI_VERSION,
+         pad0: 0,
+
+         feat_compat: cfg.gpu_feat_compat,
+         feat_incompat: cfg.gpu_feat_incompat,
+
+         gpu_generation: dyncfg.id.gpu_gen as u32,
+         gpu_variant: dyncfg.id.gpu_variant as u32,
+         gpu_revision: dyncfg.id.gpu_rev as u32,
+         chip_id: cfg.chip_id,
+
+         num_dies: dyncfg.id.max_dies,
+         num_clusters_total: dyncfg.id.num_clusters,
+         num_cores_per_cluster: dyncfg.id.num_cores,
+         num_frags_per_cluster: dyncfg.id.num_frags,
+         num_gps_per_cluster: dyncfg.id.num_gps,
+         num_cores_total_active: dyncfg.id.total_active_cores,
+         // Filled in from the dynamic configuration below.
+         core_masks: [0; bindings::DRM_ASAHI_MAX_CLUSTERS as usize],
+
+         vm_page_size: mmu::UAT_PGSZ as u32,
+         pad1: 0,
+         vm_user_start: VM_USER_START,
+         vm_user_end: VM_USER_END,
+         vm_shader_start: VM_SHADER_START,
+         vm_shader_end: VM_SHADER_END,
+
+         max_syncs_per_submission: MAX_SYNCS_PER_SUBMISSION,
+         max_commands_per_submission: MAX_COMMANDS_PER_SUBMISSION,
+         max_commands_in_flight: MAX_COMMANDS_IN_FLIGHT,
+         max_attachments: crate::microseq::MAX_ATTACHMENTS as u32,
+
+         timer_frequency_hz: cfg.base_clock_hz,
+         min_frequency_khz: dyncfg.pwr.min_frequency_khz(),
+         max_frequency_khz: dyncfg.pwr.max_frequency_khz(),
+         max_power_mw: dyncfg.pwr.max_power_mw,
+
+         result_render_size: core::mem::size_of::<bindings::drm_asahi_result_render>() as u32,
+         result_compute_size: core::mem::size_of::<bindings::drm_asahi_result_compute>() as u32,
+     };
+
+     // More masks than UAPI slots is reported as EIO rather than silently truncated.
+     for (cluster, mask) in dyncfg.id.core_masks.iter().enumerate() {
+         let dst = params.core_masks.get_mut(cluster).ok_or(EIO)?;
+         *dst = (*mask).try_into()?;
+     }
+
+     // Never copy more than userspace asked for, nor more than the structure holds.
+     let copy_len =
+         core::mem::size_of::<bindings::drm_asahi_params_global>().min(data.size.try_into()?);
+
+     // SAFETY: We only write to this userptr once, so there are no TOCTOU issues.
+     let mut params_writer =
+         unsafe { UserSlicePtr::new(data.pointer as usize as *mut _, copy_len).writer() };
+
+     // SAFETY: `copy_len` is at most the sizeof of `params`
+     unsafe { params_writer.write_raw(&params as *const _ as *const u8, copy_len)? };
+
+     Ok(0)
+ }
+
+ /// IOCTL: vm_create: Create a new `Vm`.
+ ///
+ /// Creates the address space, its two kernel-managed allocators and a zero-filled dummy
+ /// object mapped at `VM_UNK_PAGE`, then stores the result in the file's VM table and returns
+ /// the new ID through `data.vm_id`. Returns `EINVAL` if `extensions` is nonzero.
+ pub(crate) fn vm_create(
+     device: &AsahiDevice,
+     data: &mut bindings::drm_asahi_vm_create,
+     file: &DrmFile,
+ ) -> Result<u32> {
+     if data.extensions != 0 {
+         return Err(EINVAL);
+     }
+
+     let gpu = &device.data().gpu;
+     let file_id = file.id;
+     let vm = gpu.new_vm(file_id)?;
+
+     // Reserve the table slot up front so the ID is known for the allocator names.
+     let slot = file.vms.reserve()?;
+     let id: u32 = slot.index().try_into()?;
+
+     mod_dev_dbg!(device, "[File {} VM {}]: VM Create\n", file_id, id);
+     mod_dev_dbg!(
+         device,
+         "[File {} VM {}]: Creating allocators\n",
+         file_id,
+         id
+     );
+
+     let shared_alloc = alloc::DefaultAllocator::new(
+         device,
+         &vm,
+         VM_DRV_GPU_START,
+         VM_DRV_GPU_END,
+         buffer::PAGE_SIZE,
+         mmu::PROT_GPU_SHARED_RW,
+         512 * 1024,
+         true,
+         fmt!("File {} VM {} GPU Shared", file_id, id),
+         false,
+     )?;
+     let ualloc = Arc::try_new(Mutex::new(shared_alloc))?;
+
+     let private_alloc = alloc::DefaultAllocator::new(
+         device,
+         &vm,
+         VM_DRV_GPUFW_START,
+         VM_DRV_GPUFW_END,
+         buffer::PAGE_SIZE,
+         mmu::PROT_GPU_FW_PRIV_RW,
+         64 * 1024,
+         true,
+         fmt!("File {} VM {} GPU FW Private", file_id, id),
+         false,
+     )?;
+     let ualloc_priv = Arc::try_new(Mutex::new(private_alloc))?;
+
+     mod_dev_dbg!(
+         device,
+         "[File {} VM {}]: Creating dummy object\n",
+         file_id,
+         id
+     );
+     let mut dummy_obj = gem::new_kernel_object(device, 0x4000)?;
+     dummy_obj.vmap()?.as_mut_slice().fill(0);
+     dummy_obj.map_at(&vm, VM_UNK_PAGE, mmu::PROT_GPU_SHARED_RW, true)?;
+
+     mod_dev_dbg!(device, "[File {} VM {}]: VM created\n", file_id, id);
+     let client_vm = Box::try_new(Vm {
+         ualloc,
+         ualloc_priv,
+         vm,
+         dummy_obj,
+     })?;
+     slot.store(client_vm)?;
+
+     data.vm_id = id;
+
+     Ok(0)
+ }
+
+ /// IOCTL: vm_destroy: Destroy a `Vm`.
+ ///
+ /// Removes `data.vm_id` from the file's VM table. Returns `EINVAL` if `extensions` is nonzero
+ /// and `ENOENT` if no VM with that ID exists.
+ pub(crate) fn vm_destroy(
+     _device: &AsahiDevice,
+     data: &mut bindings::drm_asahi_vm_destroy,
+     file: &DrmFile,
+ ) -> Result<u32> {
+     if data.extensions != 0 {
+         return Err(EINVAL);
+     }
+
+     match file.vms.remove(data.vm_id as usize) {
+         Some(_) => Ok(0),
+         None => Err(ENOENT),
+     }
+ }
+
+ /// IOCTL: gem_create: Create a new GEM object.
+ ///
+ /// Accepts only the `ASAHI_GEM_WRITEBACK` and `ASAHI_GEM_VM_PRIVATE` flags; `vm_id` must be
+ /// zero unless the object is VM-private. On success the new handle is returned through
+ /// `data.handle`. Returns `EINVAL` for invalid arguments and `ENOENT` for an unknown VM.
+ pub(crate) fn gem_create(
+     device: &AsahiDevice,
+     data: &mut bindings::drm_asahi_gem_create,
+     file: &DrmFile,
+ ) -> Result<u32> {
+     mod_dev_dbg!(
+         device,
+         "[File {}]: IOCTL: gem_create size={:#x?}\n",
+         file.id,
+         data.size
+     );
+
+     let known_flags = bindings::ASAHI_GEM_WRITEBACK | bindings::ASAHI_GEM_VM_PRIVATE;
+     let vm_private = data.flags & bindings::ASAHI_GEM_VM_PRIVATE != 0;
+
+     if data.extensions != 0
+         || (data.flags & !known_flags) != 0
+         || (!vm_private && data.vm_id != 0)
+     {
+         return Err(EINVAL);
+     }
+
+     // Only VM-private objects are tied to a specific address space.
+     let vm_id = if vm_private {
+         let client_vm = file.vms.get(data.vm_id.try_into()?).ok_or(ENOENT)?;
+         Some(client_vm.vm.id())
+     } else {
+         None
+     };
+
+     let bo = gem::new_object(device, data.size.try_into()?, data.flags, vm_id)?;
+
+     data.handle = bo.gem.create_handle(file)?;
+
+     mod_dev_dbg!(
+         device,
+         "[File {}]: IOCTL: gem_create size={:#x} handle={:#x?}\n",
+         file.id,
+         data.size,
+         data.handle
+     );
+
+     Ok(0)
+ }
+
+ /// IOCTL: gem_mmap_offset: Assign an mmap offset to a GEM object.
+ ///
+ /// The offset is returned through `data.offset`. Returns `EINVAL` if `extensions` or `flags`
+ /// is nonzero; handle lookup and offset creation errors are propagated.
+ pub(crate) fn gem_mmap_offset(
+     device: &AsahiDevice,
+     data: &mut bindings::drm_asahi_gem_mmap_offset,
+     file: &DrmFile,
+ ) -> Result<u32> {
+     mod_dev_dbg!(
+         device,
+         "[File {}]: IOCTL: gem_mmap_offset handle={:#x?}\n",
+         file.id,
+         data.handle
+     );
+
+     let unsupported = data.extensions != 0 || data.flags != 0;
+     if unsupported {
+         return Err(EINVAL);
+     }
+
+     let bo = gem::lookup_handle(file, data.handle)?;
+     let offset = bo.gem.create_mmap_offset()?;
+     data.offset = offset;
+     Ok(0)
+ }
+
+ /// IOCTL: gem_bind: Map or unmap a GEM object into a Vm.
+ ///
+ /// Dispatches on `data.op`: `BIND` goes to `do_gem_bind`, `UNBIND_ALL` to
+ /// `do_gem_unbind_all`, `UNBIND` is not supported yet (`ENOTSUPP`), and anything else is
+ /// rejected with `EINVAL`. Also returns `EINVAL` if `extensions` is nonzero.
+ pub(crate) fn gem_bind(
+     device: &AsahiDevice,
+     data: &mut bindings::drm_asahi_gem_bind,
+     file: &DrmFile,
+ ) -> Result<u32> {
+     // Argument order must follow the placeholders: the VM ID comes before the operation.
+     mod_dev_dbg!(
+         device,
+         "[File {} VM {}]: IOCTL: gem_bind op={:?} handle={:#x?} flags={:#x?} {:#x?}:{:#x?} -> {:#x?}\n",
+         file.id,
+         data.vm_id,
+         data.op,
+         data.handle,
+         data.flags,
+         data.offset,
+         data.range,
+         data.addr
+     );
+
+     if data.extensions != 0 {
+         return Err(EINVAL);
+     }
+
+     match data.op {
+         bindings::drm_asahi_bind_op_ASAHI_BIND_OP_BIND => Self::do_gem_bind(device, data, file),
+         bindings::drm_asahi_bind_op_ASAHI_BIND_OP_UNBIND => Err(ENOTSUPP),
+         bindings::drm_asahi_bind_op_ASAHI_BIND_OP_UNBIND_ALL => {
+             Self::do_gem_unbind_all(device, data, file)
+         }
+         _ => Err(EINVAL),
+     }
+ }
+
+ /// Handles the `BIND` operation of the gem_bind IOCTL: maps a whole GEM object into a VM.
+ ///
+ /// The request must have a zero `offset`, a page-aligned `addr`/`range`, a `range` equal to
+ /// the object size, and must lie entirely inside either the shader window or the general
+ /// user window. At least one of `ASAHI_BIND_READ` / `ASAHI_BIND_WRITE` must be set.
+ ///
+ /// Returns `EINVAL` for any violated constraint (including an address range that overflows
+ /// 64 bits) and `ENOENT` for an unknown VM ID.
+ pub(crate) fn do_gem_bind(
+     _device: &AsahiDevice,
+     data: &mut bindings::drm_asahi_gem_bind,
+     file: &DrmFile,
+ ) -> Result<u32> {
+     if data.offset != 0 {
+         return Err(EINVAL); // Not supported yet
+     }
+
+     if (data.addr | data.range) as usize & mmu::UAT_PGMSK != 0 {
+         return Err(EINVAL); // Must be page aligned
+     }
+
+     if (data.flags & !(bindings::ASAHI_BIND_READ | bindings::ASAHI_BIND_WRITE)) != 0 {
+         return Err(EINVAL);
+     }
+
+     let mut bo = gem::lookup_handle(file, data.handle)?;
+
+     if data.range != bo.size().try_into()? {
+         return Err(EINVAL); // Not supported yet
+     }
+
+     let start = data.addr;
+     // `addr` and `range` come straight from userspace: compute the inclusive end address
+     // with checked arithmetic so a wrapping range is rejected instead of overflowing.
+     let end = start
+         .checked_add(data.range)
+         .and_then(|past_end| past_end.checked_sub(1))
+         .ok_or(EINVAL)?;
+
+     if (VM_SHADER_START..=VM_SHADER_END).contains(&start) {
+         if !(VM_SHADER_START..=VM_SHADER_END).contains(&end) {
+             return Err(EINVAL); // Invalid map range
+         }
+     } else if (VM_USER_START..=VM_USER_END).contains(&start) {
+         if !(VM_USER_START..=VM_USER_END).contains(&end) {
+             return Err(EINVAL); // Invalid map range
+         }
+     } else {
+         return Err(EINVAL); // Invalid map range
+     }
+
+     // Just in case
+     if end >= VM_DRV_GPU_START {
+         return Err(EINVAL);
+     }
+
+     let prot = if data.flags & bindings::ASAHI_BIND_READ != 0 {
+         if data.flags & bindings::ASAHI_BIND_WRITE != 0 {
+             mmu::PROT_GPU_SHARED_RW
+         } else {
+             mmu::PROT_GPU_SHARED_RO
+         }
+     } else if data.flags & bindings::ASAHI_BIND_WRITE != 0 {
+         mmu::PROT_GPU_SHARED_WO
+     } else {
+         return Err(EINVAL); // Must specify one of ASAHI_BIND_{READ,WRITE}
+     };
+
+     // Clone it immediately so we aren't holding the XArray lock
+     let vm = file
+         .vms
+         .get(data.vm_id.try_into()?)
+         .ok_or(ENOENT)?
+         .vm
+         .clone();
+
+     bo.map_at(&vm, start, prot, true)?;
+
+     Ok(0)
+ }
+
+ /// Handles the `UNBIND_ALL` operation of the gem_bind IOCTL.
+ ///
+ /// With `vm_id == 0` every mapping of the object owned by this file is dropped; otherwise
+ /// only the mappings in the given VM are dropped. `flags`, `offset`, `range` and `addr` must
+ /// all be zero (`EINVAL` otherwise); an unknown VM ID yields `ENOENT`.
+ pub(crate) fn do_gem_unbind_all(
+     _device: &AsahiDevice,
+     data: &mut bindings::drm_asahi_gem_bind,
+     file: &DrmFile,
+ ) -> Result<u32> {
+     let all_zero = data.flags == 0 && data.offset == 0 && data.range == 0 && data.addr == 0;
+     if !all_zero {
+         return Err(EINVAL);
+     }
+
+     let mut bo = gem::lookup_handle(file, data.handle)?;
+
+     if data.vm_id != 0 {
+         let client_vm = file.vms.get(data.vm_id.try_into()?).ok_or(ENOENT)?;
+         let vm_id = client_vm.vm.id();
+         bo.drop_vm_mappings(vm_id);
+     } else {
+         bo.drop_file_mappings(file.id);
+     }
+
+     Ok(0)
+ }
+
+ /// IOCTL: queue_create: Create a new command submission queue of a given type.
+ ///
+ /// `queue_caps` must be a non-empty subset of the render, blit and compute capabilities,
+ /// `priority` at most 3, and `extensions`/`flags` zero (`EINVAL` otherwise). The new queue ID
+ /// is returned through `data.queue_id`. An unknown VM ID yields `ENOENT`.
+ pub(crate) fn queue_create(
+     device: &AsahiDevice,
+     data: &mut bindings::drm_asahi_queue_create,
+     file: &DrmFile,
+ ) -> Result<u32> {
+     let file_id = file.id;
+
+     mod_dev_dbg!(
+         device,
+         "[File {} VM {}]: Creating queue caps={:?} prio={:?} flags={:#x?}\n",
+         file_id,
+         data.vm_id,
+         data.queue_caps,
+         data.priority,
+         data.flags,
+     );
+
+     let known_caps = bindings::drm_asahi_queue_cap_DRM_ASAHI_QUEUE_CAP_RENDER
+         | bindings::drm_asahi_queue_cap_DRM_ASAHI_QUEUE_CAP_BLIT
+         | bindings::drm_asahi_queue_cap_DRM_ASAHI_QUEUE_CAP_COMPUTE;
+
+     if data.extensions != 0
+         || data.flags != 0
+         || data.priority > 3
+         || data.queue_caps == 0
+         || (data.queue_caps & !known_caps) != 0
+     {
+         return Err(EINVAL);
+     }
+
+     let slot = file.queues.reserve()?;
+
+     // Take what we need from the VM entry inside a scope so the guard is released early.
+     let (vm, ualloc, ualloc_priv) = {
+         let client_vm = file.vms.get(data.vm_id.try_into()?).ok_or(ENOENT)?;
+         (
+             client_vm.vm.clone(),
+             client_vm.ualloc.clone(),
+             client_vm.ualloc_priv.clone(),
+         )
+     };
+
+     let queue =
+         device
+             .data()
+             .gpu
+             .new_queue(vm, ualloc, ualloc_priv, data.priority, data.queue_caps)?;
+
+     data.queue_id = slot.index().try_into()?;
+     slot.store(Arc::try_new(Mutex::new(queue))?)?;
+
+     Ok(0)
+ }
+
+ /// IOCTL: queue_destroy: Destroy a command submission queue.
+ ///
+ /// Removes `data.queue_id` from the file's queue table. Returns `EINVAL` if `extensions` is
+ /// nonzero and `ENOENT` if no queue with that ID exists.
+ pub(crate) fn queue_destroy(
+     _device: &AsahiDevice,
+     data: &mut bindings::drm_asahi_queue_destroy,
+     file: &DrmFile,
+ ) -> Result<u32> {
+     if data.extensions != 0 {
+         return Err(EINVAL);
+     }
+
+     match file.queues.remove(data.queue_id as usize) {
+         Some(_) => Ok(0),
+         None => Err(ENOENT),
+     }
+ }
+
+ /// IOCTL: submit: Submit GPU work to a command submission queue.
+ ///
+ /// Validates the request limits, parses the input and output sync arrays, optionally looks
+ /// up the result buffer, copies the command array from userspace and hands everything to the
+ /// queue's `submit()`. Failures other than `ERESTARTSYS` are logged before being returned.
+ pub(crate) fn submit(
+     device: &AsahiDevice,
+     data: &mut bindings::drm_asahi_submit,
+     file: &DrmFile,
+ ) -> Result<u32> {
+     let over_limits = data.in_sync_count > MAX_SYNCS_PER_SUBMISSION
+         || data.out_sync_count > MAX_SYNCS_PER_SUBMISSION
+         || data.command_count > MAX_COMMANDS_PER_SUBMISSION;
+     if data.extensions != 0 || data.flags != 0 || over_limits {
+         return Err(EINVAL);
+     }
+
+     debug::update_debug_flags();
+
+     let gpu = &device.data().gpu;
+     gpu.update_globals();
+
+     // Upgrade to Arc<T> to drop the XArray lock early
+     let queue: Arc<Mutex<Box<dyn queue::Queue>>> = file
+         .queues
+         .get(data.queue_id.try_into()?)
+         .ok_or(ENOENT)?
+         .borrow()
+         .into();
+
+     let id = gpu.ids().submission.next();
+     mod_dev_dbg!(
+         device,
+         "[File {} Queue {}]: IOCTL: submit (submission ID: {})\n",
+         file.id,
+         data.queue_id,
+         id
+     );
+
+     mod_dev_dbg!(
+         device,
+         "[File {} Queue {}]: IOCTL: submit({}): Parsing in_syncs\n",
+         file.id,
+         data.queue_id,
+         id
+     );
+     let in_syncs = SyncItem::parse_array(file, data.in_syncs, data.in_sync_count, false)?;
+
+     mod_dev_dbg!(
+         device,
+         "[File {} Queue {}]: IOCTL: submit({}): Parsing out_syncs\n",
+         file.id,
+         data.queue_id,
+         id
+     );
+     let out_syncs = SyncItem::parse_array(file, data.out_syncs, data.out_sync_count, true)?;
+
+     // A zero handle means the caller does not want a result buffer.
+     let result_buf = if data.result_handle == 0 {
+         None
+     } else {
+         mod_dev_dbg!(
+             device,
+             "[File {} Queue {}]: IOCTL: submit({}): Looking up result_handle {}\n",
+             file.id,
+             data.queue_id,
+             id,
+             data.result_handle
+         );
+         Some(gem::lookup_handle(file, data.result_handle)?)
+     };
+
+     mod_dev_dbg!(
+         device,
+         "[File {} Queue {}]: IOCTL: submit({}): Parsing commands\n",
+         file.id,
+         data.queue_id,
+         id
+     );
+     let mut commands = Vec::try_with_capacity(data.command_count as usize)?;
+
+     const STRIDE: usize = core::mem::size_of::<bindings::drm_asahi_command>();
+     let total_bytes = STRIDE * data.command_count as usize;
+
+     // SAFETY: We only read this once, so there are no TOCTOU issues.
+     let mut reader =
+         unsafe { UserSlicePtr::new(data.commands as usize as *mut _, total_bytes).reader() };
+
+     for _ in 0..data.command_count {
+         let mut cmd: MaybeUninit<bindings::drm_asahi_command> = MaybeUninit::uninit();
+
+         // SAFETY: The size of `cmd` is STRIDE
+         unsafe { reader.read_raw(cmd.as_mut_ptr() as *mut u8, STRIDE)? };
+
+         // SAFETY: All bit patterns in the struct are valid
+         commands.try_push(unsafe { cmd.assume_init() })?;
+     }
+
+     let outcome = queue
+         .lock()
+         .submit(id, in_syncs, out_syncs, result_buf, commands);
+
+     match outcome {
+         Ok(_) => Ok(0),
+         // Restarted system calls are passed through without logging.
+         Err(ERESTARTSYS) => Err(ERESTARTSYS),
+         Err(e) => {
+             dev_info!(
+                 device,
+                 "[File {} Queue {}]: IOCTL: submit failed! (submission ID: {} err: {:?})\n",
+                 file.id,
+                 data.queue_id,
+                 id,
+                 e
+             );
+             Err(e)
+         }
+     }
+ }
+
+ /// Returns the unique file ID for this `File`.
+ ///
+ /// This is the ID assigned in `open` and used as the `[File N]` prefix in log messages.
+ pub(crate) fn file_id(&self) -> u64 {
+     self.id
+ }
+}
+
+ impl Drop for File {
+     /// Only logs the close; the `vms` and `queues` fields are dropped implicitly afterwards.
+     fn drop(&mut self) {
+         mod_pr_debug!("[File {}]: Closing...\n", self.id);
+     }
+ }
diff --git a/drivers/gpu/drm/asahi/float.rs b/drivers/gpu/drm/asahi/float.rs
new file mode 100644
index 000000000000..e73b4b628cf9
--- /dev/null
+++ b/drivers/gpu/drm/asahi/float.rs
@@ -0,0 +1,381 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+//! Basic soft floating-point support
+//!
+//! The GPU firmware requires a large number of power-related configuration values, many of which
+//! are IEEE 754 32-bit floating point values. These values change not only between GPU/SoC
+//! variants, but also between specific hardware platforms using these SoCs, so they must be
+//! derived from device tree properties. There are many redundant values computed from the same
+//! inputs with simple add/sub/mul/div calculations, plus a few values that are actually specific
+//! to each individual device depending on its binning and fused voltage configuration, so it
+//! doesn't make sense to store the final values to be passed to the firmware in the device tree.
+//!
+//! Therefore, we need a way to perform floating-point calculations in the kernel.
+//!
+//! Using the actual FPU from kernel mode is asking for trouble, since there is no way to bound
+//! the execution of FPU instructions to a controlled section of code without outright putting it
+//! in its own compilation unit, which is quite painful for Rust. Since these calculations only
+//! have to happen at initialization time and there is no need for performance, let's use a simple
+//! software float implementation instead.
+//!
+//! This implementation makes no attempt to be fully IEEE754 compliant, but it's good enough and
+//! gives bit-identical results to macOS in the vast majority of cases, with one or two exceptions
+//! related to slightly non-compliant rounding.
+
+use core::ops;
+use kernel::{of, prelude::*};
+
+ /// An IEEE754-compatible floating point number implemented in software.
+ ///
+ /// The wrapped `u32` is the raw binary32 bit pattern (sign, 8-bit biased exponent, 23-bit
+ /// mantissa), as decoded by `unpack()`.
+ #[derive(Default, Debug, Copy, Clone)]
+ pub(crate) struct F32(u32);
+
+ /// Unpacked working representation of an `F32`, used for the actual arithmetic.
+ ///
+ /// After `norm()`, a nonzero value has its leading one at bit 32 of `frac`, so the
+ /// represented magnitude is `frac / 2^32 * 2^exp`.
+ #[derive(Default, Debug, Copy, Clone)]
+ struct F32U {
+     /// `true` for negative values.
+     sign: bool,
+     /// Unbiased binary exponent.
+     exp: i32,
+     /// Magnitude as a fixed-point fraction (see the type-level description).
+     frac: i64,
+ }
+
+ impl F32 {
+     /// Builds an `F32` directly from its raw 32-bit representation.
+     pub(crate) const fn from_bits(u: u32) -> F32 {
+         Self(u)
+     }
+
+     // Reinterpret a native `f32` value as an F32
+     //
+     // This must ONLY be used in const context. Use the `f32!{}` macro to do it safely.
+     #[doc(hidden)]
+     pub(crate) const fn from_f32(v: f32) -> F32 {
+         // SAFETY: `f32` and `u32` have the same size and every bit pattern is a valid `u32`.
+         let bits: u32 = unsafe { core::mem::transmute(v) };
+         Self(bits)
+     }
+
+     // Reinterpret an F32 as a native `f32` value
+     //
+     // For testing only.
+     #[doc(hidden)]
+     #[cfg(test)]
+     pub(crate) fn to_f32(self) -> f32 {
+         let Self(bits) = self;
+         f32::from_bits(bits)
+     }
+
+     /// Splits the bit pattern into sign, unbiased exponent and a fraction with the implicit
+     /// leading one made explicit, then normalizes the result.
+     const fn unpack(&self) -> F32U {
+         let bits = self.0;
+         let biased_exp = (bits >> 23) & 0xff;
+         let mantissa = (bits & 0x7fffff) | 0x800000;
+
+         F32U {
+             sign: bits & (1 << 31) != 0,
+             exp: biased_exp as i32 - 127,
+             frac: (mantissa as i64) << 9,
+         }
+         .norm()
+     }
+ }
+
+ /// Safely construct an `F32` out of a constant floating-point value.
+ ///
+ /// This ensures that the conversion happens in const context, so no floating point operations are
+ /// emitted.
+ ///
+ /// Two forms are accepted: a single expression (`f32!(1.5)`) and a bracketed, comma-separated
+ /// list (`f32!([1.0, 2.0])`), which expands to an array of `F32` values.
+ #[macro_export]
+ macro_rules! f32 {
+     // Array form: convert every element individually.
+     ([$($val:expr),*]) => {{
+         [$(f32!($val)),*]
+     }};
+     // Scalar form: going through a `const` item forces compile-time evaluation.
+     ($val:expr) => {{
+         const _K: $crate::float::F32 = $crate::float::F32::from_f32($val);
+         _K
+     }};
+ }
+
+ impl ops::Neg for F32 {
+     type Output = F32;
+
+     /// Negates the value by flipping the sign bit of the raw representation.
+     fn neg(self) -> F32 {
+         let F32(bits) = self;
+         F32(bits ^ (1 << 31))
+     }
+ }
+
+ impl ops::Add<F32> for F32 {
+     type Output = F32;
+
+     /// Adds two values in unpacked form and repacks the sum.
+     fn add(self, rhs: F32) -> F32 {
+         let (lhs, rhs) = (self.unpack(), rhs.unpack());
+         lhs.add(rhs).pack()
+     }
+ }
+
+ impl ops::Sub<F32> for F32 {
+     type Output = F32;
+
+     /// Subtracts by adding the negated right-hand side.
+     fn sub(self, rhs: F32) -> F32 {
+         let negated = -rhs;
+         self.unpack().add(negated.unpack()).pack()
+     }
+ }
+
+ impl ops::Mul<F32> for F32 {
+     type Output = F32;
+
+     /// Multiplies two values in unpacked form and repacks the product.
+     fn mul(self, rhs: F32) -> F32 {
+         let (lhs, rhs) = (self.unpack(), rhs.unpack());
+         lhs.mul(rhs).pack()
+     }
+ }
+
+ impl ops::Div<F32> for F32 {
+     type Output = F32;
+
+     /// Divides two values in unpacked form and repacks the quotient.
+     fn div(self, rhs: F32) -> F32 {
+         let (lhs, rhs) = (self.unpack(), rhs.unpack());
+         lhs.div(rhs).pack()
+     }
+ }
+
+ /// Implements `From<$i>` and `From<$u>` for `F32`, given an unsigned/signed integer type pair.
+ ///
+ /// Signed values are widened to `i64` and unsigned values to `u64` before conversion.
+ macro_rules! from_ints {
+     ($u:ty, $i:ty) => {
+         impl From<$i> for F32 {
+             fn from(v: $i) -> F32 {
+                 F32U::from_i64(v as i64).pack()
+             }
+         }
+         impl From<$u> for F32 {
+             fn from(v: $u) -> F32 {
+                 F32U::from_u64(v as u64).pack()
+             }
+         }
+     };
+ }
+
+ // Integer conversions for every fixed-width integer type up to 64 bits.
+ from_ints!(u8, i8);
+ from_ints!(u16, i16);
+ from_ints!(u32, i32);
+ from_ints!(u64, i64);
+
+ impl F32U {
+     /// Unpacked positive infinity.
+     const INFINITY: F32U = f32!(f32::INFINITY).unpack();
+     /// Unpacked negative infinity.
+     const NEG_INFINITY: F32U = f32!(f32::NEG_INFINITY).unpack();
+
+     /// Converts a signed 64-bit integer.
+     ///
+     /// The magnitude is taken with `unsigned_abs()`, which cannot overflow for `i64::MIN`
+     /// (unlike `abs()`), and is then converted like an unsigned value.
+     fn from_i64(v: i64) -> F32U {
+         F32U {
+             sign: v < 0,
+             ..F32U::from_u64(v.unsigned_abs())
+         }
+     }
+
+     /// Converts an unsigned 64-bit integer.
+     fn from_u64(mut v: u64) -> F32U {
+         // With `exp == 32` the fraction is the integer value itself (value = frac * 2^(exp - 32)).
+         let mut exp = 32;
+         if v >= (1 << 63) {
+             // The top bit does not fit into the positive range of `frac`: halve the value and
+             // compensate by raising the exponent by one.
+             exp = 33;
+             v >>= 1;
+         }
+         F32U {
+             sign: false,
+             exp,
+             frac: v as i64,
+         }
+         .norm()
+     }
+
+     /// Shifts the fraction right by `shift` bits; shifting everything out yields zero.
+     fn shr(&mut self, shift: i32) {
+         if shift > 63 {
+             self.exp = 0;
+             self.frac = 0;
+         } else {
+             self.frac >>= shift;
+         }
+     }
+
+     /// Brings both operands to the larger of the two exponents by shifting the smaller
+     /// operand's fraction right.
+     fn align(a: &mut F32U, b: &mut F32U) {
+         if a.exp > b.exp {
+             b.shr(a.exp - b.exp);
+             b.exp = a.exp;
+         } else {
+             a.shr(b.exp - a.exp);
+             a.exp = b.exp;
+         }
+     }
+
+     /// Multiplies two values; the result is not normalized (`pack()` does that).
+     fn mul(self, other: F32U) -> F32U {
+         F32U {
+             sign: self.sign != other.sign,
+             exp: self.exp + other.exp,
+             // Drop low bits of both operands first so the product fits into 64 bits.
+             frac: ((self.frac >> 8) * (other.frac >> 8)) >> 16,
+         }
+     }
+
+     /// Divides `self` by `other`; the result is not normalized (`pack()` does that).
+     ///
+     /// Division by zero and division of an infinity produce an infinity whose sign is the
+     /// XOR of the operand signs, like every other quotient. NaN results are not modelled.
+     fn div(self, other: F32U) -> F32U {
+         let sign = self.sign != other.sign;
+         if other.frac == 0 || self.is_inf() {
+             if sign {
+                 F32U::NEG_INFINITY
+             } else {
+                 F32U::INFINITY
+             }
+         } else {
+             F32U {
+                 sign,
+                 exp: self.exp - other.exp,
+                 frac: ((self.frac << 24) / (other.frac >> 8)),
+             }
+         }
+     }
+
+     /// Adds two values; the result is not normalized (`pack()` does that).
+     fn add(mut self, mut other: F32U) -> F32U {
+         F32U::align(&mut self, &mut other);
+         if self.sign == other.sign {
+             self.frac += other.frac;
+         } else {
+             self.frac -= other.frac;
+         }
+         // A negative difference means the other operand was larger: flip the sign.
+         if self.frac < 0 {
+             self.sign = !self.sign;
+             self.frac = -self.frac;
+         }
+         self
+     }
+
+     /// Shifts the fraction so that its leading one sits at bit 32 and adjusts the exponent
+     /// accordingly; values that are zero or too small become a canonical (signed) zero.
+     const fn norm(mut self) -> F32U {
+         let lz = self.frac.leading_zeros() as i32;
+         if lz > 31 {
+             self.frac <<= lz - 31;
+             self.exp -= lz - 31;
+         } else if lz < 31 {
+             self.frac >>= 31 - lz;
+             self.exp += 31 - lz;
+         }
+
+         if self.is_zero() {
+             return F32U {
+                 sign: self.sign,
+                 frac: 0,
+                 exp: 0,
+             };
+         }
+         self
+     }
+
+     /// Returns `true` for zero and for exponents below -126, which are flushed to zero.
+     const fn is_zero(&self) -> bool {
+         self.frac == 0 || self.exp < -126
+     }
+
+     /// Returns `true` if the exponent is above 127, the largest finite binary32 exponent.
+     const fn is_inf(&self) -> bool {
+         self.exp > 127
+     }
+
+     /// Rounds and encodes the value as a raw binary32 bit pattern.
+     const fn pack(mut self) -> F32 {
+         self = self.norm();
+         if !self.is_zero() {
+             // Add half of the last kept mantissa bit (the low 9 bits are dropped below), then
+             // renormalize in case the addition carried out.
+             self.frac += 0x100;
+             self = self.norm();
+         }
+
+         if self.is_inf() {
+             if self.sign {
+                 return f32!(f32::NEG_INFINITY);
+             } else {
+                 return f32!(f32::INFINITY);
+             }
+         } else if self.is_zero() {
+             if self.sign {
+                 return f32!(-0.0);
+             } else {
+                 return f32!(0.0);
+             }
+         }
+
+         F32(if self.sign { 1u32 << 31 } else { 0u32 }
+             | ((self.exp + 127) as u32) << 23
+             | ((self.frac >> 9) & 0x7fffff) as u32)
+     }
+ }
+
+ impl<'a> TryFrom<of::Property<'a>> for F32 {
+     type Error = Error;
+
+     /// Reads the property as a `u32` and reinterprets it as the raw bits of an `F32`.
+     fn try_from(p: of::Property<'_>) -> core::result::Result<F32, Self::Error> {
+         Ok(F32::from_bits(u32::try_from(p)?))
+     }
+ }
+
+ impl of::PropertyUnit for F32 {
+     const UNIT_SIZE: usize = 4;
+
+     /// Decodes the bytes exactly like a `u32` unit and reinterprets the result as raw bits.
+     fn from_bytes(data: &[u8]) -> Result<Self> {
+         <u32 as of::PropertyUnit>::from_bytes(data).map(F32::from_bits)
+     }
+ }
+
+ // TODO: Make this an actual test and figure out how to make it run.
+ //
+ // For now this only prints the soft-float result next to the native `f32` result for a set of
+ // operand pairs, for manual comparison.
+ #[cfg(test)]
+ mod tests {
+     // `F32` lives in the parent module; without this import the module does not compile.
+     use super::*;
+
+     #[test]
+     fn test_all() {
+         fn add(a: f32, b: f32) {
+             println!(
+                 "{} + {} = {} {}",
+                 a,
+                 b,
+                 (F32::from_f32(a) + F32::from_f32(b)).to_f32(),
+                 a + b
+             );
+         }
+         fn sub(a: f32, b: f32) {
+             println!(
+                 "{} - {} = {} {}",
+                 a,
+                 b,
+                 (F32::from_f32(a) - F32::from_f32(b)).to_f32(),
+                 a - b
+             );
+         }
+         fn mul(a: f32, b: f32) {
+             println!(
+                 "{} * {} = {} {}",
+                 a,
+                 b,
+                 (F32::from_f32(a) * F32::from_f32(b)).to_f32(),
+                 a * b
+             );
+         }
+         fn div(a: f32, b: f32) {
+             println!(
+                 "{} / {} = {} {}",
+                 a,
+                 b,
+                 (F32::from_f32(a) / F32::from_f32(b)).to_f32(),
+                 a / b
+             );
+         }
+
+         // Runs all four operations on one operand pair.
+         fn test(a: f32, b: f32) {
+             add(a, b);
+             sub(a, b);
+             mul(a, b);
+             div(a, b);
+         }
+
+         test(1.123, 7.567);
+         test(1.123, 1.456);
+         test(7.567, 1.123);
+         test(1.123, -7.567);
+         test(1.123, -1.456);
+         test(7.567, -1.123);
+         test(-1.123, -7.567);
+         test(-1.123, -1.456);
+         test(-7.567, -1.123);
+         test(1000.123, 0.001);
+         test(1000.123, 0.0000001);
+         test(0.0012, 1000.123);
+         test(0.0000001, 1000.123);
+         test(0., 0.);
+         test(0., 1.);
+         test(1., 0.);
+         test(1., 1.);
+         test(2., f32::INFINITY);
+         test(2., f32::NEG_INFINITY);
+         test(f32::INFINITY, 2.);
+         test(f32::NEG_INFINITY, 2.);
+         test(f32::MAX, 2.);
+         test(f32::MIN, 2.);
+         test(f32::MIN_POSITIVE, 2.);
+         test(2., f32::MAX);
+         test(2., f32::MIN);
+         test(2., f32::MIN_POSITIVE);
+     }
+ }
diff --git a/drivers/gpu/drm/asahi/fw/buffer.rs b/drivers/gpu/drm/asahi/fw/buffer.rs
new file mode 100644
index 000000000000..a8a467879518
--- /dev/null
+++ b/drivers/gpu/drm/asahi/fw/buffer.rs
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+//! GPU tiled vertex buffer control firmware structures
+
+use super::types::*;
+use super::workqueue;
+use crate::{default_zeroed, no_debug, trivial_gpustruct};
+use kernel::sync::Arc;
+
+ /// Raw `repr(C)` structure layouts. Field order and sizes define the binary layout, so
+ /// fields must not be reordered.
+ pub(crate) mod raw {
+     use super::*;
+
+     /// Block control record: three atomic counters followed by a `Pad<0x34>` filler.
+     #[derive(Debug)]
+     #[repr(C)]
+     pub(crate) struct BlockControl {
+         pub(crate) total: AtomicU32,
+         pub(crate) wptr: AtomicU32,
+         pub(crate) unk: AtomicU32,
+         pub(crate) pad: Pad<0x34>,
+     }
+     default_zeroed!(BlockControl);
+
+     /// A single atomic counter followed by a `Pad<0x3c>` filler.
+     #[derive(Debug)]
+     #[repr(C)]
+     pub(crate) struct Counter {
+         pub(crate) count: AtomicU32,
+         __pad: Pad<0x3c>,
+     }
+     default_zeroed!(Counter);
+
+     /// Statistics record made of atomic counters (field meanings as named; `unk`/`gpu_c`
+     /// semantics are not evident from this file).
+     #[derive(Debug, Default)]
+     #[repr(C)]
+     pub(crate) struct Stats {
+         pub(crate) max_pages: AtomicU32,
+         pub(crate) max_b: AtomicU32,
+         pub(crate) overflow_count: AtomicU32,
+         pub(crate) gpu_c: AtomicU32,
+         pub(crate) __pad0: Pad<0x10>,
+         pub(crate) reset: AtomicU32,
+         pub(crate) __pad1: Pad<0x1c>,
+     }
+
+     /// Buffer info structure. The layout is version dependent: `unk_1c` only exists for
+     /// versions before `V13_0B4`. Fields named `unk_*` have unknown meaning.
+     #[versions(AGX)]
+     #[derive(Debug)]
+     #[repr(C)]
+     pub(crate) struct Info<'a> {
+         pub(crate) gpu_counter: u32,
+         pub(crate) unk_4: u32,
+         pub(crate) last_id: i32,
+         pub(crate) cur_id: i32,
+         pub(crate) unk_10: u32,
+         pub(crate) gpu_counter2: u32,
+         pub(crate) unk_18: u32,
+
+         #[ver(V < V13_0B4)]
+         pub(crate) unk_1c: u32,
+
+         pub(crate) page_list: GpuPointer<'a, &'a [u32]>,
+         pub(crate) page_list_size: u32,
+         pub(crate) page_count: AtomicU32,
+         pub(crate) max_blocks: u32,
+         pub(crate) block_count: AtomicU32,
+         pub(crate) unk_38: u32,
+         pub(crate) block_list: GpuPointer<'a, &'a [u32]>,
+         pub(crate) block_ctl: GpuPointer<'a, super::BlockControl>,
+         pub(crate) last_page: AtomicU32,
+         pub(crate) gpu_page_ptr1: u32,
+         pub(crate) gpu_page_ptr2: u32,
+         pub(crate) unk_58: u32,
+         pub(crate) block_size: u32,
+         pub(crate) unk_60: U64,
+         pub(crate) counter: GpuPointer<'a, super::Counter>,
+         pub(crate) unk_70: u32,
+         pub(crate) unk_74: u32,
+         pub(crate) unk_78: u32,
+         pub(crate) unk_7c: u32,
+         pub(crate) unk_80: u32,
+         pub(crate) max_pages: u32,
+         pub(crate) max_pages_nomemless: u32,
+         pub(crate) unk_8c: u32,
+         pub(crate) unk_90: Array<0x30, u8>,
+     }
+
+     /// Scene structure: page counters, a pointer to the user buffer and a weak pointer to
+     /// the `Stats` record.
+     #[derive(Debug)]
+     #[repr(C)]
+     pub(crate) struct Scene<'a> {
+         pub(crate) pass_page_count: AtomicU32,
+         pub(crate) unk_4: u32,
+         pub(crate) unk_8: U64,
+         pub(crate) unk_10: U64,
+         pub(crate) user_buffer: GpuPointer<'a, &'a [u8]>,
+         pub(crate) unk_20: u32,
+         pub(crate) stats: GpuWeakPointer<super::Stats>,
+         pub(crate) total_page_count: AtomicU32,
+         pub(crate) unk_30: U64, // pad
+         pub(crate) unk_38: U64, // pad
+     }
+
+     /// Buffer initialization command; `buffer` points at the version-matched `Info`.
+     #[versions(AGX)]
+     #[derive(Debug)]
+     #[repr(C)]
+     pub(crate) struct InitBuffer<'a> {
+         pub(crate) tag: workqueue::CommandType,
+         pub(crate) vm_slot: u32,
+         pub(crate) buffer_slot: u32,
+         pub(crate) unk_c: u32,
+         pub(crate) block_count: u32,
+         pub(crate) buffer: GpuPointer<'a, super::Info::ver>,
+         pub(crate) stamp_value: EventValue,
+     }
+ }
+
+ // Driver-side wrappers for the raw structures that need no extra driver-side state
+ // (generated by `trivial_gpustruct!`; see its definition for the exact expansion).
+ trivial_gpustruct!(BlockControl);
+ trivial_gpustruct!(Counter);
+ trivial_gpustruct!(Stats);
+
+ /// Driver-side companion of `raw::Info`: owns the GPU objects whose names match the
+ /// pointer fields of the raw structure.
+ #[versions(AGX)]
+ #[derive(Debug)]
+ pub(crate) struct Info {
+     pub(crate) block_ctl: GpuObject<BlockControl>,
+     pub(crate) counter: GpuObject<Counter>,
+     pub(crate) page_list: GpuArray<u32>,
+     pub(crate) block_list: GpuArray<u32>,
+ }
+
+ #[versions(AGX)]
+ impl GpuStruct for Info::ver {
+     /// The version-matched raw layout.
+     type Raw<'a> = raw::Info::ver<'a>;
+ }
+
+ /// Extra buffers held by a `Scene` when its `clustering` field is `Some`.
+ pub(crate) struct ClusterBuffers {
+     pub(crate) tilemaps: GpuArray<u8>,
+     pub(crate) meta: GpuArray<u8>,
+ }
+
+ /// Driver-side companion of `raw::Scene`: owns the buffers belonging to one scene.
+ #[versions(AGX)]
+ pub(crate) struct Scene {
+     pub(crate) user_buffer: GpuArray<u8>,
+     pub(crate) buffer: crate::buffer::Buffer::ver,
+     pub(crate) tvb_heapmeta: GpuArray<u8>,
+     pub(crate) tvb_tilemap: GpuArray<u8>,
+     pub(crate) tpc: Arc<GpuArray<u8>>,
+     /// `None` when no cluster buffers are allocated for this scene.
+     pub(crate) clustering: Option<ClusterBuffers>,
+     pub(crate) preempt_buf: GpuArray<u8>,
+     pub(crate) seq_buf: GpuArray<u64>,
+ }
+
+ // `Scene` does not derive `Debug`; `no_debug!` is applied instead (see its definition).
+ #[versions(AGX)]
+ no_debug!(Scene::ver);
+
+ #[versions(AGX)]
+ impl GpuStruct for Scene::ver {
+     /// `raw::Scene` is not versioned, so all versions share one raw layout.
+     type Raw<'a> = raw::Scene<'a>;
+ }
+
+ /// Driver-side companion of `raw::InitBuffer`: holds a reference to the scene.
+ #[versions(AGX)]
+ pub(crate) struct InitBuffer {
+     pub(crate) scene: Arc<crate::buffer::Scene::ver>,
+ }
+
+ // `InitBuffer` does not derive `Debug`; `no_debug!` is applied instead (see its definition).
+ #[versions(AGX)]
+ no_debug!(InitBuffer::ver);
+
+ // Marks `InitBuffer` as a work queue command; the impl body is empty.
+ #[versions(AGX)]
+ impl workqueue::Command for InitBuffer::ver {}
+
+ #[versions(AGX)]
+ impl GpuStruct for InitBuffer::ver {
+     /// The version-matched raw layout.
+     type Raw<'a> = raw::InitBuffer::ver<'a>;
+ }
diff --git a/drivers/gpu/drm/asahi/fw/channels.rs b/drivers/gpu/drm/asahi/fw/channels.rs
new file mode 100644
index 000000000000..db5ac9a3ded5
--- /dev/null
+++ b/drivers/gpu/drm/asahi/fw/channels.rs
@@ -0,0 +1,385 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+//! GPU communication channel firmware structures (ring buffers)
+
+use super::types::*;
+use crate::default_zeroed;
+use core::sync::atomic::Ordering;
+
+pub(crate) mod raw { // #[repr(C)] layouts of the channel state blocks
+ use super::*;
+
+ #[derive(Debug)]
+ #[repr(C)] // C layout: fields are laid out in declaration order
+ pub(crate) struct ChannelState<'a> { // read/write pointer pair for one ring
+ pub(crate) read_ptr: AtomicU32,
+ __pad0: Pad<0x1c>, // NOTE(review): if Pad<N> occupies N bytes, write_ptr lands at offset 0x20 — confirm
+ pub(crate) write_ptr: AtomicU32,
+ __pad1: Pad<0xc>,
+ _p: PhantomData<&'a ()>, // zero-sized; only ties the struct to lifetime 'a
+ }
+ default_zeroed!(<'a>, ChannelState<'a>); // NOTE(review): presumably implements Default as the all-zero value — confirm against the macro
+
+ #[derive(Debug)]
+ #[repr(C)]
+ pub(crate) struct FwCtlChannelState<'a> { // same fields as ChannelState, but with a shorter first pad
+ pub(crate) read_ptr: AtomicU32,
+ __pad0: Pad<0xc>, // NOTE(review): if Pad<N> occupies N bytes, write_ptr lands at offset 0x10 — confirm
+ pub(crate) write_ptr: AtomicU32,
+ __pad1: Pad<0xc>,
+ _p: PhantomData<&'a ()>, // zero-sized; only ties the struct to lifetime 'a
+ }
+ default_zeroed!(<'a>, FwCtlChannelState<'a>);
+}
+
+pub(crate) trait RxChannelState: GpuStruct + Debug + Default // state block of a channel where this side reads the write pointer and updates the read pointer
+where
+ for<'a> <Self as GpuStruct>::Raw<'a>: Default + Zeroed, // the raw state must be default-constructible and satisfy `Zeroed`
+{
+ const SUB_CHANNELS: usize; // number of sub-channels in the state block (1 and 6 in the impls of this file)
+
+ fn wptr(raw: &Self::Raw<'_>, index: usize) -> u32; // current write pointer of sub-channel `index`
+ fn set_rptr(raw: &Self::Raw<'_>, index: usize, rptr: u32); // store a new read pointer for sub-channel `index`; takes `&` because the impls write through atomics
+}
+
+#[derive(Debug, Default)]
+pub(crate) struct ChannelState {} // fieldless driver-side type; all data lives in the raw layout
+
+impl GpuStruct for ChannelState {
+ type Raw<'a> = raw::ChannelState<'a>; // one read/write pointer pair
+}
+
+impl RxChannelState for ChannelState {
+ const SUB_CHANNELS: usize = 1; // single ring, so `_index` is ignored below
+
+ fn wptr(raw: &Self::Raw<'_>, _index: usize) -> u32 {
+ raw.write_ptr.load(Ordering::Acquire) // Acquire: later reads in this thread are ordered after observing the pointer
+ }
+
+ fn set_rptr(raw: &Self::Raw<'_>, _index: usize, rptr: u32) {
+ raw.read_ptr.store(rptr, Ordering::Release); // Release: earlier accesses in this thread are ordered before the pointer update
+ }
+}
+
+#[derive(Debug, Default)]
+pub(crate) struct FwLogChannelState {} // fieldless driver-side type; all data lives in the raw layout
+
+impl GpuStruct for FwLogChannelState {
+ type Raw<'a> = Array<6, raw::ChannelState<'a>>; // six pointer pairs, matching SUB_CHANNELS = 6 in the RxChannelState impl
+}
+
+impl RxChannelState for FwLogChannelState {
+ const SUB_CHANNELS: usize = 6; // must stay equal to the length of the raw Array
+
+ fn wptr(raw: &Self::Raw<'_>, index: usize) -> u32 {
+ raw[index].write_ptr.load(Ordering::Acquire) // NOTE(review): behaviour for index >= 6 depends on Array's Index impl — confirm callers stay below SUB_CHANNELS
+ }
+
+ fn set_rptr(raw: &Self::Raw<'_>, index: usize, rptr: u32) {
+ raw[index].read_ptr.store(rptr, Ordering::Release); // Release: earlier accesses in this thread are ordered before the pointer update
+ }
+}
+
+#[derive(Debug, Default)]
+pub(crate) struct FwCtlChannelState {} // fieldless driver-side type; only a TxChannelState impl exists for it in this file
+
+impl GpuStruct for FwCtlChannelState {
+ type Raw<'a> = raw::FwCtlChannelState<'a>; // the variant of the pointer pair with the shorter first pad
+}
+
+pub(crate) trait TxChannelState: GpuStruct + Debug + Default { // mirror image of RxChannelState: this side reads the read pointer and updates the write pointer
+ fn rptr(raw: &Self::Raw<'_>) -> u32; // current read pointer
+ fn set_wptr(raw: &Self::Raw<'_>, wptr: u32); // store a new write pointer; no sub-channel index on this trait
+}
+
+impl TxChannelState for ChannelState {
+ fn rptr(raw: &Self::Raw<'_>) -> u32 {
+ raw.read_ptr.load(Ordering::Acquire) // Acquire: later accesses in this thread are ordered after observing the pointer
+ }
+
+ fn set_wptr(raw: &Self::Raw<'_>, wptr: u32) {
+ raw.write_ptr.store(wptr, Ordering::Release); // Release: earlier writes in this thread are ordered before the pointer update
+ }
+}
+
+impl TxChannelState for FwCtlChannelState { // same bodies as the ChannelState impl; only the raw layout differs
+ fn rptr(raw: &Self::Raw<'_>) -> u32 {
+ raw.read_ptr.load(Ordering::Acquire) // Acquire: later accesses in this thread are ordered after observing the pointer
+ }
+
+ fn set_wptr(raw: &Self::Raw<'_>, wptr: u32) {
+ raw.write_ptr.store(wptr, Ordering::Release); // Release: earlier writes in this thread are ordered before the pointer update
+ }
+}
+
+#[derive(Debug, Copy, Clone, Default)]
+#[repr(u32)] // stored as a u32 holding the explicit values below
+pub(crate) enum PipeType { // selector stored in RunWorkQueueMsg::pipe_type
+ #[default] // Default::default() yields Vertex
+ Vertex = 0,
+ Fragment = 1,
+ Compute = 2,
+}
+
+#[versions(AGX)] // NOTE(review): presumably generates one copy of this type per supported firmware/GPU version — confirm against the macro
+#[derive(Debug, Copy, Clone, Default)]
+#[repr(C)] // C layout: fields are laid out in declaration order
+pub(crate) struct RunWorkQueueMsg { // message aliased as `PipeMsg`; the vtx/frag/comp rings in initdata use it as their element type
+ pub(crate) pipe_type: PipeType,
+ pub(crate) work_queue: Option<GpuWeakPointer<super::workqueue::QueueInfo::ver>>, // None is the Default value
+ pub(crate) wptr: u32,
+ pub(crate) event_slot: u32,
+ pub(crate) is_new: bool,
+ #[ver(V >= V13_2 && G >= G14)] // this condition and the one below are logical complements, so exactly one `__pad` applies
+ pub(crate) __pad: Pad<0x2b>,
+ #[ver(V < V13_2 || G < G14)]
+ pub(crate) __pad: Pad<0x1b>, // 0x10 shorter than the other variant, the same delta as DEVICECONTROL_SZ
+}
+
+#[versions(AGX)]
+pub(crate) type PipeMsg = RunWorkQueueMsg::ver; // alias: RunWorkQueueMsg is the only pipe message type defined in this file
+
+#[versions(AGX)]
+pub(crate) const DEVICECONTROL_SZ: usize = { // payload size used by every DeviceControlMsg variant
+ #[ver(V < V13_2 || G < G14)] // complement of the condition on the second block
+ {
+ 0x2c
+ }
+ #[ver(V >= V13_2 && G >= G14)]
+ {
+ 0x3c
+ }
+};
+
+// TODO: clean up when arbitrary_enum_discriminant is stable
+// https://github.com/rust-lang/rust/issues/60553
+
+#[versions(AGX)]
+#[derive(Debug, Copy, Clone)]
+#[repr(C, u32)] // u32 tag followed by the payload of the active variant
+#[allow(dead_code)] // most variants are placeholders that are never constructed by name
+pub(crate) enum DeviceControlMsg { // variants get implicit discriminants from 0, so each UnkNN name equals its tag in hex
+ Unk00(Array<DEVICECONTROL_SZ::ver, u8>), // opaque payload of DEVICECONTROL_SZ bytes
+ Unk01(Array<DEVICECONTROL_SZ::ver, u8>),
+ Unk02(Array<DEVICECONTROL_SZ::ver, u8>),
+ Unk03(Array<DEVICECONTROL_SZ::ver, u8>),
+ Unk04(Array<DEVICECONTROL_SZ::ver, u8>),
+ Unk05(Array<DEVICECONTROL_SZ::ver, u8>),
+ Unk06(Array<DEVICECONTROL_SZ::ver, u8>),
+ Unk07(Array<DEVICECONTROL_SZ::ver, u8>),
+ Unk08(Array<DEVICECONTROL_SZ::ver, u8>),
+ Unk09(Array<DEVICECONTROL_SZ::ver, u8>),
+ Unk0a(Array<DEVICECONTROL_SZ::ver, u8>),
+ Unk0b(Array<DEVICECONTROL_SZ::ver, u8>),
+ Unk0c(Array<DEVICECONTROL_SZ::ver, u8>),
+ Unk0d(Array<DEVICECONTROL_SZ::ver, u8>),
+ Unk0e(Array<DEVICECONTROL_SZ::ver, u8>),
+ Unk0f(Array<DEVICECONTROL_SZ::ver, u8>),
+ Unk10(Array<DEVICECONTROL_SZ::ver, u8>),
+ Unk11(Array<DEVICECONTROL_SZ::ver, u8>),
+ Unk12(Array<DEVICECONTROL_SZ::ver, u8>),
+ Unk13(Array<DEVICECONTROL_SZ::ver, u8>),
+ Unk14(Array<DEVICECONTROL_SZ::ver, u8>),
+ Unk15(Array<DEVICECONTROL_SZ::ver, u8>),
+ Unk16(Array<DEVICECONTROL_SZ::ver, u8>),
+ DestroyContext { // discriminant 0x17 (24th variant)
+ unk_4: u32,
+ ctx_23: u8,
+ __pad0: Pad<3>,
+ unk_c: u32,
+ unk_10: u32,
+ ctx_0: u8,
+ ctx_1: u8,
+ ctx_4: u8,
+ __pad1: Pad<1>,
+ unk_18: u32,
+ gpu_context: Option<GpuWeakPointer<super::workqueue::GpuContextData>>,
+ __pad2: Pad<{ DEVICECONTROL_SZ::ver - 0x20 }>, // NOTE(review): assumes the fields above plus the tag total 0x20 bytes, so this variant matches the others in size — confirm
+ },
+ Unk18(Array<DEVICECONTROL_SZ::ver, u8>), // discriminant 0x18
+ Initialize(Pad<DEVICECONTROL_SZ::ver>), // discriminant 0x19; payload is padding only
+}
+
+#[versions(AGX)]
+default_zeroed!(DeviceControlMsg::ver); // NOTE(review): presumably Default = all-zero bytes, which would decode as Unk00 (tag 0) — confirm against the macro
+
+#[derive(Copy, Clone, Default, Debug)]
+#[repr(C)] // C layout: fields are laid out in declaration order
+#[allow(dead_code)]
+pub(crate) struct FwCtlMsg { // element type of the fwctl channel ring declared in initdata's FwStatus
+ pub(crate) addr: U64,
+ pub(crate) unk_8: u32,
+ pub(crate) slot: u32,
+ pub(crate) page_count: u16,
+ pub(crate) unk_12: u16,
+}
+
+pub(crate) const EVENT_SZ: usize = 0x34; // byte length of the opaque payload in EventMsg::Unk2/Unk3 and in RawEventMsg::raw
+
+#[derive(Debug, Copy, Clone)]
+#[repr(C, u32)] // u32 tag followed by the payload of the active variant
+#[allow(dead_code)]
+pub(crate) enum EventMsg { // implicit discriminants: Fault = 0 through Timeout = 4
+ Fault, // 0x0, no payload
+ Flag { // 0x1
+ firing: [u32; 4],
+ unk_14: u16,
+ },
+ Unk2(Array<EVENT_SZ, u8>), // 0x2, opaque payload
+ Unk3(Array<EVENT_SZ, u8>), // 0x3, opaque payload
+ Timeout {
+ counter: u32,
+ unk_8: u32,
+ event_slot: u32,
+ }, // Max discriminant: 0x4
+}
+
+pub(crate) const EVENT_MAX: u32 = 0x4; // highest EventMsg discriminant (Timeout); keep in sync when adding variants
+
+#[derive(Copy, Clone)]
+#[repr(C)] // union: both fields start at offset 0 and overlap
+pub(crate) union RawEventMsg { // lets the same bytes be viewed either untyped (`raw`) or as a decoded EventMsg (`msg`)
+ pub(crate) raw: (u32, Array<EVENT_SZ, u8>), // NOTE(review): Rust leaves tuple field layout unspecified; presumably `.0` is expected to overlap the u32 tag of `msg` — confirm
+ pub(crate) msg: EventMsg, // NOTE(review): only valid to read when the tag is <= EVENT_MAX — presumably checked by the reader; confirm
+}
+
+default_zeroed!(RawEventMsg); // NOTE(review): presumably Default = all-zero bytes — confirm against the macro
+
+#[derive(Debug, Copy, Clone, Default)]
+#[repr(C)] // C layout: fields are laid out in declaration order
+pub(crate) struct RawFwLogMsg { // firmware log message header form: type, index and trailing padding
+ pub(crate) msg_type: u32,
+ __pad0: u32, // private filler between msg_type and msg_index
+ pub(crate) msg_index: U64,
+ __pad1: Pad<0x28>,
+}
+
+#[derive(Debug, Copy, Clone, Default)]
+#[repr(C)] // C layout: fields are laid out in declaration order
+pub(crate) struct RawFwLogPayloadMsg { // firmware log message form carrying a byte payload
+ pub(crate) msg_type: u32,
+ pub(crate) seq_no: u32,
+ pub(crate) timestamp: U64,
+ pub(crate) msg: Array<0xc8, u8>, // 0xc8 raw bytes; NOTE(review): encoding/termination of the text is not visible here
+}
+
+#[derive(Debug, Copy, Clone, Default)]
+#[repr(C)] // C layout: fields are laid out in declaration order
+pub(crate) struct RawKTraceMsg { // trace message layout: timestamp, four 64-bit args and small id fields
+ pub(crate) msg_type: u32,
+ pub(crate) timestamp: U64,
+ pub(crate) args: Array<4, U64>,
+ pub(crate) code: u8,
+ pub(crate) channel: u8,
+ __pad: Pad<1>, // private filler between channel and thread
+ pub(crate) thread: u8,
+ pub(crate) unk_flag: U64,
+}
+
+#[versions(AGX)]
+pub(crate) const STATS_SZ: usize = { // byte length of the opaque payload in StatsMsg::UnkN and RawStatsMsg::raw
+ #[ver(V < V13_0B4)] // complement of the condition on the second block
+ {
+ 0x2c
+ }
+ #[ver(V >= V13_0B4)]
+ {
+ 0x3c
+ }
+};
+
+#[versions(AGX)]
+#[derive(Debug, Copy, Clone)]
+#[repr(C, u32)] // u32 tag followed by the payload of the active variant
+#[allow(dead_code)]
+pub(crate) enum StatsMsg { // implicit discriminants from 0; the inline `// 0xNN` notes give each variant's tag
+ Power {
+ // 0x00
+ __pad: Pad<0x18>,
+ power: U64,
+ },
+ Unk1(Array<{ STATS_SZ::ver }, u8>), // 0x01, opaque payload
+ PowerOn {
+ // 0x02
+ off_time: U64,
+ },
+ PowerOff {
+ // 0x03
+ on_time: U64,
+ },
+ Utilization {
+ // 0x04
+ timestamp: U64,
+ util1: u32,
+ util2: u32,
+ util3: u32,
+ util4: u32,
+ },
+ Unk5(Array<{ STATS_SZ::ver }, u8>), // 0x05, opaque payload
+ Unk6(Array<{ STATS_SZ::ver }, u8>), // 0x06, opaque payload
+ Unk7(Array<{ STATS_SZ::ver }, u8>), // 0x07, opaque payload
+ Unk8(Array<{ STATS_SZ::ver }, u8>), // 0x08, opaque payload
+ AvgPower {
+ // 0x09
+ active_cs: U64,
+ unk2: u32,
+ unk3: u32,
+ unk4: u32,
+ avg_power: u32,
+ },
+ Temperature {
+ // 0x0a
+ __pad: Pad<0x8>,
+ raw_value: u32,
+ scale: u32,
+ tmin: u32,
+ tmax: u32,
+ },
+ PowerState {
+ // 0x0b
+ timestamp: U64,
+ last_busy_ts: U64,
+ active: u32,
+ poweroff: u32,
+ unk1: u32,
+ pstate: u32,
+ unk2: u32,
+ unk3: u32,
+ },
+ FwBusy {
+ // 0x0c
+ timestamp: U64,
+ busy: u32,
+ },
+ PState {
+ // 0x0d
+ __pad: Pad<0x8>,
+ ps_min: u32,
+ unk1: u32,
+ ps_max: u32,
+ unk2: u32,
+ },
+ TempSensor {
+ // 0x0e
+ __pad: Pad<0x4>,
+ sensor_id: u32,
+ raw_value: u32,
+ scale: u32,
+ tmin: u32,
+ tmax: u32,
+ }, // Max discriminant: 0xe
+}
+
+#[versions(AGX)]
+pub(crate) const STATS_MAX: u32 = 0xe; // highest StatsMsg discriminant (TempSensor); keep in sync when adding variants
+
+#[versions(AGX)]
+#[derive(Copy, Clone)]
+#[repr(C)] // union: both fields start at offset 0 and overlap
+pub(crate) union RawStatsMsg { // lets the same bytes be viewed either untyped (`raw`) or as a decoded StatsMsg (`msg`)
+ pub(crate) raw: (u32, Array<{ STATS_SZ::ver }, u8>), // NOTE(review): Rust leaves tuple field layout unspecified; presumably `.0` is expected to overlap the u32 tag of `msg` — confirm
+ pub(crate) msg: StatsMsg::ver, // NOTE(review): only valid to read when the tag is <= STATS_MAX — presumably checked by the reader; confirm
+}
+
+#[versions(AGX)]
+default_zeroed!(RawStatsMsg::ver); // NOTE(review): presumably Default = all-zero bytes — confirm against the macro
diff --git a/drivers/gpu/drm/asahi/fw/compute.rs b/drivers/gpu/drm/asahi/fw/compute.rs
new file mode 100644
index 000000000000..0dbcd77c5e3e
--- /dev/null
+++ b/drivers/gpu/drm/asahi/fw/compute.rs
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+//! GPU compute job firmware structures
+
+use super::types::*;
+use super::{event, job, workqueue};
+use crate::{microseq, mmu};
+use kernel::sync::Arc;
+
+pub(crate) mod raw { // #[repr(C)] layouts of the compute job command and its parameter blocks
+ use super::*;
+
+ #[derive(Debug)]
+ #[repr(C)] // C layout: fields are laid out in declaration order
+ pub(crate) struct JobParameters1<'a> { // first parameter block embedded in RunCompute; not version-dependent
+ pub(crate) preempt_buf1: GpuPointer<'a, &'a [u8]>,
+ pub(crate) encoder: U64,
+ pub(crate) preempt_buf2: GpuPointer<'a, &'a [u8]>,
+ pub(crate) preempt_buf3: GpuPointer<'a, &'a [u8]>,
+ pub(crate) preempt_buf4: GpuPointer<'a, &'a [u8]>,
+ pub(crate) preempt_buf5: GpuPointer<'a, &'a [u8]>,
+ pub(crate) pipeline_base: U64,
+ pub(crate) unk_38: U64,
+ pub(crate) unk_40: u32,
+ pub(crate) unk_44: u32,
+ pub(crate) compute_layout_addr: U64,
+ pub(crate) unk_50: u32,
+ pub(crate) unk_54: u32,
+ pub(crate) unk_58: u32,
+ pub(crate) unk_5c: u32,
+ pub(crate) iogpu_unk_40: u32,
+ }
+
+ #[versions(AGX)]
+ #[derive(Debug)]
+ #[repr(C)]
+ pub(crate) struct JobParameters2<'a> { // second parameter block embedded in RunCompute
+ #[ver(V >= V13_0B4)] // newer firmware gains a u32 at the front...
+ pub(crate) unk_0_0: u32,
+ pub(crate) unk_0: Array<0x24, u8>,
+ pub(crate) preempt_buf1: GpuPointer<'a, &'a [u8]>,
+ pub(crate) encoder_end: U64,
+ pub(crate) unk_34: Array<0x28, u8>,
+ #[ver(V < V13_0B4)] // ...and older firmware has a u32 at the end instead
+ pub(crate) unk_5c: u32,
+ }
+
+ #[versions(AGX)]
+ #[derive(Debug)]
+ #[repr(C)]
+ pub(crate) struct RunCompute<'a> { // raw compute command; `GpuStruct::Raw` of the outer `RunCompute::ver`
+ pub(crate) tag: workqueue::CommandType, // first field, typed as a work-queue command type
+
+ #[ver(V >= V13_0B4)] // present only on V13_0B4 and later
+ pub(crate) counter: U64,
+
+ pub(crate) unk_4: u32,
+ pub(crate) vm_slot: u32,
+ pub(crate) notifier: GpuPointer<'a, event::Notifier::ver>,
+ pub(crate) unk_pointee: Array<0x54, u8>,
+ pub(crate) job_params1: JobParameters1<'a>, // embedded by value
+ pub(crate) unk_b8: Array<0x11c, u8>,
+ pub(crate) microsequence: GpuPointer<'a, &'a [u8]>,
+ pub(crate) microsequence_size: u32,
+ pub(crate) job_params2: JobParameters2::ver<'a>, // embedded by value
+ pub(crate) encoder_params: job::raw::EncoderParams<'a>,
+ pub(crate) meta: job::raw::JobMeta,
+ pub(crate) cur_ts: U64,
+ pub(crate) start_ts: Option<GpuPointer<'a, AtomicU64>>, // optional pointer to a 64-bit atomic
+ pub(crate) end_ts: Option<GpuPointer<'a, AtomicU64>>, // optional pointer to a 64-bit atomic
+ pub(crate) unk_2c0: u32,
+ pub(crate) unk_2c4: u32,
+ pub(crate) unk_2c8: u32,
+ pub(crate) unk_2cc: u32,
+ pub(crate) client_sequence: u8,
+ pub(crate) pad_2d1: Array<3, u8>,
+ pub(crate) unk_2d4: u32,
+ pub(crate) unk_2d8: u8,
+ #[ver(V >= V13_0B4)] // the remaining four fields exist only on V13_0B4 and later
+ pub(crate) unk_ts: U64,
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_2e1: Array<0x1c, u8>,
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_flag: U32,
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_pad: Array<0x10, u8>,
+ }
+}
+
+#[versions(AGX)]
+#[derive(Debug)]
+pub(crate) struct RunCompute { // driver-side half of the compute command; raw layout is `raw::RunCompute::ver`
+ pub(crate) notifier: Arc<GpuObject<event::Notifier::ver>>, // reference-counted, so it may be shared with other holders
+ pub(crate) preempt_buf: GpuArray<u8>,
+ pub(crate) seq_buf: GpuArray<u64>,
+ pub(crate) micro_seq: microseq::MicroSequence,
+ pub(crate) vm_bind: mmu::VmBind,
+ pub(crate) timestamps: Arc<GpuObject<job::JobTimestamps>>, // reference-counted, so it may be shared with other holders
+}
+
+#[versions(AGX)]
+impl GpuStruct for RunCompute::ver { // pairs the driver-side type with its raw layout
+ type Raw<'a> = raw::RunCompute::ver<'a>;
+}
+
+#[versions(AGX)]
+impl workqueue::Command for RunCompute::ver {} // marker impl: no items are overridden here
diff --git a/drivers/gpu/drm/asahi/fw/event.rs b/drivers/gpu/drm/asahi/fw/event.rs
new file mode 100644
index 000000000000..fbf65ab6d976
--- /dev/null
+++ b/drivers/gpu/drm/asahi/fw/event.rs
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+//! GPU events control structures & stamps
+
+use super::types::*;
+use crate::{default_zeroed, trivial_gpustruct};
+use core::sync::atomic::Ordering;
+
+pub(crate) mod raw { // #[repr(C)] layouts of the event notifier structures and stamps
+ use super::*;
+
+ #[derive(Debug, Clone, Copy, Default)]
+ #[repr(C)] // C layout: fields are laid out in declaration order
+ pub(crate) struct LinkedListHead { // prev/next pair of optional weak GPU pointers to other list heads
+ pub(crate) prev: Option<GpuWeakPointer<LinkedListHead>>,
+ pub(crate) next: Option<GpuWeakPointer<LinkedListHead>>,
+ }
+
+ #[derive(Debug, Clone, Copy)]
+ #[repr(C)]
+ pub(crate) struct NotifierList { // a list head followed by one unknown 64-bit field
+ pub(crate) list_head: LinkedListHead,
+ pub(crate) unkptr_10: U64,
+ }
+ default_zeroed!(NotifierList); // NOTE(review): presumably Default = all-zero bytes — confirm against the macro
+
+ #[versions(AGX)]
+ #[derive(Debug, Clone, Copy)]
+ #[repr(C)]
+ pub(crate) struct NotifierState { // state block embedded at the end of Notifier; all fields but `unk_buf` are private
+ unk_14: u32,
+ unk_18: U64,
+ unk_20: u32,
+ vm_slot: u32,
+ has_vtx: u32,
+ pstamp_vtx: Array<4, U64>,
+ has_frag: u32,
+ pstamp_frag: Array<4, U64>,
+ has_comp: u32,
+ pstamp_comp: Array<4, U64>,
+ #[ver(G >= G14 && V < V13_0B4)] // only for G14 and later on firmware before V13_0B4
+ unk_98_g14_0: Array<0x14, u8>,
+ in_list: u32,
+ list_head: LinkedListHead,
+ #[ver(G >= G14 && V < V13_0B4)]
+ unk_a8_g14_0: Pad<4>,
+ #[ver(V >= V13_0B4)] // only on V13_0B4 and later
+ pub(crate) unk_buf: Array<0x8, u8>, // Init to all-ff
+ }
+
+ #[versions(AGX)]
+ impl Default for NotifierState::ver { // hand-written because the default is not all-zero on V13_0B4 and later
+ fn default() -> Self {
+ #[allow(unused_mut)] // `s` is only mutated when the V >= V13_0B4 statement below is compiled in
+ let mut s: Self = unsafe { core::mem::zeroed() }; // NOTE(review): relies on all-zero bytes being a valid value for every field — confirm and record as a SAFETY note
+ #[ver(V >= V13_0B4)]
+ s.unk_buf = Array::new([0xff; 0x8]); // matches the "Init to all-ff" requirement on the field
+ s
+ }
+ }
+
+ #[derive(Debug)]
+ #[repr(transparent)] // same layout as the single AtomicU64 it wraps
+ pub(crate) struct Threshold(AtomicU64);
+ default_zeroed!(Threshold);
+
+ impl Threshold {
+ pub(crate) fn increment(&self) { // adds 1 to the stored value using a separate load and store
+ // We could use fetch_add, but the non-LSE atomic
+ // sequence Rust produces confuses the hypervisor.
+ let v = self.0.load(Ordering::Relaxed);
+ self.0.store(v + 1, Ordering::Relaxed); // not an atomic read-modify-write: a concurrent increment between the load and this store would be lost
+ }
+ }
+
+ #[versions(AGX)]
+ #[derive(Debug)]
+ #[repr(C)]
+ pub(crate) struct Notifier<'a> { // raw notifier; `GpuStruct::Raw` of the outer `Notifier::ver`
+ pub(crate) threshold: GpuPointer<'a, super::Threshold>, // points at the Threshold object owned by the outer Notifier
+ pub(crate) generation: AtomicU32,
+ pub(crate) cur_count: AtomicU32,
+ pub(crate) unk_10: AtomicU32,
+ pub(crate) state: NotifierState::ver, // embedded by value
+ }
+}
+
+trivial_gpustruct!(Threshold); // NOTE(review): presumably declares the driver-side `Threshold` type wrapping `raw::Threshold` with no extra state — confirm against the macro
+trivial_gpustruct!(NotifierList);
+
+#[versions(AGX)]
+#[derive(Debug)]
+pub(crate) struct Notifier { // driver-side half of a notifier; raw layout is `raw::Notifier::ver`
+ pub(crate) threshold: GpuObject<Threshold>, // owned Threshold object; the raw struct holds a GpuPointer to a Threshold
+}
+
+#[versions(AGX)]
+impl GpuStruct for Notifier::ver { // pairs the driver-side type with its raw layout
+ type Raw<'a> = raw::Notifier::ver<'a>;
+}
diff --git a/drivers/gpu/drm/asahi/fw/fragment.rs b/drivers/gpu/drm/asahi/fw/fragment.rs
new file mode 100644
index 000000000000..eca275efb967
--- /dev/null
+++ b/drivers/gpu/drm/asahi/fw/fragment.rs
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+//! GPU fragment job firmware structures
+
+use super::types::*;
+use super::{event, job, workqueue};
+use crate::{buffer, fw, microseq, mmu};
+use kernel::sync::Arc;
+
+pub(crate) mod raw { // #[repr(C)] layouts of the fragment job command and its parameter blocks
+ use super::*;
+
+ #[derive(Debug, Clone, Copy)]
+ #[repr(C)] // C layout: fields are laid out in declaration order
+ pub(crate) struct ClearPipelineBinding { // pair of 64-bit values; no Default derive, so both must be given explicitly
+ pub(crate) pipeline_bind: U64,
+ pub(crate) address: U64,
+ }
+
+ #[derive(Debug, Clone, Copy, Default)]
+ #[repr(C)]
+ pub(crate) struct StorePipelineBinding { // 32-bit bind/address pair interleaved with unknown fields
+ pub(crate) unk_0: U64,
+ pub(crate) unk_8: u32,
+ pub(crate) pipeline_bind: u32,
+ pub(crate) unk_10: u32,
+ pub(crate) address: u32,
+ pub(crate) unk_18: u32,
+ pub(crate) unk_1c_padding: u32,
+ }
+
+ impl StorePipelineBinding {
+ pub(crate) fn new(pipeline_bind: u32, address: u32) -> StorePipelineBinding { // sets the two named fields; every other field takes its Default value
+ StorePipelineBinding {
+ pipeline_bind,
+ address,
+ ..Default::default()
+ }
+ }
+ }
+
+ #[derive(Debug)]
+ #[repr(C)]
+ pub(crate) struct ArrayAddr { // 64-bit pointer value followed by 64 bits of unknown/padding
+ pub(crate) ptr: U64,
+ pub(crate) unk_padding: U64,
+ }
+
+ #[versions(AGX)]
+ #[derive(Debug, Clone, Copy)]
+ #[repr(C)]
+ pub(crate) struct AuxFBInfo { // embedded in both JobParameters1 and JobParameters3
+ pub(crate) iogpu_unk_214: u32,
+ pub(crate) unk2: u32,
+ pub(crate) width: u32,
+ pub(crate) height: u32,
+
+ #[ver(V >= V13_0B4)] // present only on V13_0B4 and later
+ pub(crate) unk3: U64,
+ }
+
+ #[versions(AGX)]
+ #[derive(Debug)]
+ #[repr(C)]
+ pub(crate) struct JobParameters1<'a> { // first parameter block embedded in RunFragment
+ pub(crate) utile_config: u32,
+ pub(crate) unk_4: u32,
+ pub(crate) clear_pipeline: ClearPipelineBinding,
+ pub(crate) ppp_multisamplectl: U64,
+ pub(crate) scissor_array: U64,
+ pub(crate) depth_bias_array: U64,
+ pub(crate) aux_fb_info: AuxFBInfo::ver,
+ pub(crate) depth_dimensions: U64,
+ pub(crate) visibility_result_buffer: U64,
+ pub(crate) zls_ctrl: U64,
+
+ #[ver(G >= G14)] // the next two fields exist only for G14 and later
+ pub(crate) unk_58_g14_0: U64,
+ #[ver(G >= G14)]
+ pub(crate) unk_58_g14_8: U64,
+
+ pub(crate) depth_buffer_ptr1: U64,
+ pub(crate) depth_buffer_ptr2: U64,
+ pub(crate) stencil_buffer_ptr1: U64,
+ pub(crate) stencil_buffer_ptr2: U64,
+
+ #[ver(G >= G14)]
+ pub(crate) unk_68_g14_0: Array<0x20, u8>,
+
+ pub(crate) unk_78: Array<0x4, U64>,
+ pub(crate) depth_meta_buffer_ptr1: U64,
+ pub(crate) unk_a0: U64,
+ pub(crate) depth_meta_buffer_ptr2: U64,
+ pub(crate) unk_b0: U64,
+ pub(crate) stencil_meta_buffer_ptr1: U64,
+ pub(crate) unk_c0: U64,
+ pub(crate) stencil_meta_buffer_ptr2: U64,
+ pub(crate) unk_d0: U64,
+ pub(crate) tvb_tilemap: GpuPointer<'a, &'a [u8]>,
+ pub(crate) tvb_heapmeta: GpuPointer<'a, &'a [u8]>,
+ pub(crate) mtile_stride_dwords: U64,
+ pub(crate) tvb_heapmeta_2: GpuPointer<'a, &'a [u8]>,
+ pub(crate) tile_config: U64,
+ pub(crate) aux_fb: GpuPointer<'a, &'a [u8]>,
+ pub(crate) unk_108: Array<0x6, U64>,
+ pub(crate) pipeline_base: U64,
+ pub(crate) unk_140: U64,
+ pub(crate) unk_148: U64,
+ pub(crate) unk_150: U64,
+ pub(crate) unk_158: U64,
+ pub(crate) unk_160: U64,
+
+ #[ver(G < G14)] // this condition and the next are complements: exactly one padding length applies
+ pub(crate) unk_168_padding: Array<0x1d8, u8>,
+ #[ver(G >= G14)]
+ pub(crate) unk_168_padding: Array<0x1a8, u8>, // 0x30 shorter, equal to the bytes of the three G14-only fields above (8 + 8 + 0x20)
+ #[ver(V < V13_0B4)] // trailing pad only before V13_0B4
+ pub(crate) __pad0: Pad<0x8>,
+ }
+
+ #[derive(Debug)]
+ #[repr(C)]
+ pub(crate) struct JobParameters2 { // second parameter block embedded in RunFragment; not version-dependent
+ pub(crate) store_pipeline_bind: u32,
+ pub(crate) store_pipeline_addr: u32,
+ pub(crate) unk_8: u32,
+ pub(crate) unk_c: u32,
+ pub(crate) merge_upper_x: F32,
+ pub(crate) merge_upper_y: F32,
+ pub(crate) unk_18: U64,
+ pub(crate) utiles_per_mtile_y: u16,
+ pub(crate) utiles_per_mtile_x: u16,
+ pub(crate) unk_24: u32,
+ pub(crate) tile_counts: u32,
+ pub(crate) iogpu_unk_212: u32,
+ pub(crate) isp_bgobjdepth: u32,
+ pub(crate) isp_bgobjvals: u32,
+ pub(crate) unk_38: u32,
+ pub(crate) unk_3c: u32,
+ pub(crate) unk_40: u32,
+ }
+
+ #[versions(AGX)]
+ #[derive(Debug)]
+ #[repr(C)]
+ pub(crate) struct JobParameters3 { // third parameter block embedded in RunFragment; repeats several JobParameters1 field names
+ pub(crate) unk_44_padding: Array<0xac, u8>,
+ pub(crate) depth_bias_array: ArrayAddr,
+ pub(crate) scissor_array: ArrayAddr,
+ pub(crate) visibility_result_buffer: U64,
+ pub(crate) unk_118: U64,
+ pub(crate) unk_120: Array<0x25, U64>,
+ pub(crate) unk_reload_pipeline: ClearPipelineBinding,
+ pub(crate) unk_258: U64,
+ pub(crate) unk_260: U64,
+ pub(crate) unk_268: U64,
+ pub(crate) unk_270: U64,
+ pub(crate) reload_pipeline: ClearPipelineBinding,
+ pub(crate) zls_ctrl: U64,
+ pub(crate) unk_290: U64,
+ pub(crate) depth_buffer_ptr1: U64,
+ pub(crate) unk_2a0: U64,
+ pub(crate) unk_2a8: U64,
+ pub(crate) depth_buffer_ptr2: U64,
+ pub(crate) depth_buffer_ptr3: U64,
+ pub(crate) depth_meta_buffer_ptr3: U64,
+ pub(crate) stencil_buffer_ptr1: U64,
+ pub(crate) unk_2d0: U64,
+ pub(crate) unk_2d8: U64,
+ pub(crate) stencil_buffer_ptr2: U64,
+ pub(crate) stencil_buffer_ptr3: U64,
+ pub(crate) stencil_meta_buffer_ptr3: U64,
+ pub(crate) unk_2f8: Array<2, U64>,
+ pub(crate) iogpu_unk_212: u32,
+ pub(crate) unk_30c: u32,
+ pub(crate) aux_fb_info: AuxFBInfo::ver,
+ pub(crate) unk_320_padding: Array<0x10, u8>,
+ pub(crate) unk_partial_store_pipeline: StorePipelineBinding,
+ pub(crate) partial_store_pipeline: StorePipelineBinding,
+ pub(crate) isp_bgobjdepth: u32,
+ pub(crate) isp_bgobjvals: u32,
+ pub(crate) iogpu_unk_49: u32,
+ pub(crate) unk_37c: u32,
+ pub(crate) unk_380: U64,
+ pub(crate) unk_388: U64,
+
+ #[ver(V >= V13_0B4)] // present only on V13_0B4 and later
+ pub(crate) unk_390_0: U64,
+
+ pub(crate) depth_dimensions: U64,
+ }
+
+ #[versions(AGX)]
+ #[derive(Debug)]
+ #[repr(C)]
+ pub(crate) struct RunFragment<'a> { // raw fragment command; `GpuStruct::Raw` of the outer `RunFragment::ver`
+ pub(crate) tag: workqueue::CommandType, // first field, typed as a work-queue command type
+
+ #[ver(V >= V13_0B4)] // present only on V13_0B4 and later
+ pub(crate) counter: U64,
+
+ pub(crate) vm_slot: u32,
+ pub(crate) unk_8: u32,
+ pub(crate) microsequence: GpuPointer<'a, &'a [u8]>,
+ pub(crate) microsequence_size: u32,
+ pub(crate) notifier: GpuPointer<'a, event::Notifier::ver>,
+ pub(crate) buffer: GpuPointer<'a, fw::buffer::Info::ver>,
+ pub(crate) scene: GpuPointer<'a, fw::buffer::Scene::ver>,
+ pub(crate) unk_buffer_buf: GpuWeakPointer<[u8]>, // weak pointer: carries no borrow lifetime, unlike the GpuPointer fields
+ pub(crate) tvb_tilemap: GpuPointer<'a, &'a [u8]>,
+ pub(crate) ppp_multisamplectl: U64,
+ pub(crate) samples: u32,
+ pub(crate) tiles_per_mtile_y: u16,
+ pub(crate) tiles_per_mtile_x: u16,
+ pub(crate) unk_50: U64,
+ pub(crate) unk_58: U64,
+ pub(crate) merge_upper_x: F32,
+ pub(crate) merge_upper_y: F32,
+ pub(crate) unk_68: U64,
+ pub(crate) tile_count: U64,
+ pub(crate) job_params1: JobParameters1::ver<'a>, // the three parameter blocks are embedded by value, in this order
+ pub(crate) job_params2: JobParameters2,
+ pub(crate) job_params3: JobParameters3::ver,
+ pub(crate) unk_758_flag: u32,
+ pub(crate) unk_75c_flag: u32,
+ pub(crate) unk_buf: Array<0x110, u8>,
+ pub(crate) busy_flag: u32,
+ pub(crate) tvb_overflow_count: u32,
+ pub(crate) unk_878: u32,
+ pub(crate) encoder_params: job::raw::EncoderParams<'a>,
+ pub(crate) process_empty_tiles: u32,
+ pub(crate) no_clear_pipeline_textures: u32,
+ pub(crate) unk_param: u32,
+ pub(crate) unk_pointee: u32,
+ pub(crate) meta: job::raw::JobMeta,
+ pub(crate) unk_after_meta: u32,
+ pub(crate) unk_buf_0: U64,
+ pub(crate) unk_buf_8: U64,
+ pub(crate) unk_buf_10: U64,
+ pub(crate) cur_ts: U64,
+ pub(crate) start_ts: Option<GpuPointer<'a, AtomicU64>>, // optional pointer to a 64-bit atomic
+ pub(crate) end_ts: Option<GpuPointer<'a, AtomicU64>>, // optional pointer to a 64-bit atomic
+ pub(crate) unk_914: u32,
+ pub(crate) unk_918: U64,
+ pub(crate) unk_920: u32,
+ pub(crate) client_sequence: u8,
+ pub(crate) pad_925: Array<3, u8>,
+ pub(crate) unk_928: u32,
+ pub(crate) unk_92c: u8,
+
+ #[ver(V >= V13_0B4)] // the remaining two fields exist only on V13_0B4 and later
+ pub(crate) unk_ts: U64,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_92d_8: Array<0x1b, u8>,
+ }
+}
+
+#[versions(AGX)]
+#[derive(Debug)]
+pub(crate) struct RunFragment { // driver-side half of the fragment command; raw layout is `raw::RunFragment::ver`
+ pub(crate) notifier: Arc<GpuObject<event::Notifier::ver>>, // reference-counted, so it may be shared with other holders
+ pub(crate) scene: Arc<buffer::Scene::ver>, // reference-counted, so it may be shared with other holders
+ pub(crate) micro_seq: microseq::MicroSequence,
+ pub(crate) vm_bind: mmu::VmBind,
+ pub(crate) aux_fb: GpuArray<u8>,
+ pub(crate) timestamps: Arc<GpuObject<job::RenderTimestamps>>, // reference-counted, so it may be shared with other holders
+}
+
+#[versions(AGX)]
+impl GpuStruct for RunFragment::ver { // pairs the driver-side type with its raw layout
+ type Raw<'a> = raw::RunFragment::ver<'a>;
+}
+
+#[versions(AGX)]
+impl workqueue::Command for RunFragment::ver {} // marker impl: no items are overridden here
diff --git a/drivers/gpu/drm/asahi/fw/initdata.rs b/drivers/gpu/drm/asahi/fw/initdata.rs
new file mode 100644
index 000000000000..44de0c1cccf3
--- /dev/null
+++ b/drivers/gpu/drm/asahi/fw/initdata.rs
@@ -0,0 +1,1264 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+//! GPU initialization / global structures
+
+use super::channels;
+use super::types::*;
+use crate::{default_zeroed, no_debug, trivial_gpustruct};
+
+pub(crate) mod raw {
+ use super::*;
+
+ #[derive(Debug, Default)]
+ #[repr(C)] // C layout: fields are laid out in declaration order
+ pub(crate) struct ChannelRing<T: GpuStruct + Debug + Default, U: Copy> { // describes one channel: T is its state block type, U its message type
+ pub(crate) state: Option<GpuWeakPointer<T>>, // None by Default
+ pub(crate) ring: Option<GpuWeakPointer<[U]>>, // weak pointer to the slice of messages; None by Default
+ }
+
+ #[versions(AGX)]
+ #[derive(Debug)]
+ #[repr(C)] // C layout: vtx, frag, comp in this order
+ pub(crate) struct PipeChannels { // three rings of PipeMsg, each with a plain ChannelState
+ pub(crate) vtx: ChannelRing<channels::ChannelState, channels::PipeMsg::ver>,
+ pub(crate) frag: ChannelRing<channels::ChannelState, channels::PipeMsg::ver>,
+ pub(crate) comp: ChannelRing<channels::ChannelState, channels::PipeMsg::ver>,
+ }
+ #[versions(AGX)]
+ default_zeroed!(PipeChannels::ver); // NOTE(review): presumably Default = all-zero bytes — confirm against the macro
+
+ #[derive(Debug, Default)]
+ #[repr(C)] // C layout: fields are laid out in declaration order
+ pub(crate) struct FwStatusFlags { // seven 32-bit values, each followed by a Pad<0xc>
+ pub(crate) halt_count: AtomicU32, // the first three values are atomics; the rest are plain u32
+ __pad0: Pad<0xc>, // NOTE(review): if Pad<N> occupies N bytes, each value starts a new 0x10-byte slot — confirm
+ pub(crate) halted: AtomicU32,
+ __pad1: Pad<0xc>,
+ pub(crate) resume: AtomicU32,
+ __pad2: Pad<0xc>,
+ pub(crate) unk_40: u32, // NOTE(review): the hex suffixes look like offsets within the enclosing FwStatus, not within this struct — confirm
+ __pad3: Pad<0xc>,
+ pub(crate) unk_ctr: u32,
+ __pad4: Pad<0xc>,
+ pub(crate) unk_60: u32,
+ __pad5: Pad<0xc>,
+ pub(crate) unk_70: u32,
+ __pad6: Pad<0xc>,
+ }
+
+ #[derive(Debug, Default)]
+ #[repr(C)] // C layout: the channel descriptor comes first, then the flags
+ pub(crate) struct FwStatus { // fwctl channel descriptor plus the status flag block
+ pub(crate) fwctl_channel: ChannelRing<channels::FwCtlChannelState, channels::FwCtlMsg>, // ring of FwCtlMsg using the FwCtl state layout
+ pub(crate) flags: FwStatusFlags, // embedded by value
+ }
+
+ #[derive(Debug)]
+ #[repr(C)] // C layout: fields are laid out in declaration order
+ pub(crate) struct HwDataShared1 { // 16-entry signed table followed by unknown data
+ pub(crate) table: Array<16, i32>,
+ pub(crate) unk_44: Array<0x60, u8>,
+ pub(crate) unk_a4: u32,
+ pub(crate) unk_a8: u32,
+ }
+ default_zeroed!(HwDataShared1); // NOTE(review): presumably Default = all-zero bytes — confirm against the macro
+
+ #[derive(Debug, Default)]
+ #[repr(C)] // C layout: fields are laid out in declaration order
+ pub(crate) struct HwDataShared2Curve { // embedded twice in HwDataShared2T8112 as curve1 and curve2
+ pub(crate) unk_0: u32,
+ pub(crate) unk_4: u32,
+ pub(crate) t1: Array<16, i16>,
+ pub(crate) t2: Array<16, i16>,
+ pub(crate) t3: Array<8, Array<16, i32>>, // 8 rows of 16 entries
+ }
+
+ #[derive(Debug, Default)]
+ #[repr(C)] // C layout: fields are laid out in declaration order
+ pub(crate) struct HwDataShared2T8112 { // embedded in HwDataShared2 as the `t8112` field
+ pub(crate) unk_0: Array<5, u32>,
+ pub(crate) unk_14: u32,
+ pub(crate) unk_18: Array<8, u32>,
+ pub(crate) curve1: HwDataShared2Curve,
+ pub(crate) curve2: HwDataShared2Curve,
+ }
+
+ #[derive(Debug)]
+ #[repr(C)] // C layout: fields are laid out in declaration order
+ pub(crate) struct HwDataShared2 { // 10-entry signed table, the T8112 sub-block, then five unknown words
+ pub(crate) table: Array<10, i32>,
+ pub(crate) unk_28: Array<0x10, u8>,
+ pub(crate) t8112: HwDataShared2T8112, // embedded by value
+ pub(crate) unk_500: u32,
+ pub(crate) unk_504: u32,
+ pub(crate) unk_508: u32,
+ pub(crate) unk_50c: u32,
+ pub(crate) unk_510: u32,
+ }
+ default_zeroed!(HwDataShared2); // NOTE(review): presumably Default = all-zero bytes — confirm against the macro
+
+ #[derive(Debug)]
+ #[repr(C)] // C layout: fields are laid out in declaration order
+ pub(crate) struct HwDataShared3 { // three unknown words, a 16-entry unsigned table, one trailing word
+ pub(crate) unk_0: u32,
+ pub(crate) unk_4: u32,
+ pub(crate) unk_8: u32,
+ pub(crate) table: Array<16, u32>,
+ pub(crate) unk_4c: u32,
+ }
+ default_zeroed!(HwDataShared3); // NOTE(review): presumably Default = all-zero bytes — confirm against the macro
+
+ #[derive(Debug)]
+ #[repr(C)] // C layout: fields are laid out in declaration order
+ pub(crate) struct HwDataA130Extra { // mostly unknown u32/F32 fields; only the two max_pstate_scaled_* fields are named
+ pub(crate) unk_0: Array<0x38, u8>,
+ pub(crate) unk_38: u32,
+ pub(crate) unk_3c: u32,
+ pub(crate) unk_40: u32,
+ pub(crate) unk_44: u32,
+ pub(crate) unk_48: u32,
+ pub(crate) unk_4c: u32,
+ pub(crate) unk_50: u32,
+ pub(crate) unk_54: u32,
+ pub(crate) unk_58: u32,
+ pub(crate) unk_5c: u32,
+ pub(crate) unk_60: F32,
+ pub(crate) unk_64: F32,
+ pub(crate) unk_68: F32,
+ pub(crate) unk_6c: F32,
+ pub(crate) unk_70: F32,
+ pub(crate) unk_74: F32,
+ pub(crate) unk_78: F32,
+ pub(crate) unk_7c: F32,
+ pub(crate) unk_80: F32,
+ pub(crate) unk_84: F32,
+ pub(crate) unk_88: u32,
+ pub(crate) unk_8c: u32,
+ pub(crate) max_pstate_scaled_1: u32,
+ pub(crate) unk_94: u32,
+ pub(crate) unk_98: u32,
+ pub(crate) unk_9c: F32,
+ pub(crate) unk_a0: u32,
+ pub(crate) unk_a4: u32,
+ pub(crate) unk_a8: u32,
+ pub(crate) unk_ac: u32,
+ pub(crate) unk_b0: u32,
+ pub(crate) unk_b4: u32,
+ pub(crate) unk_b8: u32,
+ pub(crate) unk_bc: u32,
+ pub(crate) unk_c0: u32,
+ pub(crate) unk_c4: F32,
+ pub(crate) unk_c8: Array<0x4c, u8>,
+ pub(crate) unk_114: F32,
+ pub(crate) unk_118: u32,
+ pub(crate) unk_11c: u32,
+ pub(crate) unk_120: u32,
+ pub(crate) unk_124: u32,
+ pub(crate) max_pstate_scaled_2: u32,
+ pub(crate) unk_12c: Array<0x8c, u8>,
+ }
+ default_zeroed!(HwDataA130Extra); // NOTE(review): presumably Default = all-zero bytes — confirm against the macro
+
+ #[derive(Default)] // note: no Debug derive on this struct
+ #[repr(C)] // C layout: fields are laid out in declaration order
+ pub(crate) struct T81xxData { // seventeen 32-bit fields; NOTE(review): the d8c..dc8 suffixes look like offsets within a larger enclosing struct — confirm
+ pub(crate) unk_d8c: u32,
+ pub(crate) unk_d90: u32,
+ pub(crate) unk_d94: u32,
+ pub(crate) unk_d98: u32,
+ pub(crate) unk_d9c: F32,
+ pub(crate) unk_da0: u32,
+ pub(crate) unk_da4: F32,
+ pub(crate) unk_da8: u32,
+ pub(crate) unk_dac: F32,
+ pub(crate) unk_db0: u32,
+ pub(crate) unk_db4: u32,
+ pub(crate) unk_db8: F32,
+ pub(crate) unk_dbc: F32,
+ pub(crate) unk_dc0: u32,
+ pub(crate) unk_dc4: u32,
+ pub(crate) unk_dc8: u32,
+ pub(crate) max_pstate_scaled: u32,
+ }
+
+ #[versions(AGX)]
+ #[derive(Default, Copy, Clone)] // note: no Debug derive on this struct
+ #[repr(C)] // C layout: fields are laid out in declaration order
+ pub(crate) struct PowerZone { // per-zone record: value, target and filter parameters
+ pub(crate) val: F32,
+ pub(crate) target: u32,
+ pub(crate) target_off: u32,
+ pub(crate) filter_tc_x4: u32,
+ pub(crate) filter_tc_xperiod: u32,
+ #[ver(V >= V13_0B4)] // the next two fields exist only on V13_0B4 and later
+ pub(crate) unk_10: u32,
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_14: u32,
+ pub(crate) filter_a_neg: F32,
+ pub(crate) filter_a: F32,
+ pub(crate) pad: u32,
+ }
+
+ #[versions(AGX)]
+ #[repr(C)]
+ pub(crate) struct HwDataA {
+ pub(crate) unk_0: u32,
+ pub(crate) clocks_per_period: u32,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) clocks_per_period_2: u32,
+
+ pub(crate) unk_8: u32,
+ pub(crate) pwr_status: AtomicU32,
+ pub(crate) unk_10: F32,
+ pub(crate) unk_14: u32,
+ pub(crate) unk_18: u32,
+ pub(crate) unk_1c: u32,
+ pub(crate) unk_20: u32,
+ pub(crate) unk_24: u32,
+ pub(crate) actual_pstate: u32,
+ pub(crate) tgt_pstate: u32,
+ pub(crate) unk_30: u32,
+ pub(crate) cur_pstate: u32,
+ pub(crate) unk_38: u32,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_3c_0: u32,
+
+ pub(crate) base_pstate_scaled: u32,
+ pub(crate) unk_40: u32,
+ pub(crate) max_pstate_scaled: u32,
+ pub(crate) unk_48: u32,
+ pub(crate) min_pstate_scaled: u32,
+ pub(crate) freq_mhz: F32,
+ pub(crate) unk_54: Array<0x20, u8>,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_74_0: u32,
+
+ pub(crate) sram_k: Array<0x10, F32>,
+ pub(crate) unk_b4: Array<0x100, u8>,
+ pub(crate) unk_1b4: u32,
+ pub(crate) temp_c: u32,
+ pub(crate) avg_power_mw: u32,
+ pub(crate) update_ts: U64,
+ pub(crate) unk_1c8: u32,
+ pub(crate) unk_1cc: Array<0x478, u8>,
+ pub(crate) pad_644: Pad<0x8>,
+ pub(crate) unk_64c: u32,
+ pub(crate) unk_650: u32,
+ pub(crate) pad_654: u32,
+ pub(crate) pwr_filter_a_neg: F32,
+ pub(crate) pad_65c: u32,
+ pub(crate) pwr_filter_a: F32,
+ pub(crate) pad_664: u32,
+ pub(crate) pwr_integral_gain: F32,
+ pub(crate) pad_66c: u32,
+ pub(crate) pwr_integral_min_clamp: F32,
+ pub(crate) max_power_1: F32,
+ pub(crate) pwr_proportional_gain: F32,
+ pub(crate) pad_67c: u32,
+ pub(crate) pwr_pstate_related_k: F32,
+ pub(crate) pwr_pstate_max_dc_offset: i32,
+ pub(crate) unk_688: u32,
+ pub(crate) max_pstate_scaled_2: u32,
+ pub(crate) pad_690: u32,
+ pub(crate) unk_694: u32,
+ pub(crate) max_power_2: u32,
+ pub(crate) pad_69c: Pad<0x18>,
+ pub(crate) unk_6b4: u32,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_6b8_0: Array<0x10, u8>,
+
+ pub(crate) max_pstate_scaled_3: u32,
+ pub(crate) unk_6bc: u32,
+ pub(crate) pad_6c0: Pad<0x14>,
+ pub(crate) ppm_filter_tc_periods_x4: u32,
+ pub(crate) unk_6d8: u32,
+ pub(crate) pad_6dc: u32,
+ pub(crate) ppm_filter_a_neg: F32,
+ pub(crate) pad_6e4: u32,
+ pub(crate) ppm_filter_a: F32,
+ pub(crate) pad_6ec: u32,
+ pub(crate) ppm_ki_dt: F32,
+ pub(crate) pad_6f4: u32,
+ pub(crate) pwr_integral_min_clamp_2: u32,
+ pub(crate) unk_6fc: F32,
+ pub(crate) ppm_kp: F32,
+ pub(crate) pad_704: u32,
+ pub(crate) unk_708: u32,
+ pub(crate) pwr_min_duty_cycle: u32,
+ pub(crate) max_pstate_scaled_4: u32,
+ pub(crate) unk_714: u32,
+ pub(crate) pad_718: u32,
+ pub(crate) unk_71c: F32,
+ pub(crate) max_power_3: u32,
+ pub(crate) cur_power_mw_2: u32,
+ pub(crate) ppm_filter_tc_ms: u32,
+ pub(crate) unk_72c: u32,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) ppm_filter_tc_clks: u32,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_730_4: u32,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_730_8: u32,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_730_c: u32,
+
+ pub(crate) unk_730: F32,
+ pub(crate) unk_734: u32,
+ pub(crate) unk_738: u32,
+ pub(crate) unk_73c: u32,
+ pub(crate) unk_740: u32,
+ pub(crate) unk_744: u32,
+ pub(crate) unk_748: Array<0x4, F32>,
+ pub(crate) unk_758: u32,
+ pub(crate) perf_tgt_utilization: u32,
+ pub(crate) pad_760: u32,
+ pub(crate) perf_boost_min_util: u32,
+ pub(crate) perf_boost_ce_step: u32,
+ pub(crate) perf_reset_iters: u32,
+ pub(crate) pad_770: u32,
+ pub(crate) unk_774: u32,
+ pub(crate) unk_778: u32,
+ pub(crate) perf_filter_drop_threshold: u32,
+ pub(crate) perf_filter_a_neg: F32,
+ pub(crate) perf_filter_a2_neg: F32,
+ pub(crate) perf_filter_a: F32,
+ pub(crate) perf_filter_a2: F32,
+ pub(crate) perf_ki: F32,
+ pub(crate) perf_ki2: F32,
+ pub(crate) perf_integral_min_clamp: F32,
+ pub(crate) unk_79c: F32,
+ pub(crate) perf_kp: F32,
+ pub(crate) perf_kp2: F32,
+ pub(crate) boost_state_unk_k: F32,
+ pub(crate) base_pstate_scaled_2: u32,
+ pub(crate) max_pstate_scaled_5: u32,
+ pub(crate) base_pstate_scaled_3: u32,
+ pub(crate) pad_7b8: u32,
+ pub(crate) perf_cur_utilization: F32,
+ pub(crate) perf_tgt_utilization_2: u32,
+ pub(crate) pad_7c4: Pad<0x18>,
+ pub(crate) unk_7dc: u32,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_7e0_0: Array<0x10, u8>,
+
+ pub(crate) base_pstate_scaled_4: u32,
+ pub(crate) pad_7e4: u32,
+ pub(crate) unk_7e8: Array<0x14, u8>,
+ pub(crate) unk_7fc: F32,
+ pub(crate) pwr_min_duty_cycle_2: F32,
+ pub(crate) max_pstate_scaled_6: F32,
+ pub(crate) max_freq_mhz: u32,
+ pub(crate) pad_80c: u32,
+ pub(crate) unk_810: u32,
+ pub(crate) pad_814: u32,
+ pub(crate) pwr_min_duty_cycle_3: u32,
+ pub(crate) unk_81c: u32,
+ pub(crate) pad_820: u32,
+ pub(crate) min_pstate_scaled_4: F32,
+ pub(crate) max_pstate_scaled_7: u32,
+ pub(crate) unk_82c: u32,
+ pub(crate) unk_alpha_neg: F32,
+ pub(crate) unk_alpha: F32,
+ pub(crate) unk_838: u32,
+ pub(crate) unk_83c: u32,
+ pub(crate) pad_840: Pad<0x2c>,
+ pub(crate) unk_86c: u32,
+ pub(crate) fast_die0_sensor_mask: U64,
+ pub(crate) fast_die0_release_temp_cc: u32,
+ pub(crate) unk_87c: i32,
+ pub(crate) unk_880: u32,
+ pub(crate) unk_884: u32,
+ pub(crate) pad_888: u32,
+ pub(crate) unk_88c: u32,
+ pub(crate) pad_890: u32,
+ pub(crate) unk_894: F32,
+ pub(crate) pad_898: u32,
+ pub(crate) fast_die0_ki_dt: F32,
+ pub(crate) pad_8a0: u32,
+ pub(crate) unk_8a4: u32,
+ pub(crate) unk_8a8: F32,
+ pub(crate) fast_die0_kp: F32,
+ pub(crate) pad_8b0: u32,
+ pub(crate) unk_8b4: u32,
+ pub(crate) pwr_min_duty_cycle_4: u32,
+ pub(crate) max_pstate_scaled_8: u32,
+ pub(crate) max_pstate_scaled_9: u32,
+ pub(crate) fast_die0_prop_tgt_delta: u32,
+ pub(crate) unk_8c8: u32,
+ pub(crate) unk_8cc: u32,
+ pub(crate) pad_8d0: Pad<0x14>,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_8e4_0: Array<0x10, u8>,
+
+ pub(crate) unk_8e4: u32,
+ pub(crate) unk_8e8: u32,
+ pub(crate) max_pstate_scaled_10: u32,
+ pub(crate) unk_8f0: u32,
+ pub(crate) unk_8f4: u32,
+ pub(crate) pad_8f8: u32,
+ pub(crate) pad_8fc: u32,
+ pub(crate) unk_900: Array<0x24, u8>,
+ pub(crate) unk_coef_a1: Array<8, Array<8, F32>>,
+ pub(crate) unk_coef_a2: Array<8, Array<8, F32>>,
+ pub(crate) pad_b24: Pad<0x70>,
+ pub(crate) max_pstate_scaled_11: u32,
+ pub(crate) freq_with_off: u32,
+ pub(crate) unk_b9c: u32,
+ pub(crate) unk_ba0: U64,
+ pub(crate) unk_ba8: U64,
+ pub(crate) unk_bb0: u32,
+ pub(crate) unk_bb4: u32,
+ pub(crate) pad_bb8: Pad<0x74>,
+ pub(crate) unk_c2c: u32,
+ pub(crate) power_zone_count: u32,
+ pub(crate) max_power_4: u32,
+ pub(crate) max_power_5: u32,
+ pub(crate) max_power_6: u32,
+ pub(crate) unk_c40: u32,
+ pub(crate) unk_c44: F32,
+ pub(crate) avg_power_target_filter_a_neg: F32,
+ pub(crate) avg_power_target_filter_a: F32,
+ pub(crate) avg_power_target_filter_tc_x4: u32,
+ pub(crate) avg_power_target_filter_tc_xperiod: u32,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) avg_power_target_filter_tc_clks: u32,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_c58_4: u32,
+
+ pub(crate) power_zones: Array<5, PowerZone::ver>,
+ pub(crate) avg_power_filter_tc_periods_x4: u32,
+ pub(crate) unk_cfc: u32,
+ pub(crate) unk_d00: u32,
+ pub(crate) avg_power_filter_a_neg: F32,
+ pub(crate) unk_d08: u32,
+ pub(crate) avg_power_filter_a: F32,
+ pub(crate) unk_d10: u32,
+ pub(crate) avg_power_ki_dt: F32,
+ pub(crate) unk_d18: u32,
+ pub(crate) unk_d1c: u32,
+ pub(crate) unk_d20: F32,
+ pub(crate) avg_power_kp: F32,
+ pub(crate) unk_d28: u32,
+ pub(crate) unk_d2c: u32,
+ pub(crate) avg_power_min_duty_cycle: u32,
+ pub(crate) max_pstate_scaled_12: u32,
+ pub(crate) max_pstate_scaled_13: u32,
+ pub(crate) unk_d3c: u32,
+ pub(crate) max_power_7: F32,
+ pub(crate) max_power_8: u32,
+ pub(crate) unk_d48: u32,
+ pub(crate) avg_power_filter_tc_ms: u32,
+ pub(crate) unk_d50: u32,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) avg_power_filter_tc_clks: u32,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_d54_4: Array<0xc, u8>,
+
+ pub(crate) unk_d54: Array<0x10, u8>,
+ pub(crate) max_pstate_scaled_14: u32,
+ pub(crate) unk_d68: Array<0x24, u8>,
+
+ pub(crate) t81xx_data: T81xxData,
+
+ pub(crate) unk_dd0: Array<0x40, u8>,
+
+ #[ver(V >= V13_2)]
+ pub(crate) unk_e10_pad: Array<0x10, u8>,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_e10_0: HwDataA130Extra,
+
+ pub(crate) unk_e10: Array<0xc, u8>,
+ pub(crate) fast_die0_sensor_mask_2: U64,
+ pub(crate) unk_e24: u32,
+ pub(crate) unk_e28: u32,
+ pub(crate) unk_e2c: Pad<0x1c>,
+ pub(crate) unk_coef_b1: Array<8, Array<8, F32>>,
+ pub(crate) unk_coef_b2: Array<8, Array<8, F32>>,
+ pub(crate) pad_1048: Pad<0x5e4>,
+ pub(crate) fast_die0_sensor_mask_alt: U64,
+ #[ver(V < V13_0B4)]
+ pub(crate) fast_die0_sensor_present: U64,
+
+ pub(crate) unk_163c: u32,
+
+ pub(crate) unk_1640: Array<0x2000, u8>,
+ pub(crate) unk_3640: u32,
+ pub(crate) unk_3644: u32,
+ pub(crate) hws1: HwDataShared1,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_pad1: Pad<0x20>,
+
+ pub(crate) hws2: HwDataShared2,
+ pub(crate) unk_3c04: u32,
+ pub(crate) hws3: HwDataShared3,
+ pub(crate) unk_3c58: Array<0x3c, u8>,
+ pub(crate) unk_3c94: u32,
+ pub(crate) unk_3c98: U64,
+ pub(crate) unk_3ca0: U64,
+ pub(crate) unk_3ca8: U64,
+ pub(crate) unk_3cb0: U64,
+ pub(crate) ts_last_idle: U64,
+ pub(crate) ts_last_poweron: U64,
+ pub(crate) ts_last_poweroff: U64,
+ pub(crate) unk_3cd0: U64,
+ pub(crate) unk_3cd8: U64,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_3ce0_0: u32,
+
+ pub(crate) unk_3ce0: u32,
+ pub(crate) unk_3ce4: u32,
+ pub(crate) unk_3ce8: u32,
+ pub(crate) unk_3cec: u32,
+ pub(crate) unk_3cf0: u32,
+ pub(crate) core_leak_coef: Array<8, F32>,
+ pub(crate) sram_leak_coef: Array<8, F32>,
+ pub(crate) unk_3d34: Array<0x38, u8>,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_3d6c: Array<0x38, u8>,
+ }
+ #[versions(AGX)]
+ default_zeroed!(HwDataA::ver); // NOTE(review): presumably an all-zero `Default`; macro defined elsewhere
+ #[versions(AGX)]
+ no_debug!(HwDataA::ver); // NOTE(review): presumably a placeholder `Debug` impl; macro defined elsewhere
+
+ #[derive(Debug, Default, Clone, Copy)]
+ #[repr(C)]
+ pub(crate) struct IOMapping { // element type of `HwDataB::io_mappings`
+ pub(crate) phys_addr: U64,
+ pub(crate) virt_addr: U64,
+ pub(crate) size: u32,
+ pub(crate) range_size: u32,
+ pub(crate) readwrite: U64, // NOTE(review): 64-bit wide; presumably a flag — confirm against the code that fills it
+ }
+
+ #[versions(AGX)]
+ #[derive(Debug)]
+ #[repr(C)]
+ pub(crate) struct HwDataB { // raw target of `RuntimePointers::hwdata_b` and `hwdata_b_2`
+ #[ver(V < V13_0B4)] // `#[ver(..)]` gates the field that follows on the stated version condition
+ pub(crate) unk_0: U64,
+
+ pub(crate) unk_8: U64,
+
+ #[ver(V < V13_0B4)]
+ pub(crate) unk_10: U64,
+
+ pub(crate) unk_18: U64,
+ pub(crate) unk_20: U64,
+ pub(crate) unk_28: U64,
+ pub(crate) unk_30: U64,
+ pub(crate) unkptr_38: U64,
+ pub(crate) pad_40: Pad<0x20>,
+
+ #[ver(V < V13_0B4)] // exactly one of the two `yuv_matrices` definitions exists per version
+ pub(crate) yuv_matrices: Array<0xf, Array<3, Array<4, i16>>>, // 0xf matrices of 3x4 i16
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) yuv_matrices: Array<0x3f, Array<3, Array<4, i16>>>, // 0x3f matrices of 3x4 i16
+
+ pub(crate) pad_1c8: Pad<0x8>,
+ pub(crate) io_mappings: Array<0x14, IOMapping>, // 0x14 `IOMapping` entries
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_450_0: Array<0x68, u8>,
+
+ pub(crate) chip_id: u32,
+ pub(crate) unk_454: u32,
+ pub(crate) unk_458: u32,
+ pub(crate) unk_45c: u32,
+ pub(crate) unk_460: u32,
+ pub(crate) unk_464: u32,
+ pub(crate) unk_468: u32,
+ pub(crate) unk_46c: u32,
+ pub(crate) unk_470: u32,
+ pub(crate) unk_474: u32,
+ pub(crate) unk_478: u32,
+ pub(crate) unk_47c: u32,
+ pub(crate) unk_480: u32,
+ pub(crate) unk_484: u32,
+ pub(crate) unk_488: u32,
+ pub(crate) unk_48c: u32,
+ pub(crate) base_clock_khz: u32,
+ pub(crate) power_sample_period: u32,
+ pub(crate) pad_498: Pad<0x4>,
+ pub(crate) unk_49c: u32,
+ pub(crate) unk_4a0: u32,
+ pub(crate) unk_4a4: u32,
+ pub(crate) pad_4a8: Pad<0x4>,
+ pub(crate) unk_4ac: u32,
+ pub(crate) pad_4b0: Pad<0x8>,
+ pub(crate) unk_4b8: u32,
+ pub(crate) unk_4bc: Array<0x4, u8>,
+ pub(crate) unk_4c0: u32,
+ pub(crate) unk_4c4: u32,
+ pub(crate) unk_4c8: u32,
+ pub(crate) unk_4cc: u32,
+ pub(crate) unk_4d0: u32,
+ pub(crate) unk_4d4: u32,
+ pub(crate) unk_4d8: Array<0x4, u8>,
+ pub(crate) unk_4dc: u32,
+ pub(crate) unk_4e0: U64,
+ pub(crate) unk_4e8: u32,
+ pub(crate) unk_4ec: u32,
+ pub(crate) unk_4f0: u32,
+ pub(crate) unk_4f4: u32,
+ pub(crate) unk_4f8: u32,
+ pub(crate) unk_4fc: u32,
+ pub(crate) unk_500: u32,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_504_0: u32,
+
+ pub(crate) unk_504: u32,
+ pub(crate) unk_508: u32,
+ pub(crate) unk_50c: u32,
+ pub(crate) unk_510: u32,
+ pub(crate) unk_514: u32,
+ pub(crate) unk_518: u32,
+ pub(crate) unk_51c: u32,
+ pub(crate) unk_520: u32,
+ pub(crate) unk_524: u32,
+ pub(crate) unk_528: u32,
+ pub(crate) unk_52c: u32,
+ pub(crate) unk_530: u32,
+ pub(crate) unk_534: u32,
+ pub(crate) unk_538: u32,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_53c_0: u32,
+
+ pub(crate) num_frags: u32,
+ pub(crate) unk_540: u32,
+ pub(crate) unk_544: u32,
+ pub(crate) unk_548: u32,
+ pub(crate) unk_54c: u32,
+ pub(crate) unk_550: u32,
+ pub(crate) unk_554: u32,
+ pub(crate) uat_ttb_base: U64,
+ pub(crate) gpu_core_id: u32,
+ pub(crate) gpu_rev_id: u32,
+ pub(crate) num_cores: u32,
+ pub(crate) max_pstate: u32,
+
+ #[ver(V < V13_0B4)] // `num_pstates` exists only for versions before V13_0B4
+ pub(crate) num_pstates: u32,
+
+ pub(crate) frequencies: Array<0x10, u32>, // the tables below all have 0x10 rows
+ pub(crate) voltages: Array<0x10, [u32; 0x8]>, // 8 values per row
+ pub(crate) voltages_sram: Array<0x10, [u32; 0x8]>, // 8 values per row
+ pub(crate) sram_k: Array<0x10, F32>,
+ pub(crate) unk_9f4: Array<0x10, u32>,
+ pub(crate) rel_max_powers: Array<0x10, u32>,
+ pub(crate) rel_boost_freqs: Array<0x10, u32>,
+
+ #[ver(V < V13_0B4)]
+ pub(crate) min_sram_volt: u32,
+
+ #[ver(V < V13_0B4)]
+ pub(crate) unk_ab8: u32,
+
+ #[ver(V < V13_0B4)]
+ pub(crate) unk_abc: u32,
+
+ #[ver(V < V13_0B4)]
+ pub(crate) unk_ac0: u32,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_ac4_0: Array<0x1f0, u8>,
+
+ pub(crate) pad_ac4: Pad<0x8>,
+ pub(crate) unk_acc: u32,
+ pub(crate) unk_ad0: u32,
+ pub(crate) pad_ad4: Pad<0x10>,
+ pub(crate) unk_ae4: Array<0x4, u32>,
+ pub(crate) pad_af4: Pad<0x4>,
+ pub(crate) unk_af8: u32,
+ pub(crate) pad_afc: Pad<0x8>,
+ pub(crate) unk_b04: u32,
+ pub(crate) unk_b08: u32,
+ pub(crate) unk_b0c: u32,
+ pub(crate) unk_b10: u32,
+ pub(crate) pad_b14: Pad<0x8>,
+ pub(crate) unk_b1c: u32,
+ pub(crate) unk_b20: u32,
+ pub(crate) unk_b24: u32,
+ pub(crate) unk_b28: u32,
+ pub(crate) unk_b2c: u32,
+ pub(crate) unk_b30: u32,
+ pub(crate) unk_b34: u32,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_b38_0: u32,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_b38_4: u32,
+
+ pub(crate) unk_b38: Array<0xc, u32>,
+ pub(crate) unk_b68: u32,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_b6c: Array<0xd0, u8>,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_c3c: u32,
+ }
+ #[versions(AGX)]
+ default_zeroed!(HwDataB::ver); // NOTE(review): presumably an all-zero `Default`; macro defined elsewhere
+
+ #[derive(Debug, Clone, Copy)]
+ #[repr(C, packed)] // packed: no padding is inserted between fields
+ pub(crate) struct GpuQueueStatsVtx { // element type of `GpuStatsVtx::queues`
+ pub(crate) busy: u32,
+ pub(crate) unk_4: u32,
+ pub(crate) cur_cmdqueue: U64,
+ pub(crate) cur_count: u32,
+ pub(crate) unk_14: u32,
+ }
+ default_zeroed!(GpuQueueStatsVtx); // NOTE(review): presumably an all-zero `Default`; macro defined elsewhere
+
+ #[versions(AGX)]
+ #[derive(Debug, Default, Clone, Copy)]
+ #[repr(C, packed)] // packed: no padding is inserted between fields
+ pub(crate) struct GpuStatsVtx { // embedded as `GpuGlobalStatsVtx::stats`
+ pub(crate) unk_4: u32,
+ pub(crate) queues: Array<0x4, GpuQueueStatsVtx>, // four per-queue entries
+ pub(crate) unk_68: Array<0x8, u8>,
+ pub(crate) unk_70: u32,
+ pub(crate) unk_74: u32,
+ pub(crate) unk_timestamp: U64,
+ pub(crate) unk_80: Array<0x40, u8>,
+ }
+
+ #[derive(Debug, Default, Clone, Copy)]
+ #[repr(C, packed)] // packed: `cur_cmdqueue` directly follows the 4-byte `busy`
+ pub(crate) struct GpuQueueStatsFrag { // element type of `GpuStatsFrag::queues`
+ pub(crate) busy: u32,
+ pub(crate) cur_cmdqueue: U64,
+ pub(crate) unk_c: u32,
+ pub(crate) unk_10: u32,
+ pub(crate) unk_14: Array<0x14, u8>,
+ }
+
+ #[versions(AGX)]
+ #[derive(Debug)]
+ #[repr(C)]
+ pub(crate) struct GpuStatsFrag { // embedded as `GpuGlobalStatsFrag::stats`
+ pub(crate) unk_0: Array<0x18, u8>,
+ pub(crate) queues: Array<0x4, GpuQueueStatsFrag>, // four per-queue entries
+ pub(crate) unk_d0: Array<0x38, u8>,
+ pub(crate) tvb_overflows_1: u32,
+ pub(crate) tvb_overflows_2: u32,
+ pub(crate) unk_f8: u32,
+ pub(crate) unk_fc: u32,
+ pub(crate) cur_stamp_id: i32, // signed, unlike most neighbouring fields
+ pub(crate) unk_104: Array<0x14, u8>,
+ pub(crate) unk_118: i32,
+ pub(crate) unk_11c: u32,
+ pub(crate) unk_120: u32,
+ pub(crate) unk_124: u32,
+ pub(crate) unk_128: u32,
+ pub(crate) unk_12c: u32,
+ pub(crate) unk_timestamp: U64,
+ pub(crate) unk_134: Array<0x8c, u8>,
+ }
+ #[versions(AGX)]
+ default_zeroed!(GpuStatsFrag::ver); // NOTE(review): presumably an all-zero `Default`; macro defined elsewhere
+
+ #[versions(AGX)]
+ #[derive(Debug)]
+ #[repr(C)]
+ pub(crate) struct GpuGlobalStatsVtx { // raw target of `RuntimePointers::stats_vtx`
+ pub(crate) total_cmds: u32,
+ pub(crate) stats: GpuStatsVtx::ver,
+ #[ver(V >= V13_0B4)] // trailing 0x5c4 bytes exist only from V13_0B4 on
+ pub(crate) unk_pad: Array<0x5c4, u8>,
+ }
+ #[versions(AGX)]
+ default_zeroed!(GpuGlobalStatsVtx::ver); // NOTE(review): presumably an all-zero `Default`; macro defined elsewhere
+
+ #[versions(AGX)]
+ #[derive(Debug)]
+ #[repr(C)]
+ pub(crate) struct GpuGlobalStatsFrag { // raw target of `RuntimePointers::stats_frag`
+ pub(crate) total_cmds: u32,
+ pub(crate) unk_4: u32,
+ pub(crate) stats: GpuStatsFrag::ver,
+ #[ver(V >= V13_0B4)] // trailing 0x580 bytes exist only from V13_0B4 on
+ pub(crate) unk_pad: Array<0x580, u8>,
+ }
+ #[versions(AGX)]
+ default_zeroed!(GpuGlobalStatsFrag::ver); // NOTE(review): presumably an all-zero `Default`; macro defined elsewhere
+
+ #[derive(Debug)]
+ #[repr(C)]
+ pub(crate) struct GpuStatsComp { // raw target of `RuntimePointers::stats_comp`
+ pub(crate) unk: Array<0x140, u8>, // 0x140 bytes with no named sub-fields
+ }
+ default_zeroed!(GpuStatsComp); // NOTE(review): presumably an all-zero `Default`; macro defined elsewhere
+
+ #[derive(Debug)]
+ #[repr(C)]
+ pub(crate) struct RuntimeScratch { // embedded by value as `RuntimePointers::gpu_scratch`
+ pub(crate) unk_280: Array<0x6800, u8>,
+ pub(crate) unk_6a80: u32,
+ pub(crate) gpu_idle: u32,
+ pub(crate) unkpad_6a88: Pad<0x14>,
+ pub(crate) unk_6a9c: u32,
+ pub(crate) unk_ctr0: u32,
+ pub(crate) unk_ctr1: u32,
+ pub(crate) unk_6aa8: u32,
+ pub(crate) unk_6aac: u32,
+ pub(crate) unk_ctr2: u32,
+ pub(crate) unk_6ab4: u32,
+ pub(crate) unk_6ab8: u32,
+ pub(crate) unk_6abc: u32,
+ pub(crate) unk_6ac0: u32,
+ pub(crate) unk_6ac4: u32,
+ pub(crate) unk_ctr3: u32,
+ pub(crate) unk_6acc: u32,
+ pub(crate) unk_6ad0: u32,
+ pub(crate) unk_6ad4: u32,
+ pub(crate) unk_6ad8: u32,
+ pub(crate) unk_6adc: u32,
+ pub(crate) unk_6ae0: u32,
+ pub(crate) unk_6ae4: u32,
+ pub(crate) unk_6ae8: u32,
+ pub(crate) unk_6aec: u32,
+ pub(crate) unk_6af0: u32,
+ pub(crate) unk_ctr4: u32,
+ pub(crate) unk_ctr5: u32,
+ pub(crate) unk_6afc: u32,
+ pub(crate) pad_6b00: Pad<0x38>,
+ pub(crate) unk_6b38: u32,
+ pub(crate) pad_6b3c: Pad<0x84>,
+ }
+ default_zeroed!(RuntimeScratch); // NOTE(review): presumably an all-zero `Default`; macro defined elsewhere
+
+ pub(crate) type BufferMgrCtl = Array<4, u32>; // four u32 words; element of the `buffer_mgr_ctl` arrays
+
+ #[versions(AGX)]
+ #[repr(C)]
+ pub(crate) struct RuntimePointers<'a> { // raw target of `InitData::runtime_pointers`
+ pub(crate) pipes: Array<4, PipeChannels::ver>,
+
+ pub(crate) device_control:
+ ChannelRing<channels::ChannelState, channels::DeviceControlMsg::ver>,
+ pub(crate) event: ChannelRing<channels::ChannelState, channels::RawEventMsg>,
+ pub(crate) fw_log: ChannelRing<channels::FwLogChannelState, channels::RawFwLogMsg>, // own state type, unlike the others
+ pub(crate) ktrace: ChannelRing<channels::ChannelState, channels::RawKTraceMsg>,
+ pub(crate) stats: ChannelRing<channels::ChannelState, channels::RawStatsMsg::ver>,
+
+ pub(crate) __pad0: Pad<0x50>,
+ pub(crate) unk_160: U64,
+ pub(crate) unk_168: U64,
+ pub(crate) stats_vtx: GpuPointer<'a, super::GpuGlobalStatsVtx::ver>, // `super::` = the host-side wrapper types
+ pub(crate) stats_frag: GpuPointer<'a, super::GpuGlobalStatsFrag::ver>,
+ pub(crate) stats_comp: GpuPointer<'a, super::GpuStatsComp>,
+ pub(crate) hwdata_a: GpuPointer<'a, super::HwDataA::ver>,
+ pub(crate) unkptr_190: GpuPointer<'a, &'a [u8]>,
+ pub(crate) unkptr_198: GpuPointer<'a, &'a [u8]>,
+ pub(crate) hwdata_b: GpuPointer<'a, super::HwDataB::ver>,
+ pub(crate) hwdata_b_2: GpuPointer<'a, super::HwDataB::ver>, // same pointee type as `hwdata_b`
+ pub(crate) fwlog_buf: Option<GpuWeakPointer<[channels::RawFwLogPayloadMsg]>>, // the only optional pointer here
+ pub(crate) unkptr_1b8: GpuPointer<'a, &'a [u8]>,
+ pub(crate) unkptr_1c0: GpuPointer<'a, &'a [u8]>,
+ pub(crate) unkptr_1c8: GpuPointer<'a, &'a [u8]>,
+ pub(crate) unk_1d0: u32,
+ pub(crate) unk_1d4: u32,
+ pub(crate) unk_1d8: Array<0x3c, u8>,
+ pub(crate) buffer_mgr_ctl: GpuPointer<'a, &'a [BufferMgrCtl]>,
+ pub(crate) buffer_mgr_ctl_2: GpuPointer<'a, &'a [BufferMgrCtl]>, // same pointee type as `buffer_mgr_ctl`
+ pub(crate) __pad1: Pad<0x5c>,
+ pub(crate) gpu_scratch: RuntimeScratch, // stored inline, not behind a pointer
+ }
+ #[versions(AGX)]
+ no_debug!(RuntimePointers::ver<'_>); // struct has no `#[derive(Debug)]`; NOTE(review): presumably a stub impl
+
+ #[derive(Debug)]
+ #[repr(C)]
+ pub(crate) struct PendingStamp { // element type of `Globals::pending_stamps`
+ pub(crate) info: AtomicU32,
+ pub(crate) wait_value: AtomicU32,
+ }
+ default_zeroed!(PendingStamp); // NOTE(review): presumably an all-zero `Default`; macro defined elsewhere
+
+ #[derive(Debug, Clone, Copy)]
+ #[repr(C, packed)] // packed: no padding is inserted between fields
+ pub(crate) struct FaultInfo { // embedded as `Globals::fault_info`
+ pub(crate) unk_0: u32,
+ pub(crate) unk_4: u32,
+ pub(crate) queue_uuid: u32,
+ pub(crate) unk_c: u32,
+ pub(crate) unk_10: u32,
+ pub(crate) unk_14: u32,
+ }
+ default_zeroed!(FaultInfo); // NOTE(review): presumably an all-zero `Default`; macro defined elsewhere
+
+ #[versions(AGX)]
+ #[derive(Debug, Clone, Copy)]
+ #[repr(C, packed)] // packed: the `U32` fields directly follow three u16 values
+ pub(crate) struct GlobalsSub { // embedded as `Globals::sub`
+ pub(crate) unk_54: u16,
+ pub(crate) unk_56: u16,
+ pub(crate) unk_58: u16,
+ pub(crate) unk_5a: U32,
+ pub(crate) unk_5e: U32,
+ pub(crate) unk_62: U32,
+
+ #[ver(V >= V13_0B4)] // extra 0xc bytes exist only from V13_0B4 on
+ pub(crate) unk_66_0: Array<0xc, u8>,
+
+ pub(crate) unk_66: U32,
+ pub(crate) unk_6a: Array<0x16, u8>,
+ }
+ #[versions(AGX)]
+ default_zeroed!(GlobalsSub::ver); // NOTE(review): presumably an all-zero `Default`; macro defined elsewhere
+
+ #[derive(Debug, Clone, Copy)]
+ #[repr(C)]
+ pub(crate) struct PowerZoneGlobal { // element type of `Globals::power_zones`
+ pub(crate) target: u32,
+ pub(crate) target_off: u32,
+ pub(crate) filter_tc: u32,
+ }
+ default_zeroed!(PowerZoneGlobal); // NOTE(review): presumably an all-zero `Default`; macro defined elsewhere
+
+ #[versions(AGX)]
+ #[derive(Debug)]
+ #[repr(C)]
+ pub(crate) struct Globals { // raw target of `InitData::globals`
+ pub(crate) ktrace_enable: u32,
+ pub(crate) unk_4: Array<0x20, u8>,
+
+ #[ver(V >= V13_2)] // gated on V13_2, a different threshold from most other fields here
+ pub(crate) unk_24_0: u32,
+
+ pub(crate) unk_24: u32,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_28_0: u32,
+
+ pub(crate) unk_28: u32,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_2c_0: u32,
+
+ pub(crate) unk_2c: u32,
+ pub(crate) unk_30: u32,
+ pub(crate) unk_34: u32,
+ pub(crate) unk_38: Array<0x1c, u8>,
+
+ pub(crate) sub: GlobalsSub::ver, // packed sub-structure stored inline
+
+ pub(crate) unk_80: Array<0xf80, u8>,
+ pub(crate) unk_1000: Array<0x7000, u8>,
+ pub(crate) unk_8000: Array<0x900, u8>,
+
+ #[ver(V >= V13_0B4 && V < V13_2)] // present only in the range [V13_0B4, V13_2)
+ pub(crate) unk_8900_0: u32,
+
+ pub(crate) unk_8900: u32,
+ pub(crate) pending_submissions: AtomicU32, // atomic, unlike the plain u32 fields around it
+ pub(crate) max_power: u32,
+ pub(crate) max_pstate_scaled: u32,
+ pub(crate) max_pstate_scaled_2: u32,
+ pub(crate) unk_8914: u32,
+ pub(crate) unk_8918: u32,
+ pub(crate) max_pstate_scaled_3: u32,
+ pub(crate) unk_8920: u32,
+ pub(crate) power_zone_count: u32, // NOTE(review): presumably the number of used `power_zones` entries; confirm
+ pub(crate) avg_power_filter_tc_periods: u32,
+ pub(crate) avg_power_ki_dt: F32,
+ pub(crate) avg_power_kp: F32,
+ pub(crate) avg_power_min_duty_cycle: u32,
+ pub(crate) avg_power_target_filter_tc: u32,
+ pub(crate) power_zones: Array<5, PowerZoneGlobal>, // room for five zones
+ pub(crate) unk_8978: Array<0x44, u8>,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_89bc_0: Array<0x3c, u8>,
+
+ pub(crate) unk_89bc: u32,
+ pub(crate) fast_die0_release_temp: u32,
+ pub(crate) unk_89c4: i32,
+ pub(crate) fast_die0_prop_tgt_delta: u32,
+ pub(crate) fast_die0_kp: F32,
+ pub(crate) fast_die0_ki_dt: F32,
+ pub(crate) unk_89d4: Array<0xc, u8>,
+ pub(crate) unk_89e0: u32,
+ pub(crate) max_power_2: u32,
+ pub(crate) ppm_kp: F32,
+ pub(crate) ppm_ki_dt: F32,
+ pub(crate) unk_89f0: u32,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_89f4_0: Array<0x8, u8>,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_89f4_8: u32,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_89f4_c: Array<0x50, u8>,
+
+ pub(crate) unk_89f4: u32,
+ pub(crate) hws1: HwDataShared1, // `HwDataShared1/2/3` are also embedded in `HwDataA`
+ pub(crate) hws2: HwDataShared2,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_hws2_0: Array<0x28, u8>,
+
+ pub(crate) hws3: HwDataShared3,
+ pub(crate) unk_9004: Array<8, u8>,
+ pub(crate) unk_900c: u32,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_9010_0: u32,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_9010_4: Array<0x14, u8>,
+
+ pub(crate) unk_9010: Array<0x2c, u8>,
+ pub(crate) unk_903c: u32,
+ pub(crate) unk_9040: Array<0xc0, u8>,
+ pub(crate) unk_9100: Array<0x6f00, u8>,
+ pub(crate) unk_10000: Array<0xe50, u8>,
+ pub(crate) unk_10e50: u32,
+ pub(crate) unk_10e54: Array<0x2c, u8>,
+ pub(crate) fault_control: u32,
+ pub(crate) do_init: u32,
+ pub(crate) unk_10e88: Array<0x188, u8>,
+ pub(crate) idle_ts: U64,
+ pub(crate) idle_unk: U64,
+ pub(crate) unk_11020: u32,
+ pub(crate) unk_11024: u32,
+ pub(crate) unk_11028: u32,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_1102c_0: u32,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_1102c_4: u32,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_1102c_8: u32,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_1102c_c: u32,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_1102c_10: u32,
+
+ pub(crate) unk_1102c: u32,
+ pub(crate) idle_off_delay_ms: AtomicU32, // atomic, unlike the two plain u32 delay fields below
+ pub(crate) fender_idle_off_delay_ms: u32,
+ pub(crate) fw_early_wake_timeout_ms: u32,
+ pub(crate) pending_stamps: Array<0x110, PendingStamp>, // 0x110 entries of two AtomicU32 each
+ pub(crate) unk_117bc: u32,
+ pub(crate) fault_info: FaultInfo,
+ pub(crate) counter: u32,
+ pub(crate) unk_118dc: u32,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_118e0_0: Array<0x9c, u8>,
+
+ pub(crate) unk_118e0: u32,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_118e4_0: u32,
+
+ pub(crate) unk_118e4: u32,
+ pub(crate) unk_118e8: u32,
+ pub(crate) unk_118ec: Array<0x15, u8>,
+ pub(crate) unk_11901: Array<0x43f, u8>,
+
+ #[ver(V >= V13_0B4)] // every remaining field exists only from V13_0B4 on
+ pub(crate) unk_11d40: Array<0x19c, u8>,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_11edc: u32,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_11ee0: Array<0x1c, u8>,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_11efc: u32,
+ }
+ #[versions(AGX)]
+ default_zeroed!(Globals::ver); // NOTE(review): presumably an all-zero `Default`; macro defined elsewhere
+
+ #[derive(Debug, Default, Clone, Copy)]
+ #[repr(C, packed)] // packed: no padding is inserted between fields
+ pub(crate) struct UatLevelInfo { // element type of `InitData::uat_level_info`
+ pub(crate) unk_3: u8, // note: declared before `unk_1` and `unk_2`
+ pub(crate) unk_1: u8,
+ pub(crate) unk_2: u8,
+ pub(crate) index_shift: u8,
+ pub(crate) num_entries: u16,
+ pub(crate) unk_4: u16,
+ pub(crate) unk_8: U64,
+ pub(crate) unk_10: U64,
+ pub(crate) index_mask: U64,
+ }
+
+ #[versions(AGX)]
+ #[derive(Debug)]
+ #[repr(C)]
+ pub(crate) struct InitData<'a> { // raw form of the host-side `InitData` (its `GpuStruct::Raw`)
+ #[ver(V >= V13_0B4)] // leading version words exist only from V13_0B4 on
+ pub(crate) ver_info: Array<0x4, u16>,
+
+ pub(crate) unk_buf: GpuPointer<'a, &'a [u8]>,
+ pub(crate) unk_8: u32,
+ pub(crate) unk_c: u32,
+ pub(crate) runtime_pointers: GpuPointer<'a, super::RuntimePointers::ver>,
+ pub(crate) globals: GpuPointer<'a, super::Globals::ver>,
+ pub(crate) fw_status: GpuPointer<'a, super::FwStatus>,
+ pub(crate) uat_page_size: u16,
+ pub(crate) uat_page_bits: u8,
+ pub(crate) uat_num_levels: u8, // NOTE(review): presumably how many `uat_level_info` entries are used; confirm
+ pub(crate) uat_level_info: Array<0x3, UatLevelInfo>, // room for three levels
+ pub(crate) __pad0: Pad<0x14>,
+ pub(crate) host_mapped_fw_allocations: u32,
+ pub(crate) unk_ac: u32,
+ pub(crate) unk_b0: u32,
+ pub(crate) unk_b4: u32,
+ pub(crate) unk_b8: u32,
+ }
+}
+
+#[derive(Debug)]
+pub(crate) struct ChannelRing<T: GpuStruct + Debug + Default, U: Copy> // host-side pair: state object + ring array
+where
+ for<'a> <T as GpuStruct>::Raw<'a>: Debug, // the raw form of `T` must be `Debug` for every lifetime
+{
+ pub(crate) state: GpuObject<T>, // channel state of type `T`
+ pub(crate) ring: GpuArray<U>, // ring storage with elements of type `U`
+}
+
+impl<T: GpuStruct + Debug + Default, U: Copy> ChannelRing<T, U>
+where
+    for<'a> <T as GpuStruct>::Raw<'a>: Debug,
+{
+    pub(crate) fn to_raw(&self) -> raw::ChannelRing<T, U> {
+        // Build the raw view from weak pointers to the state object and the ring array.
+        let state = Some(self.state.weak_pointer());
+        let ring = Some(self.ring.weak_pointer());
+        raw::ChannelRing { state, ring }
+    }
+}
+
+trivial_gpustruct!(FwStatus); // NOTE(review): presumably generates the host-side `FwStatus` + `GpuStruct` impl; macro defined elsewhere
+
+#[versions(AGX)]
+#[derive(Debug, Default)]
+pub(crate) struct GpuGlobalStatsVtx {} // host-side type with no fields of its own
+
+#[versions(AGX)]
+impl GpuStruct for GpuGlobalStatsVtx::ver {
+ type Raw<'a> = raw::GpuGlobalStatsVtx::ver; // raw form does not borrow, so `'a` is unused
+}
+
+#[versions(AGX)]
+#[derive(Debug, Default)]
+pub(crate) struct GpuGlobalStatsFrag {} // host-side type with no fields of its own
+
+#[versions(AGX)]
+impl GpuStruct for GpuGlobalStatsFrag::ver {
+ type Raw<'a> = raw::GpuGlobalStatsFrag::ver; // raw form does not borrow, so `'a` is unused
+}
+
+#[derive(Debug, Default)]
+pub(crate) struct GpuStatsComp {} // host-side type with no fields of its own; not version-gated
+
+impl GpuStruct for GpuStatsComp {
+ type Raw<'a> = raw::GpuStatsComp; // raw form does not borrow, so `'a` is unused
+}
+
+#[versions(AGX)]
+#[derive(Debug, Default)]
+pub(crate) struct HwDataA {} // host-side type with no fields of its own
+
+#[versions(AGX)]
+impl GpuStruct for HwDataA::ver {
+ type Raw<'a> = raw::HwDataA::ver; // raw form does not borrow, so `'a` is unused
+}
+
+#[versions(AGX)]
+#[derive(Debug, Default)]
+pub(crate) struct HwDataB {} // host-side type with no fields of its own
+
+#[versions(AGX)]
+impl GpuStruct for HwDataB::ver {
+ type Raw<'a> = raw::HwDataB::ver; // raw form does not borrow, so `'a` is unused
+}
+
+#[versions(AGX)]
+#[derive(Debug)]
+pub(crate) struct Stats { // groups the three statistics objects; held as `RuntimePointers::stats`
+ pub(crate) vtx: GpuObject<GpuGlobalStatsVtx::ver>,
+ pub(crate) frag: GpuObject<GpuGlobalStatsFrag::ver>,
+ pub(crate) comp: GpuObject<GpuStatsComp>,
+}
+
+#[versions(AGX)]
+#[derive(Debug)]
+pub(crate) struct RuntimePointers { // host-side holder of the objects the raw struct has pointer fields for
+ pub(crate) stats: Stats::ver,
+
+ pub(crate) hwdata_a: GpuObject<HwDataA::ver>,
+ pub(crate) unkptr_190: GpuArray<u8>,
+ pub(crate) unkptr_198: GpuArray<u8>,
+ pub(crate) hwdata_b: GpuObject<HwDataB::ver>, // single object; the raw struct has `hwdata_b` and `hwdata_b_2`
+
+ pub(crate) unkptr_1b8: GpuArray<u8>,
+ pub(crate) unkptr_1c0: GpuArray<u8>,
+ pub(crate) unkptr_1c8: GpuArray<u8>,
+
+ pub(crate) buffer_mgr_ctl: GpuArray<raw::BufferMgrCtl>, // single array; the raw struct has two pointer fields
+}
+
+#[versions(AGX)]
+impl GpuStruct for RuntimePointers::ver {
+ type Raw<'a> = raw::RuntimePointers::ver<'a>; // raw form carries the borrow lifetime
+}
+
+#[versions(AGX)]
+#[derive(Debug, Default)]
+pub(crate) struct Globals {} // host-side type with no fields of its own
+
+#[versions(AGX)]
+impl GpuStruct for Globals::ver {
+ type Raw<'a> = raw::Globals::ver; // raw form does not borrow, so `'a` is unused
+}
+
+#[versions(AGX)]
+#[derive(Debug)]
+pub(crate) struct InitData { // host-side holder of the four objects `raw::InitData` points to
+ pub(crate) unk_buf: GpuArray<u8>,
+ pub(crate) runtime_pointers: GpuObject<RuntimePointers::ver>,
+ pub(crate) globals: GpuObject<Globals::ver>,
+ pub(crate) fw_status: GpuObject<FwStatus>,
+}
+
+#[versions(AGX)]
+impl GpuStruct for InitData::ver {
+ type Raw<'a> = raw::InitData::ver<'a>; // raw form carries the borrow lifetime
+}
diff --git a/drivers/gpu/drm/asahi/fw/job.rs b/drivers/gpu/drm/asahi/fw/job.rs
new file mode 100644
index 000000000000..a0bbf67b1b1d
--- /dev/null
+++ b/drivers/gpu/drm/asahi/fw/job.rs
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+//! Common GPU job firmware structures
+
+use super::types::*;
+use crate::{default_zeroed, trivial_gpustruct};
+
+pub(crate) mod raw { // `#[repr(C)]` layouts for the common job structures
+ use super::*;
+
+ #[derive(Debug, Clone, Copy)]
+ #[repr(C)]
+ pub(crate) struct JobMeta { // per-job metadata: stamp pointers, stamp value/slot and identifiers
+ pub(crate) unk_4: u32,
+ pub(crate) stamp: GpuWeakPointer<Stamp>,
+ pub(crate) fw_stamp: GpuWeakPointer<FwStamp>,
+ pub(crate) stamp_value: EventValue,
+ pub(crate) stamp_slot: u32,
+ pub(crate) evctl_index: u32,
+ pub(crate) flush_stamps: u32,
+ pub(crate) uuid: u32,
+ pub(crate) cmd_seq: u32, // 32-bit here; `StartVertex`/`StartFragment` in microseq.rs use a `U64` `cmd_seq`
+ }
+
+ #[derive(Debug)]
+ #[repr(C)]
+ pub(crate) struct EncoderParams<'a> { // borrows `seq_buffer` for `'a`
+ pub(crate) unk_8: u32,
+ pub(crate) unk_c: u32,
+ pub(crate) unk_10: u32,
+ pub(crate) encoder_id: u32,
+ pub(crate) unk_18: u32,
+ pub(crate) iogpu_compute_unk44: u32,
+ pub(crate) seq_buffer: GpuPointer<'a, &'a [u64]>, // pointer to a slice of u64
+ pub(crate) unk_28: U64,
+ }
+
+ #[derive(Debug)]
+ #[repr(C)]
+ pub(crate) struct JobTimestamps { // a start/end pair of atomic 64-bit values
+ pub(crate) start: AtomicU64,
+ pub(crate) end: AtomicU64,
+ }
+ default_zeroed!(JobTimestamps); // NOTE(review): presumably an all-zero `Default`; macro defined elsewhere
+
+ #[derive(Debug)]
+ #[repr(C)]
+ pub(crate) struct RenderTimestamps { // one `JobTimestamps` each for vertex and fragment
+ pub(crate) vtx: JobTimestamps,
+ pub(crate) frag: JobTimestamps,
+ }
+ default_zeroed!(RenderTimestamps); // NOTE(review): presumably an all-zero `Default`; macro defined elsewhere
+}
+
+trivial_gpustruct!(JobTimestamps); // NOTE(review): presumably generates the host-side type + `GpuStruct` impl; macro defined elsewhere
+trivial_gpustruct!(RenderTimestamps); // same macro, for `raw::RenderTimestamps`
diff --git a/drivers/gpu/drm/asahi/fw/microseq.rs b/drivers/gpu/drm/asahi/fw/microseq.rs
new file mode 100644
index 000000000000..8deea3fb9914
--- /dev/null
+++ b/drivers/gpu/drm/asahi/fw/microseq.rs
@@ -0,0 +1,384 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+//! GPU firmware microsequence operations
+
+use super::types::*;
+use super::{buffer, compute, fragment, initdata, vertex, workqueue};
+use crate::default_zeroed;
+
+pub(crate) trait Operation {} // marker trait (no methods); implemented by each operation struct in this file
+
+#[derive(Debug, Copy, Clone)]
+#[repr(u32)]
+enum OpCode { // discriminants are cast with `as u32` when building an `OpHeader`
+ WaitForIdle = 0x01,
+ RetireStamp = 0x18,
+ #[allow(dead_code)] // in this file only `op::Timestamp::new` uses it, and that is itself marked dead_code
+ Timestamp = 0x19,
+ StartVertex = 0x22,
+ FinalizeVertex = 0x23,
+ StartFragment = 0x24,
+ FinalizeFragment = 0x25,
+ StartCompute = 0x29,
+ FinalizeCompute = 0x2a,
+}
+
+#[derive(Debug, Copy, Clone)]
+#[repr(u32)]
+pub(crate) enum Pipe { // single-bit values; `op::WaitForIdle::new` shifts them left by 8 into the header
+ Vertex = 1 << 0,
+ Fragment = 1 << 8,
+ Compute = 1 << 15,
+}
+
+pub(crate) const MAX_ATTACHMENTS: usize = 16; // length of `Attachments::list`
+
+#[derive(Debug, Clone, Copy)]
+#[repr(C)]
+pub(crate) struct Attachment { // element type of `Attachments::list`
+ pub(crate) address: U64,
+ pub(crate) size: u32,
+ pub(crate) unk_c: u16,
+ pub(crate) unk_e: u16,
+}
+default_zeroed!(Attachment); // NOTE(review): presumably an all-zero `Default`; macro defined elsewhere
+
+#[derive(Debug, Clone, Copy, Default)]
+#[repr(C)]
+pub(crate) struct Attachments { // fixed-capacity list; embedded as `StartFragment::attachments`
+ pub(crate) list: Array<MAX_ATTACHMENTS, Attachment>,
+ pub(crate) count: u32, // NOTE(review): presumably the number of valid `list` entries; confirm against the code that fills it
+}
+
+#[derive(Debug, Copy, Clone)]
+#[repr(transparent)]
+pub(crate) struct OpHeader(u32); // 32-bit header word wrapped by every `op::*` header type
+
+impl OpHeader {
+    const fn new(opcode: OpCode) -> Self {
+        Self::with_args(opcode, 0) // no argument bits: the value is just the opcode
+    }
+    const fn with_args(opcode: OpCode, args: u32) -> Self {
+        Self(args | opcode as u32) // opcode discriminant OR-ed with the caller's argument bits
+    }
+}
+
+macro_rules! simple_op { // defines a newtype `$name(OpHeader)` plus an argument-less `HEADER` constant
+ ($name:ident) => {
+ #[derive(Debug, Copy, Clone)]
+ pub(crate) struct $name(OpHeader);
+
+ impl $name {
+ pub(crate) const HEADER: $name = $name(OpHeader::new(OpCode::$name)); // `$name` must also name an `OpCode` variant
+ }
+ };
+}
+
+pub(crate) mod op { // typed header words, one newtype per opcode
+    use super::*;
+
+    simple_op!(StartVertex);
+    simple_op!(FinalizeVertex);
+    simple_op!(StartFragment);
+    simple_op!(FinalizeFragment);
+    simple_op!(StartCompute);
+    simple_op!(FinalizeCompute);
+
+    #[derive(Debug, Copy, Clone)]
+    pub(crate) struct RetireStamp(OpHeader);
+    impl RetireStamp {
+        pub(crate) const HEADER: Self = // opcode with the fixed argument bit 30 set
+            Self(OpHeader::with_args(OpCode::RetireStamp, 0x4000_0000));
+    }
+
+    #[derive(Debug, Copy, Clone)]
+    pub(crate) struct WaitForIdle(OpHeader);
+    impl WaitForIdle {
+        pub(crate) const fn new(pipe: Pipe) -> Self { // the pipe bit is shifted up by 8 before the OR
+            Self(OpHeader::with_args(OpCode::WaitForIdle, (pipe as u32) << 8))
+        }
+    }
+
+    #[derive(Debug, Copy, Clone)]
+    pub(crate) struct Timestamp(OpHeader);
+    impl Timestamp {
+        #[allow(dead_code)]
+        pub(crate) const fn new(flag: bool) -> Self { // `flag` becomes bit 31 of the header
+            Self(OpHeader::with_args(OpCode::Timestamp, if flag { 1 << 31 } else { 0 }))
+        }
+    }
+}
+
+#[derive(Debug)]
+#[repr(C)]
+pub(crate) struct WaitForIdle { // header-only operation; the pipe is encoded in the header itself
+ pub(crate) header: op::WaitForIdle,
+}
+
+impl Operation for WaitForIdle {}
+
+#[derive(Debug)]
+#[repr(C)]
+pub(crate) struct RetireStamp { // header-only operation with no further fields
+ pub(crate) header: op::RetireStamp,
+}
+
+impl Operation for RetireStamp {}
+
+#[versions(AGX)]
+#[derive(Debug)]
+#[repr(C)]
+pub(crate) struct Timestamp<'a> { // operation body for `OpCode::Timestamp`
+ pub(crate) header: op::Timestamp,
+ pub(crate) cur_ts: GpuWeakPointer<U64>,
+ pub(crate) start_ts: GpuWeakPointer<Option<GpuPointer<'a, AtomicU64>>>, // pointer to an optional pointer
+ pub(crate) update_ts: GpuWeakPointer<Option<GpuPointer<'a, AtomicU64>>>, // pointer to an optional pointer
+ pub(crate) work_queue: GpuWeakPointer<workqueue::QueueInfo::ver>,
+ pub(crate) unk_24: U64,
+
+ #[ver(V >= V13_0B4)] // extra pointer exists only from V13_0B4 on
+ pub(crate) unk_ts: GpuWeakPointer<U64>,
+
+ pub(crate) uuid: u32,
+ pub(crate) unk_30_padding: u32,
+}
+
+#[versions(AGX)]
+impl<'a> Operation for Timestamp::ver<'a> {}
+
+#[versions(AGX)]
+#[derive(Debug)]
+#[repr(C)]
+pub(crate) struct StartVertex<'a> { // operation body for `OpCode::StartVertex`
+ pub(crate) header: op::StartVertex,
+ pub(crate) tiling_params: GpuWeakPointer<vertex::raw::TilingParameters>,
+ pub(crate) job_params1: GpuWeakPointer<vertex::raw::JobParameters1::ver<'a>>,
+ pub(crate) buffer: GpuWeakPointer<buffer::Info::ver>,
+ pub(crate) scene: GpuWeakPointer<buffer::Scene::ver>,
+ pub(crate) stats: GpuWeakPointer<initdata::raw::GpuStatsVtx::ver>, // vertex stats layout from initdata.rs
+ pub(crate) work_queue: GpuWeakPointer<workqueue::QueueInfo::ver>,
+ pub(crate) vm_slot: u32,
+ pub(crate) unk_38: u32,
+ pub(crate) event_generation: u32,
+ pub(crate) buffer_slot: u32,
+ pub(crate) unk_44: u32,
+ pub(crate) cmd_seq: U64,
+ pub(crate) unk_50: u32,
+ pub(crate) unk_pointer: GpuWeakPointer<u32>,
+ pub(crate) unk_job_buf: GpuWeakPointer<U64>,
+ pub(crate) unk_64: u32,
+ pub(crate) unk_68: u32,
+ pub(crate) uuid: u32,
+ pub(crate) unk_70: u32,
+ pub(crate) unk_74: Array<0x1d, U64>,
+ pub(crate) unk_15c: u32,
+ pub(crate) unk_160: U64,
+ pub(crate) unk_168: u32,
+ pub(crate) unk_16c: u32,
+ pub(crate) unk_170: U64,
+
+ #[ver(V >= V13_0B4)] // `counter` and `notifier_buf` exist only from V13_0B4 on
+ pub(crate) counter: U64,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) notifier_buf: GpuWeakPointer<Array<0x8, u8>>,
+
+ pub(crate) unk_178: u32, // declared after the version-gated fields
+}
+
+#[versions(AGX)]
+impl<'a> Operation for StartVertex::ver<'a> {}
+
+#[versions(AGX)]
+#[derive(Debug)]
+#[repr(C)]
+pub(crate) struct FinalizeVertex { // operation body for `OpCode::FinalizeVertex`
+ pub(crate) header: op::FinalizeVertex,
+ pub(crate) scene: GpuWeakPointer<buffer::Scene::ver>,
+ pub(crate) buffer: GpuWeakPointer<buffer::Info::ver>,
+ pub(crate) stats: GpuWeakPointer<initdata::raw::GpuStatsVtx::ver>,
+ pub(crate) work_queue: GpuWeakPointer<workqueue::QueueInfo::ver>,
+ pub(crate) vm_slot: u32,
+ pub(crate) unk_28: u32,
+ pub(crate) unk_pointer: GpuWeakPointer<u32>,
+ pub(crate) unk_34: u32,
+ pub(crate) uuid: u32,
+ pub(crate) fw_stamp: GpuWeakPointer<FwStamp>,
+ pub(crate) stamp_value: EventValue,
+ pub(crate) unk_48: U64,
+ pub(crate) unk_50: u32,
+ pub(crate) unk_54: u32,
+ pub(crate) unk_58: U64,
+ pub(crate) unk_60: u32,
+ pub(crate) unk_64: u32,
+ pub(crate) unk_68: u32,
+
+ #[ver(G >= G14 && V < V13_0B4)] // gated on both `G` and `V`, unlike the V-only gates elsewhere
+ pub(crate) unk_68_g14: U64,
+
+ pub(crate) restart_branch_offset: i32, // signed 32-bit value
+ pub(crate) unk_70: u32,
+
+ #[ver(V >= V13_0B4)] // trailing 0x10 bytes exist only from V13_0B4 on
+ pub(crate) unk_74: Array<0x10, u8>,
+}
+
+#[versions(AGX)]
+impl Operation for FinalizeVertex::ver {}
+
+#[versions(AGX)]
+#[derive(Debug)]
+#[repr(C)]
+pub(crate) struct StartFragment<'a> { // operation body for `OpCode::StartFragment`
+ pub(crate) header: op::StartFragment,
+ pub(crate) job_params2: GpuWeakPointer<fragment::raw::JobParameters2>,
+ pub(crate) job_params1: GpuWeakPointer<fragment::raw::JobParameters1::ver<'a>>,
+ pub(crate) scene: GpuPointer<'a, buffer::Scene::ver>, // `GpuPointer<'a, _>` here; the other pointers are weak
+ pub(crate) stats: GpuWeakPointer<initdata::raw::GpuStatsFrag::ver>, // fragment stats layout from initdata.rs
+ pub(crate) busy_flag: GpuWeakPointer<u32>,
+ pub(crate) tvb_overflow_count: GpuWeakPointer<u32>,
+ pub(crate) unk_pointer: GpuWeakPointer<u32>,
+ pub(crate) work_queue: GpuWeakPointer<workqueue::QueueInfo::ver>,
+ pub(crate) work_item: GpuWeakPointer<fragment::RunFragment::ver>,
+ pub(crate) vm_slot: u32,
+ pub(crate) unk_50: u32,
+ pub(crate) event_generation: u32,
+ pub(crate) buffer_slot: u32,
+ pub(crate) unk_5c: u32,
+ pub(crate) cmd_seq: U64,
+ pub(crate) unk_68: u32,
+ pub(crate) unk_758_flag: GpuWeakPointer<u32>,
+ pub(crate) unk_job_buf: GpuWeakPointer<U64>,
+ pub(crate) unk_7c: u32,
+ pub(crate) unk_80: u32,
+ pub(crate) unk_84: u32,
+ pub(crate) uuid: u32,
+ pub(crate) attachments: Attachments, // list + count stored inline
+ pub(crate) unk_190: u32,
+
+ #[ver(V >= V13_0B4)] // `counter` and `notifier_buf` exist only from V13_0B4 on
+ pub(crate) counter: U64,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) notifier_buf: GpuWeakPointer<Array<0x8, u8>>,
+}
+
+#[versions(AGX)]
+impl<'a> Operation for StartFragment::ver<'a> {}
+
+/// Raw `#[repr(C)]` layout of the `FinalizeFragment` operation.
+///
+/// Field order and field types define the in-memory layout, so fields must not be reordered or
+/// resized. `unk_*` fields have no known meaning. 64-bit values use the unaligned `U64`
+/// wrapper rather than `u64`.
+#[versions(AGX)]
+#[derive(Debug)]
+#[repr(C)]
+pub(crate) struct FinalizeFragment {
+ pub(crate) header: op::FinalizeFragment,
+ pub(crate) uuid: u32,
+ pub(crate) unk_8: u32,
+ pub(crate) fw_stamp: GpuWeakPointer<FwStamp>,
+ pub(crate) stamp_value: EventValue,
+ pub(crate) unk_18: u32,
+ pub(crate) scene: GpuWeakPointer<buffer::Scene::ver>,
+ pub(crate) buffer: GpuWeakPointer<buffer::Info::ver>,
+ pub(crate) unk_2c: U64,
+ pub(crate) stats: GpuWeakPointer<initdata::raw::GpuStatsFrag::ver>,
+ pub(crate) unk_pointer: GpuWeakPointer<u32>,
+ pub(crate) busy_flag: GpuWeakPointer<u32>,
+ pub(crate) work_queue: GpuWeakPointer<workqueue::QueueInfo::ver>,
+ pub(crate) work_item: GpuWeakPointer<fragment::RunFragment::ver>,
+ pub(crate) vm_slot: u32,
+ pub(crate) unk_60: u32,
+ pub(crate) unk_758_flag: GpuWeakPointer<u32>,
+ pub(crate) unk_6c: U64,
+ pub(crate) unk_74: U64,
+ pub(crate) unk_7c: U64,
+ pub(crate) unk_84: U64,
+ pub(crate) unk_8c: U64,
+
+ // Extra 64-bit field that only exists for `G == G14` combined with `V < V13_0B4`.
+ #[ver(G == G14 && V < V13_0B4)]
+ pub(crate) unk_8c_g14: U64,
+
+ pub(crate) restart_branch_offset: i32,
+ pub(crate) unk_98: u32,
+
+ // Trailing 0x10 bytes that only exist for `V >= V13_0B4`.
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_9c: Array<0x10, u8>,
+}
+
+// Empty impl: `FinalizeFragment` is an `Operation`, no trait items are defined here.
+#[versions(AGX)]
+impl Operation for FinalizeFragment::ver {}
+
+/// Raw `#[repr(C)]` layout of the `StartCompute` operation.
+///
+/// Field order and field types define the in-memory layout, so fields must not be reordered or
+/// resized. `unk_*` fields have no known meaning. All pointers are `GpuWeakPointer`s.
+#[versions(AGX)]
+#[derive(Debug)]
+#[repr(C)]
+pub(crate) struct StartCompute<'a> {
+ pub(crate) header: op::StartCompute,
+ pub(crate) unk_pointer: GpuWeakPointer<Array<0x54, u8>>,
+ pub(crate) job_params1: GpuWeakPointer<compute::raw::JobParameters1<'a>>,
+ pub(crate) stats: GpuWeakPointer<initdata::GpuStatsComp>,
+ pub(crate) work_queue: GpuWeakPointer<workqueue::QueueInfo::ver>,
+ pub(crate) vm_slot: u32,
+ pub(crate) unk_28: u32,
+ pub(crate) event_generation: u32,
+ pub(crate) cmd_seq: U64,
+ pub(crate) unk_38: u32,
+ pub(crate) job_params2: GpuWeakPointer<compute::raw::JobParameters2::ver<'a>>,
+ pub(crate) unk_44: u32,
+ pub(crate) uuid: u32,
+ pub(crate) attachments: Attachments,
+ pub(crate) padding: u32,
+
+ // The three trailing fields below only exist for `V >= V13_0B4`.
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_flag: GpuWeakPointer<U32>,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) counter: U64,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) notifier_buf: GpuWeakPointer<Array<0x8, u8>>,
+}
+
+// Empty impl: `StartCompute` is an `Operation`, no trait items are defined here.
+#[versions(AGX)]
+impl<'a> Operation for StartCompute::ver<'a> {}
+
+/// Raw `#[repr(C)]` layout of the `FinalizeCompute` operation.
+///
+/// Field order and field types define the in-memory layout, so fields must not be reordered or
+/// resized. `unk_*` fields have no known meaning. Several fields exist only for some
+/// `#[ver(...)]` conditions, so the layout differs between variants.
+#[versions(AGX)]
+#[derive(Debug)]
+#[repr(C)]
+pub(crate) struct FinalizeCompute<'a> {
+ pub(crate) header: op::FinalizeCompute,
+ pub(crate) stats: GpuWeakPointer<initdata::GpuStatsComp>,
+ pub(crate) work_queue: GpuWeakPointer<workqueue::QueueInfo::ver>,
+ pub(crate) vm_slot: u32,
+ // Only exists for `V < V13_0B4`.
+ #[ver(V < V13_0B4)]
+ pub(crate) unk_18: u32,
+ pub(crate) job_params2: GpuWeakPointer<compute::raw::JobParameters2::ver<'a>>,
+ pub(crate) unk_24: u32,
+ pub(crate) uuid: u32,
+ pub(crate) fw_stamp: GpuWeakPointer<FwStamp>,
+ pub(crate) stamp_value: EventValue,
+ pub(crate) unk_38: u32,
+ pub(crate) unk_3c: u32,
+ pub(crate) unk_40: u32,
+ pub(crate) unk_44: u32,
+ pub(crate) unk_48: u32,
+ pub(crate) unk_4c: u32,
+ pub(crate) unk_50: u32,
+ pub(crate) unk_54: u32,
+ pub(crate) unk_58: u32,
+
+ // Extra 64-bit field that only exists for `G == G14` combined with `V < V13_0B4`.
+ #[ver(G == G14 && V < V13_0B4)]
+ pub(crate) unk_5c_g14: U64,
+
+ pub(crate) restart_branch_offset: i32,
+ pub(crate) unk_60: u32,
+
+ // The three trailing fields below only exist for `V >= V13_0B4`: 0xd unknown bytes, a
+ // pointer to an unaligned `U32`, then 0x7 more unknown bytes.
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_64: Array<0xd, u8>,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_flag: GpuWeakPointer<U32>,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_79: Array<0x7, u8>,
+}
+
+// Empty impl: `FinalizeCompute` is an `Operation`, no trait items are defined here.
+#[versions(AGX)]
+impl<'a> Operation for FinalizeCompute::ver<'a> {}
diff --git a/drivers/gpu/drm/asahi/fw/mod.rs b/drivers/gpu/drm/asahi/fw/mod.rs
new file mode 100644
index 000000000000..a5649aa20d3a
--- /dev/null
+++ b/drivers/gpu/drm/asahi/fw/mod.rs
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+//! Firmware structures for Apple AGX GPUs
+
+pub(crate) mod buffer;
+pub(crate) mod channels;
+pub(crate) mod compute;
+pub(crate) mod event;
+pub(crate) mod fragment;
+pub(crate) mod initdata;
+pub(crate) mod job;
+pub(crate) mod microseq;
+pub(crate) mod types;
+pub(crate) mod vertex;
+pub(crate) mod workqueue;
diff --git a/drivers/gpu/drm/asahi/fw/types.rs b/drivers/gpu/drm/asahi/fw/types.rs
new file mode 100644
index 000000000000..c1a07be1e047
--- /dev/null
+++ b/drivers/gpu/drm/asahi/fw/types.rs
@@ -0,0 +1,233 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+//! Common types for firmware structure definitions
+
+use crate::{alloc, object};
+use core::fmt;
+use core::ops::{Deref, DerefMut, Index, IndexMut};
+
+pub(crate) use crate::event::EventValue;
+pub(crate) use crate::object::{GpuPointer, GpuStruct, GpuWeakPointer};
+pub(crate) use crate::{f32, float::F32};
+
+pub(crate) use ::alloc::boxed::Box;
+pub(crate) use core::fmt::Debug;
+pub(crate) use core::marker::PhantomData;
+pub(crate) use core::sync::atomic::{AtomicI32, AtomicU32, AtomicU64};
+pub(crate) use kernel::macros::versions;
+
+// Make the trait visible
+pub(crate) use crate::alloc::Allocator as _Allocator;
+
+/// General allocator type used for the driver
+///
+/// Alias of `alloc::DefaultAllocator`.
+pub(crate) type Allocator = alloc::DefaultAllocator;
+
+/// General GpuObject type used for the driver
+///
+/// Fixes the allocation parameter of `object::GpuObject` to
+/// `alloc::GenericAlloc<T, alloc::DefaultAllocation>`.
+pub(crate) type GpuObject<T> =
+ object::GpuObject<T, alloc::GenericAlloc<T, alloc::DefaultAllocation>>;
+
+/// General GpuArray type used for the driver
+///
+/// Fixes the allocation parameter of `object::GpuArray` the same way as `GpuObject`.
+pub(crate) type GpuArray<T> = object::GpuArray<T, alloc::GenericAlloc<T, alloc::DefaultAllocation>>;
+
+/// General GpuOnlyArray type used for the driver
+///
+/// Fixes the allocation parameter of `object::GpuOnlyArray` the same way as `GpuObject`.
+pub(crate) type GpuOnlyArray<T> =
+ object::GpuOnlyArray<T, alloc::GenericAlloc<T, alloc::DefaultAllocation>>;
+
+/// A stamp slot that is shared between firmware and the driver.
+///
+/// `#[repr(transparent)]` wrapper around a single `AtomicU32`.
+#[derive(Debug, Default)]
+#[repr(transparent)]
+pub(crate) struct Stamp(pub(crate) AtomicU32);
+
+/// A stamp slot that is for private firmware use.
+///
+/// This is a separate type to guard against pointer type confusion.
+/// It has the same representation as `Stamp` (a transparent `AtomicU32`).
+#[derive(Debug, Default)]
+#[repr(transparent)]
+pub(crate) struct FwStamp(pub(crate) AtomicU32);
+
+/// A `u64` with an alignment requirement of one byte.
+///
+/// Using this for 64-bit fields avoids having to mark whole firmware structures as packed,
+/// which is incompatible with `#[derive(Debug)]` and atomics.
+#[derive(Copy, Clone, Default)]
+#[repr(C, packed(1))]
+pub(crate) struct U64(pub(crate) u64);
+
+// SAFETY: `U64` only wraps a `u64`, for which the all-zero bit pattern is a valid value.
+unsafe impl Zeroed for U64 {}
+
+impl fmt::Debug for U64 {
+    /// Formats the wrapped value as `0x`-prefixed hexadecimal.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // Copy the field out first: a reference into a packed struct may be unaligned.
+        let value = self.0;
+        write!(f, "{:#x}", value)
+    }
+}
+
+/// A `u32` with an alignment requirement of one byte.
+///
+/// Using this for 32-bit fields avoids having to mark whole firmware structures as packed,
+/// which is incompatible with `#[derive(Debug)]` and atomics.
+#[derive(Copy, Clone, Default)]
+#[repr(C, packed(1))]
+pub(crate) struct U32(pub(crate) u32);
+
+// SAFETY: `U32` only wraps a `u32`, for which the all-zero bit pattern is a valid value.
+unsafe impl Zeroed for U32 {}
+
+impl fmt::Debug for U32 {
+    /// Formats the wrapped value as `0x`-prefixed hexadecimal.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // Copy the field out first: a reference into a packed struct may be unaligned.
+        let value = self.0;
+        write!(f, "{:#x}", value)
+    }
+}
+
+// SAFETY: The all-zero bit pattern is a valid value (zero) for every primitive integer type
+// listed below.
+unsafe impl Zeroed for u8 {}
+unsafe impl Zeroed for u16 {}
+unsafe impl Zeroed for u32 {}
+unsafe impl Zeroed for u64 {}
+unsafe impl Zeroed for i8 {}
+unsafe impl Zeroed for i16 {}
+unsafe impl Zeroed for i32 {}
+unsafe impl Zeroed for i64 {}
+
+/// Create a dummy `Debug` implementation, for when we need it but it's too painful to write by
+/// hand or not very useful.
+///
+/// The generated `fmt` ignores the value and always writes the literal text `...`.
+/// The expansion names the trait as plain `Debug`, so `Debug` must be in scope at the
+/// invocation site.
+#[macro_export]
+macro_rules! no_debug {
+ ($type:ty) => {
+ impl Debug for $type {
+ fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+ write!(f, "...")
+ }
+ }
+ };
+}
+
+/// Types which can be safely initialized with an all-zero bit pattern.
+///
+/// See: https://github.com/rust-lang/rfcs/issues/2626
+///
+/// `Default` is a supertrait, so every implementor must also provide `Default`.
+///
+/// # Safety
+///
+/// This trait must only be implemented if a type only contains primitive types which can be
+/// zero-initialized, FFI structs intended to be zero-initialized, or other types which impl Zeroed.
+pub(crate) unsafe trait Zeroed: Default {
+ /// Returns a value of `Self` whose bytes are all zero, produced by `core::mem::zeroed()`.
+ fn zeroed() -> Self {
+ // SAFETY: The user is responsible for ensuring this is safe.
+ unsafe { core::mem::zeroed() }
+ }
+}
+
+/// Implement Zeroed for a given type (and Default along with it).
+///
+/// Two forms are accepted: `default_zeroed!(<'a, ...>, Type)` for types with lifetime
+/// parameters, and `default_zeroed!(Type)` for types without. In both cases the generated
+/// `Default::default()` returns `Zeroed::zeroed()`. The expansion names the trait as plain
+/// `Zeroed`, so `Zeroed` must be in scope at the invocation site.
+///
+/// # Safety
+///
+/// This macro must only be used if a type only contains primitive types which can be
+/// zero-initialized, FFI structs intended to be zero-initialized, or other types which impl Zeroed.
+#[macro_export]
+macro_rules! default_zeroed {
+ // Form for types that take one or more lifetime parameters.
+ (<$($lt:lifetime),*>, $type:ty) => {
+ impl<$($lt),*> Default for $type {
+ fn default() -> $type {
+ Zeroed::zeroed()
+ }
+ }
+ // SAFETY: The user is responsible for ensuring this is safe.
+ unsafe impl<$($lt),*> Zeroed for $type {}
+ };
+ // Form for types without generic parameters.
+ ($type:ty) => {
+ impl Default for $type {
+ fn default() -> $type {
+ Zeroed::zeroed()
+ }
+ }
+ // SAFETY: The user is responsible for ensuring this is safe.
+ unsafe impl Zeroed for $type {}
+ };
+}
+
+/// `N` bytes of padding for use inside firmware structures.
+///
+/// The contents are hidden from `Debug` output, which always prints `<pad>`.
+#[derive(Copy, Clone)]
+#[repr(C, packed)]
+pub(crate) struct Pad<const N: usize>([u8; N]);
+
+// SAFETY: `Pad` is a plain byte array, for which the all-zero bit pattern is a valid value.
+unsafe impl<const N: usize> Zeroed for Pad<N> {}
+
+impl<const N: usize> Default for Pad<N> {
+    /// Returns padding with every byte set to zero.
+    fn default() -> Self {
+        Self::zeroed()
+    }
+}
+
+impl<const N: usize> fmt::Debug for Pad<N> {
+    /// Writes the fixed placeholder `<pad>` instead of the padding bytes.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str("<pad>")
+    }
+}
+
+/// A fixed-size array wrapper that provides `Default`/`Zeroed` impls for any length.
+///
+/// The wrapper is `#[repr(C)]` around `[T; N]` and dereferences to the inner array.
+#[derive(Copy, Clone)]
+#[repr(C)]
+pub(crate) struct Array<const N: usize, T>([T; N]);
+
+impl<const N: usize, T> Array<N, T> {
+    /// Wraps an existing `[T; N]`.
+    pub(crate) fn new(data: [T; N]) -> Self {
+        Array(data)
+    }
+}
+
+// SAFETY: Arrays of Zeroed values can be safely Zeroed.
+unsafe impl<const N: usize, T: Zeroed> Zeroed for Array<N, T> {}
+
+impl<const N: usize, T: Zeroed> Default for Array<N, T> {
+    /// Returns an array whose bytes are all zero.
+    fn default() -> Self {
+        Self::zeroed()
+    }
+}
+
+impl<const N: usize, T> Index<usize> for Array<N, T> {
+    type Output = T;
+
+    /// Borrows the element at `index`; indexing is bounds-checked by the inner array.
+    fn index(&self, index: usize) -> &Self::Output {
+        let Array(items) = self;
+        &items[index]
+    }
+}
+
+impl<const N: usize, T> IndexMut<usize> for Array<N, T> {
+    /// Mutably borrows the element at `index`; indexing is bounds-checked by the inner array.
+    fn index_mut(&mut self, index: usize) -> &mut Self::Output {
+        let Array(items) = self;
+        &mut items[index]
+    }
+}
+
+impl<const N: usize, T> Deref for Array<N, T> {
+    type Target = [T; N];
+
+    /// Borrows the wrapped array.
+    fn deref(&self) -> &Self::Target {
+        let Array(items) = self;
+        items
+    }
+}
+
+impl<const N: usize, T> DerefMut for Array<N, T> {
+    /// Mutably borrows the wrapped array.
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        let Array(items) = self;
+        items
+    }
+}
+
+impl<const N: usize, T: Sized + fmt::Debug> fmt::Debug for Array<N, T> {
+    /// Formats exactly like the wrapped `[T; N]`.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        fmt::Debug::fmt(&self.0, f)
+    }
+}
+
+/// Convenience macro to define an identically-named trivial GpuStruct with no inner fields for a
+/// given raw type name.
+///
+/// For `trivial_gpustruct!(Foo)` this emits an empty `pub(crate) struct Foo {}` deriving
+/// `Debug` and `Default`, plus `impl GpuStruct for Foo` with `Raw<'a> = raw::Foo`. The
+/// invocation site therefore needs a `raw` module containing `Foo` and `GpuStruct` in scope.
+#[macro_export]
+macro_rules! trivial_gpustruct {
+ ($type:ident) => {
+ #[derive(Debug, Default)]
+ pub(crate) struct $type {}
+
+ impl GpuStruct for $type {
+ type Raw<'a> = raw::$type;
+ }
+ };
+}
diff --git a/drivers/gpu/drm/asahi/fw/vertex.rs b/drivers/gpu/drm/asahi/fw/vertex.rs
new file mode 100644
index 000000000000..959a0913e693
--- /dev/null
+++ b/drivers/gpu/drm/asahi/fw/vertex.rs
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+//! GPU vertex job firmware structures
+
+use super::types::*;
+use super::{event, job, workqueue};
+use crate::{buffer, fw, microseq, mmu};
+use kernel::sync::Arc;
+
+pub(crate) mod raw {
+ use super::*;
+
+ /// Raw `#[repr(C)]` block of tiling parameters, embedded by value in `RunVertex`.
+ ///
+ /// Consists of nine `u32` fields and two `u16` fields (`x_max`, `y_max`); field order
+ /// defines the layout and must not be changed. `unk_*` fields have no known meaning.
+ #[derive(Debug, Default, Copy, Clone)]
+ #[repr(C)]
+ pub(crate) struct TilingParameters {
+ pub(crate) rgn_size: u32,
+ pub(crate) unk_4: u32,
+ pub(crate) ppp_ctrl: u32,
+ pub(crate) x_max: u16,
+ pub(crate) y_max: u16,
+ pub(crate) te_screen: u32,
+ pub(crate) te_mtile1: u32,
+ pub(crate) te_mtile2: u32,
+ pub(crate) tiles_per_mtile: u32,
+ pub(crate) tpc_stride: u32,
+ pub(crate) unk_24: u32,
+ pub(crate) unk_28: u32,
+ }
+
+ /// First raw `#[repr(C)]` parameter block of a vertex job, embedded by value in `RunVertex`.
+ ///
+ /// Field order and field types define the layout and must not be changed. Many fields exist
+ /// only for `G < G14`; for `G >= G14` a trailing `Pad` of `8 * 9` bytes is present instead.
+ /// NOTE(review): that pad presumably keeps the total size equal across variants -- confirm.
+ /// `unk_*` fields have no known meaning.
+ #[versions(AGX)]
+ #[derive(Debug)]
+ #[repr(C)]
+ pub(crate) struct JobParameters1<'a> {
+ pub(crate) unk_0: U64,
+ pub(crate) unk_8: F32,
+ pub(crate) unk_c: F32,
+ pub(crate) tvb_tilemap: GpuPointer<'a, &'a [u8]>,
+ #[ver(G < G14)]
+ pub(crate) tvb_cluster_tilemaps: Option<GpuPointer<'a, &'a [u8]>>,
+ pub(crate) tpc: GpuPointer<'a, &'a [u8]>,
+ pub(crate) tvb_heapmeta: GpuPointer<'a, &'a [u8]>,
+ pub(crate) iogpu_unk_54: u32,
+ pub(crate) iogpu_unk_55: u32,
+ pub(crate) iogpu_unk_56: U64,
+ #[ver(G < G14)]
+ pub(crate) tvb_cluster_meta1: Option<GpuPointer<'a, &'a [u8]>>,
+ pub(crate) utile_config: u32,
+ pub(crate) unk_4c: u32,
+ pub(crate) ppp_multisamplectl: U64,
+ pub(crate) tvb_heapmeta_2: GpuPointer<'a, &'a [u8]>,
+ #[ver(G < G14)]
+ pub(crate) unk_60: U64,
+ #[ver(G < G14)]
+ pub(crate) core_mask: Array<2, u32>,
+ pub(crate) preempt_buf1: GpuPointer<'a, &'a [u8]>,
+ pub(crate) preempt_buf2: GpuPointer<'a, &'a [u8]>,
+ pub(crate) unk_80: U64,
+ pub(crate) preempt_buf3: GpuPointer<'a, &'a [u8]>,
+ pub(crate) encoder_addr: U64,
+ #[ver(G < G14)]
+ pub(crate) tvb_cluster_meta2: Option<GpuPointer<'a, &'a [u8]>>,
+ #[ver(G < G14)]
+ pub(crate) tvb_cluster_meta3: Option<GpuPointer<'a, &'a [u8]>>,
+ #[ver(G < G14)]
+ pub(crate) tiling_control: u32,
+ #[ver(G < G14)]
+ pub(crate) unk_ac: u32,
+ pub(crate) unk_b0: Array<6, U64>,
+ pub(crate) pipeline_base: U64,
+ #[ver(G < G14)]
+ pub(crate) tvb_cluster_meta4: Option<GpuPointer<'a, &'a [u8]>>,
+ #[ver(G < G14)]
+ pub(crate) unk_f0: U64,
+ pub(crate) unk_f8: U64,
+ pub(crate) unk_100: Array<3, U64>,
+ pub(crate) unk_118: u32,
+ #[ver(G >= G14)]
+ pub(crate) __pad: Pad<{ 8 * 9 }>,
+ }
+
+ /// Second raw `#[repr(C)]` parameter block of a vertex job, embedded by value in `RunVertex`.
+ ///
+ /// Unlike `JobParameters1` this struct is not versioned. Field order and field types define
+ /// the layout and must not be changed. `unk_*` fields have no known meaning.
+ #[derive(Debug)]
+ #[repr(C)]
+ pub(crate) struct JobParameters2<'a> {
+ pub(crate) unk_480: Array<4, u32>,
+ pub(crate) unk_498: U64,
+ pub(crate) unk_4a0: u32,
+ pub(crate) preempt_buf1: GpuPointer<'a, &'a [u8]>,
+ pub(crate) unk_4ac: u32,
+ pub(crate) unk_4b0: U64,
+ pub(crate) unk_4b8: u32,
+ pub(crate) unk_4bc: U64,
+ pub(crate) unk_4c4_padding: Array<0x48, u8>,
+ pub(crate) unk_50c: u32,
+ pub(crate) unk_510: U64,
+ pub(crate) unk_518: U64,
+ pub(crate) unk_520: U64,
+ }
+
+ /// Raw `#[repr(C)]` layout of the vertex job command.
+ ///
+ /// The first field is the `workqueue::CommandType` tag. `JobParameters1`, `TilingParameters`,
+ /// `JobParameters2`, `job::raw::EncoderParams` and `job::raw::JobMeta` are embedded by value.
+ /// Field order and field types define the layout and must not be changed. `unk_*` fields
+ /// have no known meaning.
+ #[versions(AGX)]
+ #[derive(Debug)]
+ #[repr(C)]
+ pub(crate) struct RunVertex<'a> {
+ pub(crate) tag: workqueue::CommandType,
+
+ // Only exists for `V >= V13_0B4`; sits directly after the tag.
+ #[ver(V >= V13_0B4)]
+ pub(crate) counter: U64,
+
+ pub(crate) vm_slot: u32,
+ pub(crate) unk_8: u32,
+ pub(crate) notifier: GpuPointer<'a, event::Notifier::ver>,
+ pub(crate) buffer_slot: u32,
+ pub(crate) unk_1c: u32,
+ pub(crate) buffer: GpuPointer<'a, fw::buffer::Info::ver>,
+ pub(crate) scene: GpuPointer<'a, fw::buffer::Scene::ver>,
+ pub(crate) unk_buffer_buf: GpuWeakPointer<[u8]>,
+ pub(crate) unk_34: u32,
+ pub(crate) job_params1: JobParameters1::ver<'a>,
+ pub(crate) unk_154: Array<0x268, u8>,
+ pub(crate) tiling_params: TilingParameters,
+ pub(crate) unk_3e8: Array<0x74, u8>,
+ pub(crate) tpc: GpuPointer<'a, &'a [u8]>,
+ pub(crate) tpc_size: U64,
+ pub(crate) microsequence: GpuPointer<'a, &'a [u8]>,
+ pub(crate) microsequence_size: u32,
+ pub(crate) fragment_stamp_slot: u32,
+ pub(crate) fragment_stamp_value: EventValue,
+ pub(crate) unk_pointee: u32,
+ pub(crate) unk_pad: u32,
+ pub(crate) job_params2: JobParameters2<'a>,
+ pub(crate) encoder_params: job::raw::EncoderParams<'a>,
+ pub(crate) unk_55c: u32,
+ pub(crate) unk_560: u32,
+ pub(crate) memoryless_rts_used: u32,
+ pub(crate) unk_568: u32,
+ pub(crate) unk_56c: u32,
+ pub(crate) meta: job::raw::JobMeta,
+ pub(crate) unk_after_meta: u32,
+ pub(crate) unk_buf_0: U64,
+ pub(crate) unk_buf_8: U64,
+ pub(crate) unk_buf_10: U64,
+ pub(crate) cur_ts: U64,
+ // Both timestamp pointers are optional.
+ pub(crate) start_ts: Option<GpuPointer<'a, AtomicU64>>,
+ pub(crate) end_ts: Option<GpuPointer<'a, AtomicU64>>,
+ pub(crate) unk_5c4: u32,
+ pub(crate) unk_5c8: u32,
+ pub(crate) unk_5cc: u32,
+ pub(crate) unk_5d0: u32,
+ pub(crate) client_sequence: u8,
+ pub(crate) pad_5d5: Array<3, u8>,
+ pub(crate) unk_5d8: u32,
+ pub(crate) unk_5dc: u8,
+
+ // The two trailing fields below only exist for `V >= V13_0B4`.
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_ts: U64,
+
+ #[ver(V >= V13_0B4)]
+ pub(crate) unk_5dd_8: Array<0x1b, u8>,
+ }
+}
+
+/// Driver-side companion of `raw::RunVertex`.
+///
+/// Holds owned or reference-counted handles (`Arc`s, the microsequence, the VM binding).
+/// NOTE(review): presumably these keep the resources referenced by the raw command alive
+/// while it exists -- confirm against the code that builds the command.
+#[versions(AGX)]
+#[derive(Debug)]
+pub(crate) struct RunVertex {
+ pub(crate) notifier: Arc<GpuObject<event::Notifier::ver>>,
+ pub(crate) scene: Arc<buffer::Scene::ver>,
+ pub(crate) micro_seq: microseq::MicroSequence,
+ pub(crate) vm_bind: mmu::VmBind,
+ pub(crate) timestamps: Arc<GpuObject<job::RenderTimestamps>>,
+}
+
+// Ties the driver-side struct to its raw layout: `Raw<'a>` is `raw::RunVertex`.
+#[versions(AGX)]
+impl GpuStruct for RunVertex::ver {
+ type Raw<'a> = raw::RunVertex::ver<'a>;
+}
+
+// Empty impl: `RunVertex` is a `workqueue::Command`, no trait items are defined here.
+#[versions(AGX)]
+impl workqueue::Command for RunVertex::ver {}
diff --git a/drivers/gpu/drm/asahi/fw/workqueue.rs b/drivers/gpu/drm/asahi/fw/workqueue.rs
new file mode 100644
index 000000000000..e81025b6c014
--- /dev/null
+++ b/drivers/gpu/drm/asahi/fw/workqueue.rs
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+//! GPU work queue firmware structures
+
+use super::event;
+use super::types::*;
+use crate::event::EventValue;
+use crate::{default_zeroed, trivial_gpustruct};
+use kernel::sync::Arc;
+
+/// Tag identifying the kind of a work queue command.
+///
+/// `#[repr(u32)]` with explicit discriminants; it is used as the leading `tag` field of raw
+/// command structures such as `raw::Barrier`. The value 5 is not assigned to any variant.
+#[derive(Debug)]
+#[repr(u32)]
+pub(crate) enum CommandType {
+ RunVertex = 0,
+ RunFragment = 1,
+ // Defined for completeness; `allow(dead_code)` silences the unused-variant warning.
+ #[allow(dead_code)]
+ RunBlitter = 2,
+ RunCompute = 3,
+ Barrier = 4,
+ InitBuffer = 6,
+}
+
+/// Marker trait for work queue commands: a `GpuStruct` that is also `Send + Sync`.
+pub(crate) trait Command: GpuStruct + Send + Sync {}
+
+pub(crate) mod raw {
+ use super::*;
+
+ /// Raw `#[repr(C)]` layout of the barrier command.
+ ///
+ /// The first field is the `CommandType` tag. Field order and field types define the layout
+ /// and must not be changed.
+ #[derive(Debug)]
+ #[repr(C)]
+ pub(crate) struct Barrier {
+ pub(crate) tag: CommandType,
+ pub(crate) wait_stamp: GpuWeakPointer<FwStamp>,
+ pub(crate) wait_value: EventValue,
+ pub(crate) wait_slot: u32,
+ pub(crate) stamp_self: EventValue,
+ pub(crate) uuid: u32,
+ pub(crate) unk: u32,
+ }
+
+ /// Raw `#[repr(C)]` GPU context data, made up entirely of bytes and byte arrays.
+ ///
+ /// The fields add up to 0x40 bytes, and each `unk_N` suffix equals the field's byte offset.
+ /// The byte-array fields are private; only the single-byte fields are `pub(crate)`.
+ /// The meaning of the individual bytes is unknown.
+ #[derive(Debug, Clone, Copy)]
+ #[repr(C)]
+ pub(crate) struct GpuContextData {
+ pub(crate) unk_0: u8,
+ pub(crate) unk_1: u8,
+ unk_2: Array<0x2, u8>,
+ pub(crate) unk_4: u8,
+ pub(crate) unk_5: u8,
+ unk_6: Array<0x18, u8>,
+ pub(crate) unk_1e: u8,
+ pub(crate) unk_1f: u8,
+ unk_20: Array<0x3, u8>,
+ pub(crate) unk_23: u8,
+ unk_24: Array<0x1c, u8>,
+ }
+
+ impl Default for GpuContextData {
+ /// Returns the initial contents: zeroed byte arrays plus fixed non-zero values in
+ /// `unk_0`, `unk_1`, `unk_5`, `unk_1e` and `unk_23`.
+ fn default() -> Self {
+ Self {
+ unk_0: 0xff,
+ unk_1: 0xff,
+ unk_2: Default::default(),
+ unk_4: 0,
+ unk_5: 1,
+ unk_6: Default::default(),
+ unk_1e: 0xff,
+ unk_1f: 0,
+ unk_20: Default::default(),
+ unk_23: 2,
+ unk_24: Default::default(),
+ }
+ }
+ }
+
+ /// Raw `#[repr(C)]` ring buffer state.
+ ///
+ /// Every 32-bit field is followed by 0xc bytes of padding, so consecutive fields are spaced
+ /// 0x10 bytes apart. `default_zeroed!` below provides `Default`/`Zeroed`, so a default
+ /// value has all bytes zero.
+ #[derive(Debug)]
+ #[repr(C)]
+ pub(crate) struct RingState {
+ pub(crate) gpu_doneptr: AtomicU32,
+ __pad0: Pad<0xc>,
+ pub(crate) unk_10: AtomicU32,
+ __pad1: Pad<0xc>,
+ pub(crate) unk_20: AtomicU32,
+ __pad2: Pad<0xc>,
+ pub(crate) gpu_rptr: AtomicU32,
+ __pad3: Pad<0xc>,
+ pub(crate) cpu_wptr: AtomicU32,
+ __pad4: Pad<0xc>,
+ pub(crate) rb_size: u32,
+ __pad5: Pad<0xc>,
+ // This isn't part of the structure, but it's here as a
+ // debugging hack so we can inspect what ring position
+ // the driver considered complete and freeable.
+ pub(crate) cpu_freeptr: AtomicU32,
+ __pad6: Pad<0xc>,
+ }
+ // Generates `Default` (all-zero) and `Zeroed` for `RingState`.
+ default_zeroed!(RingState);
+
+ /// Raw `#[repr(C)]` priority descriptor, embedded by value in `QueueInfo`.
+ ///
+ /// The six positional fields are unnamed and their individual meaning is not documented
+ /// here; values are only ever taken from the `PRIORITY` table.
+ #[derive(Debug, Clone, Copy)]
+ #[repr(C)]
+ pub(crate) struct Priority(u32, u32, U64, u32, u32, u32);
+
+ /// Table of the four predefined `Priority` values, indexed 0 to 3.
+ pub(crate) const PRIORITY: [Priority; 4] = [
+ Priority(0, 0, U64(0xffff_ffff_ffff_0000), 1, 0, 1),
+ Priority(1, 1, U64(0xffff_ffff_0000_0000), 0, 0, 0),
+ Priority(2, 2, U64(0xffff_0000_0000_0000), 0, 0, 2),
+ Priority(3, 3, U64(0x0000_0000_0000_0000), 0, 0, 3),
+ ];
+
+ impl Default for Priority {
+ /// The default priority is entry 2 of the `PRIORITY` table.
+ fn default() -> Priority {
+ PRIORITY[2]
+ }
+ }
+
+ /// Raw `#[repr(C)]` work queue descriptor.
+ ///
+ /// It points at the ring state, the ring itself (a slice of `u64`), the notifier list, a
+ /// byte buffer and the GPU context data. Field order and field types define the layout and
+ /// must not be changed. `unk_*` fields have no known meaning.
+ #[versions(AGX)]
+ #[derive(Debug)]
+ #[repr(C)]
+ pub(crate) struct QueueInfo<'a> {
+ pub(crate) state: GpuPointer<'a, super::RingState>,
+ pub(crate) ring: GpuPointer<'a, &'a [u64]>,
+ pub(crate) notifier_list: GpuPointer<'a, event::NotifierList>,
+ pub(crate) gpu_buf: GpuPointer<'a, &'a [u8]>,
+ pub(crate) gpu_rptr1: AtomicU32,
+ pub(crate) gpu_rptr2: AtomicU32,
+ pub(crate) gpu_rptr3: AtomicU32,
+ pub(crate) event_id: AtomicI32,
+ pub(crate) priority: Priority,
+ pub(crate) unk_4c: i32,
+ pub(crate) uuid: u32,
+ pub(crate) unk_54: i32,
+ pub(crate) unk_58: U64,
+ pub(crate) busy: AtomicU32,
+ pub(crate) __pad: Pad<0x20>,
+ pub(crate) unk_84_state: AtomicU32,
+ pub(crate) unk_88: u32,
+ pub(crate) unk_8c: u32,
+ pub(crate) unk_90: u32,
+ pub(crate) unk_94: u32,
+ pub(crate) pending: AtomicU32,
+ pub(crate) unk_9c: u32,
+ // Only exists for `V >= V13_2`.
+ #[ver(V >= V13_2)]
+ pub(crate) unk_a0_0: u32,
+ pub(crate) gpu_context: GpuPointer<'a, super::GpuContextData>,
+ pub(crate) unk_a8: U64,
+ // Only exists for `V >= V13_2`.
+ #[ver(V >= V13_2)]
+ pub(crate) unk_b0: u32,
+ }
+}
+
+// Each invocation defines an empty driver-side struct of the given name whose `GpuStruct::Raw`
+// type is the identically named struct in `raw`.
+trivial_gpustruct!(Barrier);
+trivial_gpustruct!(GpuContextData);
+trivial_gpustruct!(RingState);
+
+// Empty impl: `Barrier` is a work queue `Command`, no trait items are defined here.
+impl Command for Barrier {}
+
+/// Driver-side companion of `raw::QueueInfo`.
+///
+/// Owns the ring state object, the ring array and the byte buffer, and holds shared (`Arc`)
+/// references to the notifier list and the GPU context.
+#[versions(AGX)]
+#[derive(Debug)]
+pub(crate) struct QueueInfo {
+ pub(crate) state: GpuObject<RingState>,
+ pub(crate) ring: GpuArray<u64>,
+ pub(crate) gpu_buf: GpuArray<u8>,
+ pub(crate) notifier_list: Arc<GpuObject<event::NotifierList>>,
+ pub(crate) gpu_context: Arc<crate::workqueue::GpuContext>,
+}
+
+// Ties the driver-side struct to its raw layout: `Raw<'a>` is `raw::QueueInfo`.
+#[versions(AGX)]
+impl GpuStruct for QueueInfo::ver {
+ type Raw<'a> = raw::QueueInfo::ver<'a>;
+}
diff --git a/drivers/gpu/drm/asahi/gem.rs b/drivers/gpu/drm/asahi/gem.rs
new file mode 100644
index 000000000000..b334bebb0e8e
--- /dev/null
+++ b/drivers/gpu/drm/asahi/gem.rs
@@ -0,0 +1,301 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+//! Asahi driver GEM object implementation
+//!
+//! Basic wrappers and adaptations between generic GEM shmem objects and this driver's
+//! view of what a GPU buffer object is. It is in charge of keeping track of all mappings for
+//! each GEM object so we can remove them when a client (File) or a Vm are destroyed, as well as
+//! implementing RTKit buffers on top of GEM objects for firmware use.
+
+use kernel::{
+ bindings,
+ drm::{gem, gem::shmem},
+ error::Result,
+ prelude::*,
+ soc::apple::rtkit,
+ sync::smutex::Mutex,
+};
+
+use kernel::drm::gem::BaseObject;
+
+use core::sync::atomic::{AtomicU64, Ordering};
+
+use crate::debug::*;
+use crate::driver::AsahiDevice;
+use crate::file::DrmFile;
+
+/// Debug class of this module. NOTE(review): presumably picked up by `mod_pr_debug!` -- confirm.
+const DEBUG_CLASS: DebugFlags = DebugFlags::Gem;
+
+/// Represents the inner data of a GEM object for this driver.
+///
+/// Instances are created by the `gem::BaseDriverObject::new` callback with default values;
+/// `new_kernel_object()` and `new_object()` then fill in `kernel`, `flags` and `vm_id`.
+pub(crate) struct DriverObject {
+ /// Whether this is a kernel-created object.
+ kernel: bool,
+ /// Object creation flags.
+ flags: u32,
+ /// VM ID for VM-private objects.
+ ///
+ /// When set, the `map_*` methods of `ObjectRef` refuse any other `Vm` with `EINVAL`.
+ vm_id: Option<u64>,
+ /// Locked list of mapping tuples: (file_id, vm_id, mapping)
+ mappings: Mutex<Vec<(u64, u64, crate::mmu::Mapping)>>,
+ /// ID for debug
+ ///
+ /// Taken from the global `GEM_ID` counter when the object is created.
+ id: u64,
+}
+
+/// Type alias for the shmem GEM object type for this driver.
+pub(crate) type Object = shmem::Object<DriverObject>;
+
+/// Type alias for the SGTable type for this driver.
+pub(crate) type SGTable = shmem::SGTable<DriverObject>;
+
+/// A shared reference to a GEM object for this driver.
+///
+/// Bundles the GEM reference with a `VMap` that starts out as `None` and is created on the
+/// first call to `ObjectRef::vmap()`.
+pub(crate) struct ObjectRef {
+ /// The underlying GEM object reference
+ pub(crate) gem: gem::ObjectRef<shmem::Object<DriverObject>>,
+ /// The kernel-side VMap of this object, if needed
+ vmap: Option<shmem::VMap<DriverObject>>,
+}
+
+/// Global counter that hands out `DriverObject::id` values, starting at 0.
+static GEM_ID: AtomicU64 = AtomicU64::new(0);
+
+impl DriverObject {
+    /// Drop all object mappings for a given file ID.
+    ///
+    /// Used on file close.
+    ///
+    /// Every entry whose file ID matches is removed. The `map_*` methods of `ObjectRef` only
+    /// reject a second mapping per VM ID, so several entries may share one file ID and
+    /// stopping at the first match would leave the others behind.
+    fn drop_file_mappings(&self, file_id: u64) {
+        self.mappings
+            .lock()
+            .retain(|(mapped_fid, _mapped_vmid, _mapping)| *mapped_fid != file_id);
+    }
+
+    /// Drop all object mappings for a given VM ID.
+    ///
+    /// Used on VM destroy.
+    ///
+    /// Every entry whose VM ID matches is removed, so this does not rely on there being at
+    /// most one mapping per VM.
+    fn drop_vm_mappings(&self, vm_id: u64) {
+        self.mappings
+            .lock()
+            .retain(|(_mapped_fid, mapped_vmid, _mapping)| *mapped_vmid != vm_id);
+    }
+}
+
+impl ObjectRef {
+    /// Create a new wrapper for a raw GEM object reference.
+    ///
+    /// The kernel-side `VMap` is not created here; it is created on the first `vmap()` call.
+    pub(crate) fn new(gem: gem::ObjectRef<shmem::Object<DriverObject>>) -> ObjectRef {
+        ObjectRef { gem, vmap: None }
+    }
+
+    /// Return the `VMap` for this object, creating it if necessary.
+    ///
+    /// If creating the `VMap` fails, the error is returned and no `VMap` is stored.
+    pub(crate) fn vmap(&mut self) -> Result<&mut shmem::VMap<DriverObject>> {
+        // Reuse the cached VMap when there is one, otherwise create it. On failure the slot
+        // was already `None`, so taking it first loses nothing.
+        let vmap = match self.vmap.take() {
+            Some(existing) => existing,
+            None => self.gem.vmap()?,
+        };
+        Ok(self.vmap.insert(vmap))
+    }
+
+    /// Return the IOVA of this object at which it is mapped in a given `Vm` identified by its ID,
+    /// if it is mapped in that `Vm`.
+    pub(crate) fn iova(&self, vm_id: u64) -> Option<usize> {
+        let mappings = self.gem.mappings.lock();
+        mappings
+            .iter()
+            .find(|(_mapped_fid, mapped_vmid, _mapping)| *mapped_vmid == vm_id)
+            .map(|(_mapped_fid, _mapped_vmid, mapping)| mapping.iova())
+    }
+
+    /// Returns the size of an object in bytes
+    pub(crate) fn size(&self) -> usize {
+        self.gem.size()
+    }
+
+    /// Checks that this object may be mapped into the `Vm` with the given ID.
+    ///
+    /// Returns Err(EINVAL) if the object is private to a different `Vm`.
+    fn check_vm(&self, vm_id: u64) -> Result {
+        match self.gem.vm_id {
+            Some(owner_id) if owner_id != vm_id => Err(EINVAL),
+            _ => Ok(()),
+        }
+    }
+
+    /// Returns whether `mappings` already holds an entry for the `Vm` with the given ID.
+    fn is_mapped_in(mappings: &[(u64, u64, crate::mmu::Mapping)], vm_id: u64) -> bool {
+        mappings
+            .iter()
+            .any(|(_mapped_fid, mapped_vmid, _mapping)| *mapped_vmid == vm_id)
+    }
+
+    /// Maps an object into a given `Vm` at any free address.
+    ///
+    /// Returns the IOVA of the new mapping.
+    ///
+    /// Returns Err(EINVAL) if the object is private to a different `Vm`.
+    /// Returns Err(EBUSY) if there is already a mapping.
+    pub(crate) fn map_into(&mut self, vm: &crate::mmu::Vm) -> Result<usize> {
+        let vm_id = vm.id();
+        self.check_vm(vm_id)?;
+
+        // The lock is held from the duplicate check until the new entry has been pushed.
+        let mut mappings = self.gem.mappings.lock();
+        if Self::is_mapped_in(mappings.as_slice(), vm_id) {
+            return Err(EBUSY);
+        }
+
+        let sgt = self.gem.sg_table()?;
+        let new_mapping = vm.map(self.gem.size(), sgt)?;
+
+        let iova = new_mapping.iova();
+        mappings.try_push((vm.file_id(), vm_id, new_mapping))?;
+        Ok(iova)
+    }
+
+    /// Maps an object into a given `Vm` at any free address within a given range.
+    ///
+    /// Returns the IOVA of the new mapping.
+    ///
+    /// Returns Err(EINVAL) if the object is private to a different `Vm`.
+    /// Returns Err(EBUSY) if there is already a mapping.
+    pub(crate) fn map_into_range(
+        &mut self,
+        vm: &crate::mmu::Vm,
+        start: u64,
+        end: u64,
+        alignment: u64,
+        prot: u32,
+        guard: bool,
+    ) -> Result<usize> {
+        let vm_id = vm.id();
+        self.check_vm(vm_id)?;
+
+        // The lock is held from the duplicate check until the new entry has been pushed.
+        let mut mappings = self.gem.mappings.lock();
+        if Self::is_mapped_in(mappings.as_slice(), vm_id) {
+            return Err(EBUSY);
+        }
+
+        let sgt = self.gem.sg_table()?;
+        let new_mapping =
+            vm.map_in_range(self.gem.size(), sgt, alignment, start, end, prot, guard)?;
+
+        let iova = new_mapping.iova();
+        mappings.try_push((vm.file_id(), vm_id, new_mapping))?;
+        Ok(iova)
+    }
+
+    /// Maps an object into a given `Vm` at a specific address.
+    ///
+    /// Returns Err(EINVAL) if the object is private to a different `Vm`.
+    /// Returns Err(EBUSY) if there is already a mapping.
+    /// Returns Err(ENOSPC) if the requested address is already busy.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the mapping returned by the `Vm` does not start at `addr`.
+    pub(crate) fn map_at(
+        &mut self,
+        vm: &crate::mmu::Vm,
+        addr: u64,
+        prot: u32,
+        guard: bool,
+    ) -> Result {
+        let vm_id = vm.id();
+        self.check_vm(vm_id)?;
+
+        // The lock is held from the duplicate check until the new entry has been pushed.
+        let mut mappings = self.gem.mappings.lock();
+        if Self::is_mapped_in(mappings.as_slice(), vm_id) {
+            return Err(EBUSY);
+        }
+
+        let sgt = self.gem.sg_table()?;
+        let new_mapping = vm.map_at(addr, self.gem.size(), sgt, prot, guard)?;
+
+        let iova = new_mapping.iova();
+        assert!(iova == addr as usize);
+        mappings.try_push((vm.file_id(), vm_id, new_mapping))?;
+        Ok(())
+    }
+
+    /// Drop all mappings for this object owned by a given `Vm` identified by its ID.
+    pub(crate) fn drop_vm_mappings(&mut self, vm_id: u64) {
+        self.gem.drop_vm_mappings(vm_id);
+    }
+
+    /// Drop all mappings for this object owned by a given `File` identified by its ID.
+    pub(crate) fn drop_file_mappings(&mut self, file_id: u64) {
+        self.gem.drop_file_mappings(file_id);
+    }
+}
+
+/// Create a new kernel-owned GEM object.
+///
+/// The object is flagged as kernel-created, gets no creation flags and is made
+/// non-exportable before it is wrapped in an `ObjectRef`.
+pub(crate) fn new_kernel_object(dev: &AsahiDevice, size: usize) -> Result<ObjectRef> {
+    let mut obj = shmem::Object::<DriverObject>::new(dev, size)?;
+    obj.kernel = true;
+    obj.flags = 0;
+
+    obj.set_exportable(false);
+
+    mod_pr_debug!("DriverObject new kernel object id={}\n", obj.id);
+    let gem_ref = obj.into_ref();
+    Ok(ObjectRef::new(gem_ref))
+}
+
+/// Create a new user-owned GEM object with the given flags.
+///
+/// `vm_id` makes the object private to that VM when it is `Some`; only objects without a
+/// `vm_id` are exportable. Write-combining is enabled unless `flags` contains
+/// `ASAHI_GEM_WRITEBACK`.
+pub(crate) fn new_object(
+    dev: &AsahiDevice,
+    size: usize,
+    flags: u32,
+    vm_id: Option<u64>,
+) -> Result<ObjectRef> {
+    let mut obj = shmem::Object::<DriverObject>::new(dev, size)?;
+    obj.kernel = false;
+    obj.flags = flags;
+    obj.vm_id = vm_id;
+
+    let shareable = vm_id.is_none();
+    obj.set_exportable(shareable);
+    let writeback = flags & bindings::ASAHI_GEM_WRITEBACK != 0;
+    obj.set_wc(!writeback);
+
+    mod_pr_debug!(
+        "DriverObject new user object: vm_id={:?} id={}\n",
+        vm_id,
+        obj.id
+    );
+    let gem_ref = obj.into_ref();
+    Ok(ObjectRef::new(gem_ref))
+}
+
+/// Look up a GEM object handle for a `File` and return an `ObjectRef` for it.
+///
+/// Any lookup error is passed through unchanged. The returned `ObjectRef` has no `VMap` yet.
+pub(crate) fn lookup_handle(file: &DrmFile, handle: u32) -> Result<ObjectRef> {
+    let gem_ref = shmem::Object::lookup_handle(file, handle)?;
+    Ok(ObjectRef::new(gem_ref))
+}
+
+impl gem::BaseDriverObject<Object> for DriverObject {
+    /// Callback to create the inner data of a GEM object
+    ///
+    /// The new object gets the next ID from `GEM_ID`, an empty mapping list, and defaults
+    /// (`kernel: false`, `flags: 0`, `vm_id: None`) for the remaining fields.
+    fn new(_dev: &AsahiDevice, _size: usize) -> Result<DriverObject> {
+        let new_id = GEM_ID.fetch_add(1, Ordering::Relaxed);
+        mod_pr_debug!("DriverObject::new id={}\n", new_id);
+
+        let inner = DriverObject {
+            id: new_id,
+            kernel: false,
+            flags: 0,
+            vm_id: None,
+            mappings: Mutex::new(Vec::new()),
+        };
+        Ok(inner)
+    }
+
+    /// Callback to drop all mappings for a GEM object owned by a given `File`
+    fn close(obj: &Object, file: &DrmFile) {
+        mod_pr_debug!("DriverObject::close vm_id={:?} id={}\n", obj.vm_id, obj.id);
+        let closing_file_id = file.file_id();
+        obj.drop_file_mappings(closing_file_id);
+    }
+}
+
+impl Drop for DriverObject {
+ /// Only emits a debug message; the fields are then dropped as usual.
+ fn drop(&mut self) {
+ mod_pr_debug!("DriverObject::drop vm_id={:?} id={}\n", self.vm_id, self.id);
+ }
+}
+
+impl shmem::DriverObject for DriverObject {
+ /// The DRM driver type these shmem objects belong to.
+ type Driver = crate::driver::AsahiDriver;
+}
+
+impl rtkit::Buffer for ObjectRef {
+    /// Returns the IOVA of this object in the `Vm` with ID 0, or Err(EIO) if it is not mapped
+    /// there.
+    fn iova(&self) -> Result<usize> {
+        // Calls the inherent `ObjectRef::iova(vm_id)`, not this trait method.
+        match self.iova(0) {
+            Some(addr) => Ok(addr),
+            None => Err(EIO),
+        }
+    }
+
+    /// Returns the kernel mapping of this object as a byte slice.
+    ///
+    /// The `VMap` is not created here: Err(ENOMEM) is returned unless `vmap()` was called
+    /// successfully beforehand.
+    fn buf(&mut self) -> Result<&mut [u8]> {
+        match self.vmap.as_mut() {
+            Some(vmap) => Ok(vmap.as_mut_slice()),
+            None => Err(ENOMEM),
+        }
+    }
+}
diff --git a/drivers/gpu/drm/asahi/gpu.rs b/drivers/gpu/drm/asahi/gpu.rs
new file mode 100644
index 000000000000..1384b52d11a2
--- /dev/null
+++ b/drivers/gpu/drm/asahi/gpu.rs
@@ -0,0 +1,1088 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+//! Top-level GPU manager
+//!
+//! This module is the root of all GPU firmware management for a given driver instance. It is
+//! responsible for initialization, owning the top-level managers (events, UAT, etc.), and
+//! communicating with the raw RtKit endpoints to send and receive messages to/from the GPU
+//! firmware.
+//!
+//! It is also the point where diverging driver firmware/GPU variants (using the versions macro)
+//! are unified, so that the top level of the driver itself (in `driver`) does not have to concern
+//! itself with version dependence.
+
+use core::any::Any;
+use core::sync::atomic::{AtomicBool, AtomicU64, Ordering};
+use core::time::Duration;
+
+use kernel::{
+ delay::coarse_sleep,
+ error::code::*,
+ macros::versions,
+ prelude::*,
+ soc::apple::rtkit,
+ sync::{smutex::Mutex, Arc, Guard, UniqueArc},
+ time,
+ types::ForeignOwnable,
+};
+
+use crate::alloc::Allocator;
+use crate::box_in_place;
+use crate::debug::*;
+use crate::driver::AsahiDevice;
+use crate::fw::channels::PipeType;
+use crate::fw::types::U64;
+use crate::{
+ alloc, buffer, channel, event, fw, gem, hw, initdata, mem, mmu, queue, regs, workqueue,
+};
+
+/// Debug class used by the `mod_*` debug macros in this module.
+const DEBUG_CLASS: DebugFlags = DebugFlags::Gpu;
+
+/// Firmware endpoint for init & incoming notifications.
+const EP_FIRMWARE: u8 = 0x20;
+
+/// Doorbell endpoint for work/message submissions.
+const EP_DOORBELL: u8 = 0x21;
+
+/// Initialize the GPU firmware.
+const MSG_INIT: u64 = 0x81 << 48;
+/// Mask (low 44 bits) applied to the initdata GPU VA before it is ORed into `MSG_INIT`.
+const INIT_DATA_MASK: u64 = (1 << 44) - 1;
+
+/// TX channel doorbell.
+const MSG_TX_DOORBELL: u64 = 0x83 << 48;
+/// Firmware control channel doorbell.
+const MSG_FWCTL: u64 = 0x84 << 48;
+// /// Halt the firmware (?).
+// const MSG_HALT: u64 = 0x85 << 48;
+
+/// Receive channel doorbell notification.
+const MSG_RX_DOORBELL: u64 = 0x42 << 48;
+
+/// Doorbell number for firmware kicks/wakeups.
+const DOORBELL_KICKFW: u64 = 0x10;
+/// Doorbell number for device control channel kicks.
+const DOORBELL_DEVCTRL: u64 = 0x11;
+
+// Upper kernel half VA address ranges.
+// Each `*_TOP` value below is passed together with its `*_BASE` to the matching allocator.
+/// Private (cached) firmware structure VA range base.
+const IOVA_KERN_PRIV_BASE: u64 = 0xffffffa000000000;
+/// Private (cached) firmware structure VA range top.
+const IOVA_KERN_PRIV_TOP: u64 = 0xffffffa7ffffffff;
+/// Shared (uncached) firmware structure VA range base.
+const IOVA_KERN_SHARED_BASE: u64 = 0xffffffa800000000;
+/// Shared (uncached) firmware structure VA range top.
+const IOVA_KERN_SHARED_TOP: u64 = 0xffffffa9ffffffff;
+/// Shared (uncached) read-only firmware structure VA range base.
+const IOVA_KERN_SHARED_RO_BASE: u64 = 0xffffffaa00000000;
+/// Shared (uncached) read-only firmware structure VA range top.
+const IOVA_KERN_SHARED_RO_TOP: u64 = 0xffffffabffffffff;
+/// GPU/FW shared structure VA range base.
+const IOVA_KERN_GPU_BASE: u64 = 0xffffffaf00000000;
+/// GPU/FW shared structure VA range top.
+const IOVA_KERN_GPU_TOP: u64 = 0xffffffafffffffff;
+
+/// Timeout for entering the halt state after a fault or request.
+///
+/// Used by `recover()` as the upper bound of its poll loop on the firmware `halted` flag.
+const HALT_ENTER_TIMEOUT_MS: u64 = 100;
+
+/// Global allocators used for kernel-half structures.
+///
+/// All four allocators are created in `GpuManager::new()` on top of the kernel VM, each with its
+/// own VA range and protection mode.
+pub(crate) struct KernelAllocators {
+ /// Allocator for the `IOVA_KERN_PRIV_*` range (`mmu::PROT_FW_PRIV_RW`).
+ pub(crate) private: alloc::DefaultAllocator,
+ /// Allocator for the `IOVA_KERN_SHARED_*` range (`mmu::PROT_FW_SHARED_RW`).
+ pub(crate) shared: alloc::DefaultAllocator,
+ /// Allocator for the `IOVA_KERN_SHARED_RO_*` range (`mmu::PROT_FW_SHARED_RO`).
+ pub(crate) shared_ro: alloc::DefaultAllocator,
+ /// Allocator for the `IOVA_KERN_GPU_*` range (`mmu::PROT_GPU_FW_SHARED_RW`).
+ pub(crate) gpu: alloc::DefaultAllocator,
+}
+
+/// Receive (GPU->driver) ring buffer channels.
+///
+/// All of these are polled from `recv_message()` whenever an RX doorbell message arrives.
+#[versions(AGX)]
+struct RxChannels {
+ /// Event channel; also holds a reference to the manager (set at the end of `new()`).
+ event: channel::EventChannel,
+ /// Firmware log channel.
+ fw_log: channel::FwLogChannel,
+ /// KTrace channel.
+ ktrace: channel::KTraceChannel,
+ /// Statistics channel (version-dependent).
+ stats: channel::StatsChannel::ver,
+}
+
+/// GPU work submission pipe channels (driver->GPU).
+///
+/// Each vector holds `NUM_PIPES` channels; `run_job()` indexes them by job priority.
+#[versions(AGX)]
+struct PipeChannels {
+ /// Vertex pipes.
+ pub(crate) vtx: Vec<Mutex<channel::PipeChannel::ver>>,
+ /// Fragment pipes.
+ pub(crate) frag: Vec<Mutex<channel::PipeChannel::ver>>,
+ /// Compute pipes.
+ pub(crate) comp: Vec<Mutex<channel::PipeChannel::ver>>,
+}
+
+/// Misc command transmit (driver->GPU) channels.
+#[versions(AGX)]
+struct TxChannels {
+ /// Device control channel, used for the initialize and context destruction commands.
+ pub(crate) device_control: channel::DeviceControlChannel::ver,
+}
+
+/// Number of work submission pipes per type, one for each priority level.
+const NUM_PIPES: usize = 4;
+
+/// A generic monotonically incrementing counter that hands out unique IDs for object instances
+/// within the driver.
+pub(crate) struct ID(AtomicU64);
+
+impl ID {
+    /// Build a counter whose first issued ID is `val`.
+    fn new(val: u64) -> ID {
+        Self(AtomicU64::new(val))
+    }
+
+    /// Return the current ID and advance the counter by one.
+    pub(crate) fn next(&self) -> u64 {
+        let Self(counter) = self;
+        counter.fetch_add(1, Ordering::Relaxed)
+    }
+}
+
+impl Default for ID {
+    /// Default counters start at 2, since 0 and 1 are considered reserved for the system.
+    fn default() -> Self {
+        ID::new(2)
+    }
+}
+
+/// A guard representing one active submission on the GPU. When dropped, decrements the active
+/// submission count.
+///
+/// Created by `GpuManager::ver::start_op()`, which performs the matching increment.
+pub(crate) struct OpGuard(Arc<dyn GpuManagerPriv>);
+
+impl Drop for OpGuard {
+ /// Report the end of the operation to the owning manager via `end_op()`.
+ fn drop(&mut self) {
+ self.0.end_op();
+ }
+}
+
+/// Set of global sequence IDs used in the driver.
+///
+/// Each counter is an independent `ID`, so all of them start at the `ID` default value.
+#[derive(Default)]
+pub(crate) struct SequenceIDs {
+ /// `File` instance ID.
+ pub(crate) file: ID,
+ /// `Vm` instance ID.
+ pub(crate) vm: ID,
+ /// Submission instance ID.
+ pub(crate) submission: ID,
+ /// `Queue` instance ID.
+ pub(crate) queue: ID,
+}
+
+/// Top-level GPU manager that owns all the global state relevant to the driver instance.
+#[versions(AGX)]
+pub(crate) struct GpuManager {
+ /// The device this manager belongs to.
+ dev: AsahiDevice,
+ /// Static hardware configuration.
+ cfg: &'static hw::HwConfig,
+ /// Dynamic configuration built by `make_dyncfg()`.
+ dyncfg: Box<hw::DynConfig>,
+ /// Firmware initialization data; its GPU VA is sent to the firmware in `init()`.
+ pub(crate) initdata: Box<fw::types::GpuObject<fw::initdata::InitData::ver>>,
+ /// UAT (GPU MMU) instance.
+ uat: Box<mmu::Uat>,
+ /// Set once the firmware has reported a crash; checked via `is_crashed()`.
+ crashed: AtomicBool,
+ /// Kernel-half allocators.
+ alloc: Mutex<KernelAllocators>,
+ /// MMIO mappings created by `iomap()`, kept here so they stay alive.
+ io_mappings: Vec<mmu::Mapping>,
+ /// RtKit instance; `None` only while `new()` is still constructing the manager.
+ rtkit: Mutex<Option<Box<rtkit::RtKit<GpuManager::ver>>>>,
+ /// Receive (GPU->driver) channels.
+ rx_channels: Mutex<Box<RxChannels::ver>>,
+ /// Misc transmit (driver->GPU) channels.
+ tx_channels: Mutex<Box<TxChannels::ver>>,
+ /// Firmware control channel.
+ fwctl_channel: Mutex<Box<channel::FwCtlChannel>>,
+ /// Work submission pipes.
+ pipes: PipeChannels::ver,
+ /// Global event manager, shared with the event channel and queues.
+ event_manager: Arc<event::EventManager>,
+ /// Buffer manager passed to newly created queues.
+ buffer_mgr: buffer::BufferManager,
+ /// Global sequence ID counters.
+ ids: SequenceIDs,
+}
+
+/// Trait used to abstract the firmware/GPU-dependent variants of the GpuManager.
+pub(crate) trait GpuManager: Send + Sync {
+ /// Cast as an Any type.
+ fn as_any(&self) -> &dyn Any;
+ /// Cast Arc<Self> as an Any type.
+ fn arc_as_any(self: Arc<Self>) -> Arc<dyn Any + Sync + Send>;
+ /// Initialize the GPU.
+ fn init(&self) -> Result;
+ /// Update the GPU globals from global info
+ ///
+ /// TODO: Unclear what can and cannot be updated like this.
+ fn update_globals(&self);
+ /// Get a reference to the KernelAllocators.
+ ///
+ /// The returned guard holds the allocator lock until it is dropped.
+ fn alloc(&self) -> Guard<'_, Mutex<KernelAllocators>>;
+ /// Create a new `Vm` given a unique `File` ID.
+ fn new_vm(&self, file_id: u64) -> Result<mmu::Vm>;
+ /// Bind a `Vm` to an available slot and return the `VmBind`.
+ fn bind_vm(&self, vm: &mmu::Vm) -> Result<mmu::VmBind>;
+ /// Create a new user command queue.
+ fn new_queue(
+ &self,
+ vm: mmu::Vm,
+ ualloc: Arc<Mutex<alloc::DefaultAllocator>>,
+ ualloc_priv: Arc<Mutex<alloc::DefaultAllocator>>,
+ priority: u32,
+ caps: u32,
+ ) -> Result<Box<dyn queue::Queue>>;
+ /// Return a reference to the global `SequenceIDs` instance.
+ fn ids(&self) -> &SequenceIDs;
+ /// Kick the firmware (wake it up if asleep).
+ ///
+ /// This should be useful to reduce latency on work submission, so we can ask the firmware to
+ /// wake up while we do some preparatory work for the work submission.
+ fn kick_firmware(&self) -> Result;
+ /// Invalidate a GPU scheduler context. Must be called before the relevant structures are freed.
+ fn invalidate_context(
+ &self,
+ context: &fw::types::GpuObject<fw::workqueue::GpuContextData>,
+ ) -> Result;
+ /// Flush the entire firmware cache.
+ ///
+ /// TODO: Does this actually work?
+ fn flush_fw_cache(&self) -> Result;
+ /// Handle a GPU work timeout event.
+ fn handle_timeout(&self, counter: u32, event_slot: u32);
+ /// Handle a GPU fault event.
+ fn handle_fault(&self);
+ /// Wait for the GPU to become idle and power off.
+ ///
+ /// In the implementation in this file, `timeout` is the maximum number of polls, with a
+ /// sleep of about one millisecond between polls; `ETIMEDOUT` is returned when it runs out.
+ fn wait_for_poweroff(&self, timeout: usize) -> Result;
+ /// Send a firmware control command (secure cache flush).
+ fn fwctl(&self, msg: fw::channels::FwCtlMsg) -> Result;
+ /// Get the static GPU configuration for this SoC.
+ fn get_cfg(&self) -> &'static hw::HwConfig;
+ /// Get the dynamic GPU configuration for this SoC.
+ fn get_dyncfg(&self) -> &hw::DynConfig;
+}
+
+/// Private generic trait for functions that don't need to escape this module.
+///
+/// `OpGuard` stores the manager as `Arc<dyn GpuManagerPriv>` and calls `end_op()` on drop.
+trait GpuManagerPriv {
+ /// Decrement the pending submission counter.
+ fn end_op(&self);
+}
+
+#[versions(AGX)]
+#[vtable]
+impl rtkit::Operations for GpuManager::ver {
+    type Data = Arc<GpuManager::ver>;
+    type Buffer = gem::ObjectRef;
+
+    /// Handle a message coming from the firmware.
+    ///
+    /// The only message accepted is the RX doorbell on the firmware endpoint, in response to
+    /// which every receive channel is polled. Anything else is logged as an error and dropped.
+    fn recv_message(data: <Self::Data as ForeignOwnable>::Borrowed<'_>, ep: u8, msg: u64) {
+        let dev = &data.dev;
+        //dev_info!(dev, "RtKit message: {:#x}:{:#x}\n", ep, msg);
+
+        let is_rx_doorbell = ep == EP_FIRMWARE && msg == MSG_RX_DOORBELL;
+        if !is_rx_doorbell {
+            dev_err!(dev, "Unknown message: {:#x}:{:#x}\n", ep, msg);
+            return;
+        }
+
+        // The event channel is polled last, after the log/trace/stats channels.
+        let mut rx = data.rx_channels.lock();
+        rx.fw_log.poll();
+        rx.ktrace.poll();
+        rx.stats.poll();
+        rx.event.poll();
+    }
+
+    /// Handle a firmware crash: latch the crashed flag and fail all outstanding work.
+    fn crashed(data: <Self::Data as ForeignOwnable>::Borrowed<'_>) {
+        let dev = &data.dev;
+        dev_err!(dev, "GPU firmware crashed, failing all jobs\n");
+
+        data.crashed.store(true, Ordering::Relaxed);
+        data.event_manager.fail_all(workqueue::WorkError::NoDevice);
+    }
+
+    /// Allocate a shared memory buffer of `size` bytes on behalf of RtKit.
+    ///
+    /// The buffer is a kernel GEM object that is vmapped and mapped into the kernel VM.
+    fn shmem_alloc(
+        data: <Self::Data as ForeignOwnable>::Borrowed<'_>,
+        size: usize,
+    ) -> Result<Self::Buffer> {
+        let dev = &data.dev;
+        mod_dev_dbg!(dev, "shmem_alloc() {:#x} bytes\n", size);
+
+        let mut buffer = gem::new_kernel_object(dev, size)?;
+        buffer.vmap()?;
+        let iova = buffer.map_into(data.uat.kernel_vm())?;
+        mod_dev_dbg!(dev, "shmem_alloc() -> VA {:#x}\n", iova);
+
+        Ok(buffer)
+    }
+}
+
+#[versions(AGX)]
+impl GpuManager::ver {
+ /// Create a new GpuManager of this version/GPU combination.
+ ///
+ /// Builds the UAT, dynamic configuration, kernel allocators, event manager and initdata,
+ /// assembles the manager, publishes the channel pointers into initdata, sets up the MMIO
+ /// mappings and finally attaches an RtKit instance. The firmware is not booted here; that
+ /// happens in `init()`.
+ #[inline(never)]
+ pub(crate) fn new(
+ dev: &AsahiDevice,
+ res: &regs::Resources,
+ cfg: &'static hw::HwConfig,
+ ) -> Result<Arc<GpuManager::ver>> {
+ let uat = Self::make_uat(dev, cfg)?;
+ let dyncfg = Self::make_dyncfg(dev, res, cfg, &uat)?;
+
+ // One allocator per kernel-half VA range, all backed by the kernel VM.
+ let mut alloc = KernelAllocators {
+ private: alloc::DefaultAllocator::new(
+ dev,
+ uat.kernel_vm(),
+ IOVA_KERN_PRIV_BASE,
+ IOVA_KERN_PRIV_TOP,
+ 0x80,
+ mmu::PROT_FW_PRIV_RW,
+ 1024 * 1024,
+ true,
+ fmt!("Kernel Private"),
+ true,
+ )?,
+ shared: alloc::DefaultAllocator::new(
+ dev,
+ uat.kernel_vm(),
+ IOVA_KERN_SHARED_BASE,
+ IOVA_KERN_SHARED_TOP,
+ 0x80,
+ mmu::PROT_FW_SHARED_RW,
+ 1024 * 1024,
+ true,
+ fmt!("Kernel Shared"),
+ false,
+ )?,
+ shared_ro: alloc::DefaultAllocator::new(
+ dev,
+ uat.kernel_vm(),
+ IOVA_KERN_SHARED_RO_BASE,
+ IOVA_KERN_SHARED_RO_TOP,
+ 0x80,
+ mmu::PROT_FW_SHARED_RO,
+ 64 * 1024,
+ true,
+ fmt!("Kernel RO Shared"),
+ false,
+ )?,
+ gpu: alloc::DefaultAllocator::new(
+ dev,
+ uat.kernel_vm(),
+ IOVA_KERN_GPU_BASE,
+ IOVA_KERN_GPU_TOP,
+ 0x80,
+ mmu::PROT_GPU_FW_SHARED_RW,
+ 64 * 1024,
+ true,
+ fmt!("Kernel GPU Shared"),
+ false,
+ )?,
+ };
+
+ let event_manager = Self::make_event_manager(&mut alloc)?;
+ let initdata = Self::make_initdata(cfg, &dyncfg, &mut alloc)?;
+ let mut mgr = Self::make_mgr(dev, cfg, dyncfg, uat, alloc, event_manager, initdata)?;
+
+ // Publish the firmware control channel pointer. The channel guard is dropped before
+ // initdata is mutated.
+ {
+ let fwctl = mgr.fwctl_channel.lock();
+ let p_fwctl = fwctl.to_raw();
+ core::mem::drop(fwctl);
+
+ mgr.initdata.fw_status.with_mut(|raw, _inner| {
+ raw.fwctl_channel = p_fwctl;
+ });
+ }
+
+ // Publish the device control and receive channel pointers, again releasing each channel
+ // guard before touching initdata.
+ {
+ let txc = mgr.tx_channels.lock();
+ let p_device_control = txc.device_control.to_raw();
+ core::mem::drop(txc);
+
+ let rxc = mgr.rx_channels.lock();
+ let p_event = rxc.event.to_raw();
+ let p_fw_log = rxc.fw_log.to_raw();
+ let p_ktrace = rxc.ktrace.to_raw();
+ let p_stats = rxc.stats.to_raw();
+ let p_fwlog_buf = rxc.fw_log.get_buf();
+ core::mem::drop(rxc);
+
+ mgr.initdata.runtime_pointers.with_mut(|raw, _inner| {
+ raw.device_control = p_device_control;
+ raw.event = p_event;
+ raw.fw_log = p_fw_log;
+ raw.ktrace = p_ktrace;
+ raw.stats = p_stats;
+ raw.fwlog_buf = Some(p_fwlog_buf);
+ });
+ }
+
+ // Collect the raw pipe channel descriptors first (one entry per priority level), then
+ // copy them into initdata in a second step.
+ let mut p_pipes: Vec<fw::initdata::raw::PipeChannels::ver> = Vec::new();
+
+ for ((v, f), c) in mgr
+ .pipes
+ .vtx
+ .iter()
+ .zip(&mgr.pipes.frag)
+ .zip(&mgr.pipes.comp)
+ {
+ p_pipes.try_push(fw::initdata::raw::PipeChannels::ver {
+ vtx: v.lock().to_raw(),
+ frag: f.lock().to_raw(),
+ comp: c.lock().to_raw(),
+ })?;
+ }
+
+ mgr.initdata.runtime_pointers.with_mut(|raw, _inner| {
+ for (i, p) in p_pipes.into_iter().enumerate() {
+ raw.pipes[i].vtx = p.vtx;
+ raw.pipes[i].frag = p.frag;
+ raw.pipes[i].comp = p.comp;
+ }
+ });
+
+ // Map every MMIO range present in the hardware configuration; absent slots are skipped
+ // but keep their index.
+ for (i, map) in cfg.io_mappings.iter().enumerate() {
+ if let Some(map) = map.as_ref() {
+ mgr.iomap(i, map)?;
+ }
+ }
+
+ // From here on the manager is shared; no further `&mut` access is possible.
+ let mgr = Arc::from(mgr);
+
+ // RtKit takes a clone of the manager as its callback data.
+ let rtkit = Box::try_new(rtkit::RtKit::<GpuManager::ver>::new(
+ dev,
+ None,
+ 0,
+ mgr.clone(),
+ )?)?;
+
+ *mgr.rtkit.lock() = Some(rtkit);
+
+ // Hand the event channel a reference to the manager.
+ {
+ let mut rxc = mgr.rx_channels.lock();
+ rxc.event.set_manager(mgr.clone());
+ }
+
+ Ok(mgr)
+ }
+
+    /// Construct the complete GPU InitData structure tree via `InitDataBuilder` and hand it
+    /// back as a boxed `GpuObject`.
+    fn make_initdata(
+        cfg: &'static hw::HwConfig,
+        dyncfg: &hw::DynConfig,
+        alloc: &mut KernelAllocators,
+    ) -> Result<Box<fw::types::GpuObject<fw::initdata::InitData::ver>>> {
+        let mut initdata_builder = initdata::InitDataBuilder::ver::new(alloc, cfg, dyncfg);
+        let initdata = initdata_builder.build()?;
+        Ok(initdata)
+    }
+
+ /// Create a fresh boxed Uat instance.
+ ///
+ /// Fails if either `mmu::Uat::new()` or the box allocation fails.
+ ///
+ /// Force disable inlining to avoid blowing up the stack.
+ #[inline(never)]
+ fn make_uat(dev: &AsahiDevice, cfg: &'static hw::HwConfig) -> Result<Box<mmu::Uat>> {
+ Ok(Box::try_new(mmu::Uat::new(dev, cfg)?)?)
+ }
+
+    /// Assemble the final `GpuManager` instance and return it as a `UniqueArc`.
+    ///
+    /// The pipe channels and the RX/TX/firmware-control channels are allocated out of `alloc`,
+    /// after which `alloc` itself is moved into the manager.
+    ///
+    /// Inlining is force-disabled to avoid blowing up the stack.
+    #[inline(never)]
+    fn make_mgr(
+        dev: &AsahiDevice,
+        cfg: &'static hw::HwConfig,
+        dyncfg: Box<hw::DynConfig>,
+        uat: Box<mmu::Uat>,
+        mut alloc: KernelAllocators,
+        event_manager: Arc<event::EventManager>,
+        initdata: Box<fw::types::GpuObject<fw::initdata::InitData::ver>>,
+    ) -> Result<UniqueArc<GpuManager::ver>> {
+        let mut pipes = PipeChannels::ver {
+            vtx: Vec::new(),
+            frag: Vec::new(),
+            comp: Vec::new(),
+        };
+
+        // One vertex, one fragment and one compute pipe for each of the `NUM_PIPES` levels.
+        for _ in 0..NUM_PIPES {
+            let vtx = channel::PipeChannel::ver::new(dev, &mut alloc)?;
+            pipes.vtx.try_push(Mutex::new(vtx))?;
+
+            let frag = channel::PipeChannel::ver::new(dev, &mut alloc)?;
+            pipes.frag.try_push(Mutex::new(frag))?;
+
+            let comp = channel::PipeChannel::ver::new(dev, &mut alloc)?;
+            pipes.comp.try_push(Mutex::new(comp))?;
+        }
+
+        // Note: the field initializers below run in source order, and `alloc` is borrowed
+        // mutably by the channel constructors before it is finally moved into its own field.
+        UniqueArc::try_new(GpuManager::ver {
+            dev: dev.clone(),
+            cfg,
+            dyncfg,
+            initdata,
+            uat,
+            io_mappings: Vec::new(),
+            rtkit: Mutex::new(None),
+            crashed: AtomicBool::new(false),
+            rx_channels: Mutex::new(box_in_place!(RxChannels::ver {
+                event: channel::EventChannel::new(dev, &mut alloc, event_manager.clone())?,
+                fw_log: channel::FwLogChannel::new(dev, &mut alloc)?,
+                ktrace: channel::KTraceChannel::new(dev, &mut alloc)?,
+                stats: channel::StatsChannel::ver::new(dev, &mut alloc)?,
+            })?),
+            tx_channels: Mutex::new(Box::try_new(TxChannels::ver {
+                device_control: channel::DeviceControlChannel::ver::new(dev, &mut alloc)?,
+            })?),
+            fwctl_channel: Mutex::new(Box::try_new(channel::FwCtlChannel::new(dev, &mut alloc)?)?),
+            pipes,
+            event_manager,
+            buffer_mgr: buffer::BufferManager::new()?,
+            alloc: Mutex::new(alloc),
+            ids: Default::default(),
+        })
+    }
+
+ /// Fetch and validate the GPU dynamic configuration from the device tree and hardware.
+ ///
+ /// Reads the GPU ID from `res`, logs it, loads the power configuration, and then checks the
+ /// GPU ID against the limits in `cfg`. Any mismatch is reported as `EIO`.
+ ///
+ /// Force disable inlining to avoid blowing up the stack.
+ #[inline(never)]
+ fn make_dyncfg(
+ dev: &AsahiDevice,
+ res: &regs::Resources,
+ cfg: &'static hw::HwConfig,
+ uat: &mmu::Uat,
+ ) -> Result<Box<hw::DynConfig>> {
+ let gpu_id = res.get_gpu_id()?;
+
+ // The values in parentheses are the per-cluster counts multiplied by the cluster count.
+ dev_info!(dev, "GPU Information:\n");
+ dev_info!(
+ dev,
+ " Type: {:?}{:?}\n",
+ gpu_id.gpu_gen,
+ gpu_id.gpu_variant
+ );
+ dev_info!(dev, " Max dies: {}\n", gpu_id.max_dies);
+ dev_info!(dev, " Clusters: {}\n", gpu_id.num_clusters);
+ dev_info!(
+ dev,
+ " Cores: {} ({})\n",
+ gpu_id.num_cores,
+ gpu_id.num_cores * gpu_id.num_clusters
+ );
+ dev_info!(
+ dev,
+ " Frags: {} ({})\n",
+ gpu_id.num_frags,
+ gpu_id.num_frags * gpu_id.num_clusters
+ );
+ dev_info!(
+ dev,
+ " GPs: {} ({})\n",
+ gpu_id.num_gps,
+ gpu_id.num_gps * gpu_id.num_clusters
+ );
+ dev_info!(dev, " Core masks: {:#x?}\n", gpu_id.core_masks);
+ dev_info!(dev, " Active cores: {}\n", gpu_id.total_active_cores);
+
+ dev_info!(dev, "Getting configuration from device tree...\n");
+ let pwr_cfg = hw::PwrConfig::load(dev, cfg)?;
+ dev_info!(dev, "Dynamic configuration fetched\n");
+
+ // The detected GPU must be exactly the generation/variant the static config describes.
+ if gpu_id.gpu_gen != cfg.gpu_gen || gpu_id.gpu_variant != cfg.gpu_variant {
+ dev_err!(
+ dev,
+ "GPU type mismatch (expected {:?}{:?}, found {:?}{:?})\n",
+ cfg.gpu_gen,
+ cfg.gpu_variant,
+ gpu_id.gpu_gen,
+ gpu_id.gpu_variant
+ );
+ return Err(EIO);
+ }
+ // The detected unit counts must not exceed the maxima in the static config.
+ if gpu_id.num_clusters > cfg.max_num_clusters {
+ dev_err!(
+ dev,
+ "Too many clusters ({} > {})\n",
+ gpu_id.num_clusters,
+ cfg.max_num_clusters
+ );
+ return Err(EIO);
+ }
+ if gpu_id.num_cores > cfg.max_num_cores {
+ dev_err!(
+ dev,
+ "Too many cores ({} > {})\n",
+ gpu_id.num_cores,
+ cfg.max_num_cores
+ );
+ return Err(EIO);
+ }
+ if gpu_id.num_frags > cfg.max_num_frags {
+ dev_err!(
+ dev,
+ "Too many frags ({} > {})\n",
+ gpu_id.num_frags,
+ cfg.max_num_frags
+ );
+ return Err(EIO);
+ }
+ if gpu_id.num_gps > cfg.max_num_gps {
+ dev_err!(
+ dev,
+ "Too many GPs ({} > {})\n",
+ gpu_id.num_gps,
+ cfg.max_num_gps
+ );
+ return Err(EIO);
+ }
+
+ Ok(Box::try_new(hw::DynConfig {
+ pwr: pwr_cfg,
+ uat_ttb_base: uat.ttb_base(),
+ id: gpu_id,
+ })?)
+ }
+
+    /// Build the global GPU event manager out of `alloc` and wrap it in an `Arc<>`.
+    fn make_event_manager(alloc: &mut KernelAllocators) -> Result<Arc<event::EventManager>> {
+        let manager = event::EventManager::new(alloc)?;
+        Arc::try_new(manager)
+    }
+
+ /// Create a new MMIO mapping and add it to the mappings list in initdata at the specified
+ /// index.
+ ///
+ /// The mapping object is also pushed onto `self.io_mappings`.
+ fn iomap(&mut self, index: usize, map: &hw::IOMapping) -> Result {
+ // Split the physical base into an aligned base plus an offset, and round the end up.
+ // NOTE(review): assumes `mmu::UAT_PGMSK` is the page-offset mask (page size - 1) — confirm.
+ let off = map.base & mmu::UAT_PGMSK;
+ let base = map.base - off;
+ let end = (map.base + map.size + mmu::UAT_PGMSK) & !mmu::UAT_PGMSK;
+ let mapping = self
+ .uat
+ .kernel_vm()
+ .map_io(base, end - base, map.writable)?;
+
+ // The firmware is given the unaligned physical address and the matching virtual address
+ // (mapping base plus the offset computed above).
+ self.initdata.runtime_pointers.hwdata_b.with_mut(|raw, _| {
+ raw.io_mappings[index] = fw::initdata::raw::IOMapping {
+ phys_addr: U64(map.base as u64),
+ virt_addr: U64((mapping.iova() + off) as u64),
+ size: map.size as u32,
+ range_size: map.range_size as u32,
+ readwrite: U64(map.writable as u64),
+ };
+ });
+
+ self.io_mappings.try_push(mapping)?;
+ Ok(())
+ }
+
+ /// Mark work associated with currently in-progress event slots as failed, after a fault or
+ /// timeout.
+ ///
+ /// When `culprit_slot` is given, only that slot is marked with `error`; every other pending
+ /// slot is marked as `WorkError::Killed`. When it is `None`, all pending slots get `error`.
+ fn mark_pending_events(&self, culprit_slot: Option<u32>, error: workqueue::WorkError) {
+ dev_err!(self.dev, " Pending events:\n");
+
+ self.initdata.globals.with(|raw, _inner| {
+ for i in raw.pending_stamps.iter() {
+ let info = i.info.load(Ordering::Relaxed);
+ let wait_value = i.wait_value.load(Ordering::Relaxed);
+
+ // Only entries with bit 0 of `info` set are processed.
+ if info & 1 != 0 {
+ // The low three bits are reported as flags, the remaining bits as the slot.
+ let slot = info >> 3;
+ let flags = info & 0x7;
+ dev_err!(
+ self.dev,
+ " [{}] flags={} value={:#x}\n",
+ slot,
+ flags,
+ wait_value
+ );
+ let error = if culprit_slot.is_some() && culprit_slot != Some(slot) {
+ workqueue::WorkError::Killed
+ } else {
+ error
+ };
+ self.event_manager.mark_error(slot, wait_value, error);
+ // Clear the entry once it has been handled.
+ i.info.store(0, Ordering::Relaxed);
+ i.wait_value.store(0, Ordering::Relaxed);
+ }
+ }
+ });
+ }
+
+    /// Fetch the GPU MMU fault information from the hardware registers.
+    ///
+    /// Returns `None` either when the device resources cannot be acquired (which is logged) or
+    /// when the hardware reports no fault information. Any fault information found is logged
+    /// before being returned.
+    fn get_fault_info(&self) -> Option<regs::FaultInfo> {
+        let data = self.dev.data();
+
+        let res = match data.resources() {
+            Some(res) => res,
+            None => {
+                dev_err!(self.dev, " Failed to acquire resources\n");
+                return None;
+            }
+        };
+
+        let info = res.get_fault_info();
+        // Borrow the payload for logging instead of testing `is_some()` and unwrapping.
+        if let Some(fault) = info.as_ref() {
+            dev_err!(self.dev, " Fault info: {:#x?}\n", fault);
+        }
+        info
+    }
+
+ /// Resume the GPU firmware after it halts (due to a timeout, fault, or request).
+ ///
+ /// If the firmware has not reported the halted state yet, this polls for it for up to
+ /// `HALT_ENTER_TIMEOUT_MS`. Recovery is only attempted when the firmware is halted and the
+ /// `NoGpuRecovery` debug flag is not set.
+ fn recover(&self) {
+ self.initdata.fw_status.with(|raw, _inner| {
+ let halt_count = raw.flags.halt_count.load(Ordering::Relaxed);
+ let mut halted = raw.flags.halted.load(Ordering::Relaxed);
+ dev_err!(self.dev, " Halt count: {}\n", halt_count);
+ dev_err!(self.dev, " Halted: {}\n", halted);
+
+ if halted == 0 {
+ // Poll (without sleeping) until the firmware reports the halt or the timeout expires.
+ let timeout = time::ktime_get() + Duration::from_millis(HALT_ENTER_TIMEOUT_MS);
+ while time::ktime_get() < timeout {
+ halted = raw.flags.halted.load(Ordering::Relaxed);
+ if halted != 0 {
+ break;
+ }
+ mem::sync();
+ }
+ // Take one final reading after the loop.
+ halted = raw.flags.halted.load(Ordering::Relaxed);
+ }
+
+ if debug_enabled(DebugFlags::NoGpuRecovery) {
+ dev_crit!(self.dev, " GPU recovery is disabled, wedging forever!\n");
+ } else if halted != 0 {
+ dev_err!(self.dev, " Attempting recovery...\n");
+ // Clear the halted flag first, then request the resume.
+ raw.flags.halted.store(0, Ordering::SeqCst);
+ raw.flags.resume.store(1, Ordering::SeqCst);
+ } else {
+ dev_err!(self.dev, " Cannot recover.\n");
+ }
+ });
+ }
+
+ /// Return the packed GPU enabled core masks.
+ ///
+ /// The slice borrows from the dynamic configuration owned by this manager.
+ // Only used for some versions
+ #[allow(dead_code)]
+ pub(crate) fn core_masks_packed(&self) -> &[u32] {
+ self.dyncfg.id.core_masks_packed.as_slice()
+ }
+
+ /// Kick a submission pipe for a submitted job to tell the firmware to start processing it.
+ ///
+ /// The pipe is selected by the job's pipe type and priority. Returns `EIO` if the priority
+ /// does not correspond to an existing pipe, or the error from sending the doorbell message.
+ pub(crate) fn run_job(&self, job: workqueue::JobSubmission::ver<'_>) -> Result {
+ mod_dev_dbg!(self.dev, "GPU: run_job\n");
+
+ let pipe_type = job.pipe_type();
+ mod_dev_dbg!(self.dev, "GPU: run_job: pipe_type={:?}\n", pipe_type);
+
+ let pipes = match pipe_type {
+ PipeType::Vertex => &self.pipes.vtx,
+ PipeType::Fragment => &self.pipes.frag,
+ PipeType::Compute => &self.pipes.comp,
+ };
+
+ // The job priority doubles as the index into the per-type pipe list.
+ let index: usize = job.priority() as usize;
+ let mut pipe = pipes.get(index).ok_or(EIO)?.lock();
+
+ mod_dev_dbg!(self.dev, "GPU: run_job: run()\n");
+ job.run(&mut pipe);
+ mod_dev_dbg!(self.dev, "GPU: run_job: ring doorbell\n");
+
+ // The pipe lock is still held while the doorbell is rung.
+ let mut guard = self.rtkit.lock();
+ let rtk = guard.as_mut().unwrap();
+ // Doorbell payload: pipe type in the low bits, pipe index shifted left by two.
+ rtk.send_message(
+ EP_DOORBELL,
+ MSG_TX_DOORBELL | pipe_type as u64 | ((index as u64) << 2),
+ )?;
+ mod_dev_dbg!(self.dev, "GPU: run_job: done\n");
+
+ Ok(())
+ }
+
+ /// Return whether the firmware has been reported as crashed (see the `crashed()` RtKit
+ /// callback, which sets this flag).
+ pub(crate) fn is_crashed(&self) -> bool {
+ self.crashed.load(Ordering::Relaxed)
+ }
+
+    /// Begin a GPU operation: increment the pending submission counter, kick the firmware, and
+    /// return an `OpGuard` that decrements the counter again when dropped.
+    ///
+    /// Returns `ENODEV` if the firmware has crashed, or the error from kicking the firmware.
+    /// In both cases the pending submission counter is left unchanged on return.
+    pub(crate) fn start_op(self: &Arc<GpuManager::ver>) -> Result<OpGuard> {
+        if self.is_crashed() {
+            return Err(ENODEV);
+        }
+
+        let val = self
+            .initdata
+            .globals
+            .with(|raw, _inner| raw.pending_submissions.fetch_add(1, Ordering::Acquire));
+
+        // Create the guard right after the increment, so that the counter is decremented again
+        // (through `end_op()`) if kicking the firmware fails below.
+        let guard = OpGuard(self.clone());
+
+        mod_dev_dbg!(self.dev, "OP start (pending: {})\n", val + 1);
+        self.kick_firmware()?;
+        Ok(guard)
+    }
+}
+
+#[versions(AGX)]
+impl GpuManager for GpuManager::ver {
+ /// Expose this manager as `&dyn Any`.
+ fn as_any(&self) -> &dyn Any {
+ self
+ }
+
+ /// Convert an `Arc` of this manager into an `Arc<dyn Any + Sync + Send>`.
+ fn arc_as_any(self: Arc<Self>) -> Arc<dyn Any + Sync + Send> {
+ self as Arc<dyn Any + Sync + Send>
+ }
+
+ /// Boot RtKit, start the firmware and doorbell endpoints, and send the init message.
+ fn init(&self) -> Result {
+ // Queue the Initialize command before the firmware is started; the device control
+ // doorbell for it is rung further down.
+ self.tx_channels.lock().device_control.send(
+ &fw::channels::DeviceControlMsg::ver::Initialize(Default::default()),
+ );
+
+ let initdata = self.initdata.gpu_va().get();
+ let mut guard = self.rtkit.lock();
+ let rtk = guard.as_mut().unwrap();
+
+ rtk.boot()?;
+ rtk.start_endpoint(EP_FIRMWARE)?;
+ rtk.start_endpoint(EP_DOORBELL)?;
+ // The init message carries the masked GPU VA of the initdata structure.
+ rtk.send_message(EP_FIRMWARE, MSG_INIT | (initdata & INIT_DATA_MASK))?;
+ rtk.send_message(EP_DOORBELL, MSG_TX_DOORBELL | DOORBELL_DEVCTRL)?;
+ // Release the RtKit lock: `kick_firmware()` takes it again.
+ core::mem::drop(guard);
+
+ self.kick_firmware()?;
+ Ok(())
+ }
+
+    /// Write the idle power-off delay into the firmware globals.
+    ///
+    /// The delay is 2 ms unless a debug flag overrides it: 0 with `WaitForPowerOff`, otherwise
+    /// 5000 with `KeepGpuPowered`.
+    fn update_globals(&self) {
+        let idle_off_delay_ms: u32 = if debug_enabled(DebugFlags::WaitForPowerOff) {
+            0
+        } else if debug_enabled(DebugFlags::KeepGpuPowered) {
+            5000
+        } else {
+            2
+        };
+
+        self.initdata.globals.with(|raw, _inner| {
+            raw.idle_off_delay_ms
+                .store(idle_off_delay_ms, Ordering::Relaxed);
+        });
+    }
+
+ /// Lock and return the kernel allocators.
+ ///
+ /// As a side effect, when the private allocator has accumulated more than 1 MiB of garbage,
+ /// the firmware cache is flushed and the garbage is collected. If the flush fails, the
+ /// garbage is left alone.
+ fn alloc(&self) -> Guard<'_, Mutex<KernelAllocators>> {
+ let mut guard = self.alloc.lock();
+ let (garbage_count, garbage_bytes) = guard.private.garbage();
+ if garbage_bytes > 1024 * 1024 {
+ mod_dev_dbg!(
+ self.dev,
+ "Collecting kalloc garbage ({} objects, {} bytes)\n",
+ garbage_count,
+ garbage_bytes
+ );
+ // The allocator lock stays held across the cache flush.
+ if self.flush_fw_cache().is_err() {
+ dev_err!(self.dev, "Failed to flush FW cache\n");
+ } else {
+ guard.private.collect_garbage(garbage_count);
+ }
+ }
+
+ guard
+ }
+
+ /// Create a new `Vm` for `file_id`, assigning it the next `Vm` sequence ID.
+ fn new_vm(&self, file_id: u64) -> Result<mmu::Vm> {
+ self.uat.new_vm(self.ids.vm.next(), file_id)
+ }
+
+ /// Bind `vm` through the UAT and return the resulting `VmBind`.
+ fn bind_vm(&self, vm: &mmu::Vm) -> Result<mmu::VmBind> {
+ self.uat.bind(vm)
+ }
+
+ /// Create a new user command queue of this firmware/GPU version and return it boxed.
+ ///
+ /// The kernel allocators are locked via `alloc()` for the duration of the queue
+ /// construction, and the queue receives the next `Queue` sequence ID.
+ fn new_queue(
+ &self,
+ vm: mmu::Vm,
+ ualloc: Arc<Mutex<alloc::DefaultAllocator>>,
+ ualloc_priv: Arc<Mutex<alloc::DefaultAllocator>>,
+ priority: u32,
+ caps: u32,
+ ) -> Result<Box<dyn queue::Queue>> {
+ let mut kalloc = self.alloc();
+ let id = self.ids.queue.next();
+ Ok(Box::try_new(queue::Queue::ver::new(
+ &self.dev,
+ vm,
+ &mut kalloc,
+ ualloc,
+ ualloc_priv,
+ self.event_manager.clone(),
+ &self.buffer_mgr,
+ id,
+ priority,
+ caps,
+ )?)?)
+ }
+
+ /// Ring the firmware kick doorbell.
+ ///
+ /// Returns `ENODEV` if the firmware has crashed, or the error from sending the message.
+ fn kick_firmware(&self) -> Result {
+ if self.is_crashed() {
+ return Err(ENODEV);
+ }
+
+ let mut guard = self.rtkit.lock();
+ let rtk = guard.as_mut().unwrap();
+ rtk.send_message(EP_DOORBELL, MSG_TX_DOORBELL | DOORBELL_KICKFW)?;
+
+ Ok(())
+ }
+
+ /// Send a `DestroyContext` command for `context` over the device control channel and wait
+ /// for it to complete.
+ ///
+ /// Returns `ENODEV` if the firmware has crashed. On success, the private allocator garbage
+ /// that existed when the call started is collected.
+ fn invalidate_context(
+ &self,
+ context: &fw::types::GpuObject<fw::workqueue::GpuContextData>,
+ ) -> Result {
+ mod_dev_dbg!(
+ self.dev,
+ "Invalidating GPU context @ {:?}\n",
+ context.weak_pointer()
+ );
+
+ if self.is_crashed() {
+ return Err(ENODEV);
+ }
+
+ // Take the allocator lock directly (not via `self.alloc()`) and snapshot the garbage
+ // count before the command is sent; only that many objects are collected afterwards.
+ let mut guard = self.alloc.lock();
+ let (garbage_count, _) = guard.private.garbage();
+
+ let dc = context.with(
+ |raw, _inner| fw::channels::DeviceControlMsg::ver::DestroyContext {
+ unk_4: 0,
+ ctx_23: raw.unk_23,
+ __pad0: Default::default(),
+ unk_c: 0,
+ unk_10: 0,
+ ctx_0: raw.unk_0,
+ ctx_1: raw.unk_1,
+ ctx_4: raw.unk_4,
+ __pad1: Default::default(),
+ unk_18: 0,
+ gpu_context: Some(context.weak_pointer()),
+ __pad2: Default::default(),
+ },
+ );
+
+ mod_dev_dbg!(self.dev, "Context invalidation command: {:?}\n", &dc);
+
+ let mut txch = self.tx_channels.lock();
+
+ let token = txch.device_control.send(&dc);
+
+ // The RtKit lock is only held while ringing the doorbell, not while waiting.
+ {
+ let mut guard = self.rtkit.lock();
+ let rtk = guard.as_mut().unwrap();
+ rtk.send_message(EP_DOORBELL, MSG_TX_DOORBELL | DOORBELL_DEVCTRL)?;
+ }
+
+ txch.device_control.wait_for(token)?;
+
+ mod_dev_dbg!(
+ self.dev,
+ "GPU context invalidated: {:?}\n",
+ context.weak_pointer()
+ );
+
+ // The invalidation does a cache flush, so it is okay to collect garbage
+ guard.private.collect_garbage(garbage_count);
+
+ Ok(())
+ }
+
+ /// Flush the firmware (coprocessor) data cache by sending a `DestroyContext` command that
+ /// targets no context, and wait for it to complete.
+ ///
+ /// Returns `ENODEV` if the firmware has crashed.
+ fn flush_fw_cache(&self) -> Result {
+ mod_dev_dbg!(self.dev, "Flushing coprocessor data cache\n");
+
+ if self.is_crashed() {
+ return Err(ENODEV);
+ }
+
+ // ctx_0 == 0xff or ctx_1 == 0xff cause no effect on context,
+ // but this command does a full cache flush too, so abuse it
+ // for that.
+
+ let dc = fw::channels::DeviceControlMsg::ver::DestroyContext {
+ unk_4: 0,
+ ctx_23: 0,
+ __pad0: Default::default(),
+ unk_c: 0,
+ unk_10: 0,
+ ctx_0: 0xff,
+ ctx_1: 0xff,
+ ctx_4: 0,
+ __pad1: Default::default(),
+ unk_18: 0,
+ gpu_context: None,
+ __pad2: Default::default(),
+ };
+
+ let mut txch = self.tx_channels.lock();
+
+ let token = txch.device_control.send(&dc);
+ // The RtKit lock is only held while ringing the doorbell, not while waiting.
+ {
+ let mut guard = self.rtkit.lock();
+ let rtk = guard.as_mut().unwrap();
+ rtk.send_message(EP_DOORBELL, MSG_TX_DOORBELL | DOORBELL_DEVCTRL)?;
+ }
+
+ txch.device_control.wait_for(token)?;
+ Ok(())
+ }
+
+ /// Return the sequence ID counters owned by this manager.
+ fn ids(&self) -> &SequenceIDs {
+ &self.ids
+ }
+
+ /// Log a GPU timeout, fail the pending work and try to recover the firmware.
+ ///
+ /// `event_slot` is treated as the culprit: it receives the timeout (or fault) error, while
+ /// all other pending slots are marked as killed.
+ fn handle_timeout(&self, counter: u32, event_slot: u32) {
+ dev_err!(self.dev, " (\\________/) \n");
+ dev_err!(self.dev, " | | \n");
+ dev_err!(self.dev, "'.| \\ , / |.'\n");
+ dev_err!(self.dev, "--| / (( \\ |--\n");
+ dev_err!(self.dev, ".'| _-_- |'.\n");
+ dev_err!(self.dev, " |________| \n");
+ dev_err!(self.dev, "** GPU timeout nya~!!!!! **\n");
+ dev_err!(self.dev, " Event slot: {}\n", event_slot);
+ dev_err!(self.dev, " Timeout count: {}\n", counter);
+
+ // If we have fault info, consider it a fault.
+ let error = match self.get_fault_info() {
+ Some(info) => workqueue::WorkError::Fault(info),
+ None => workqueue::WorkError::Timeout,
+ };
+ self.mark_pending_events(Some(event_slot), error);
+ self.recover();
+ }
+
+ /// Log a GPU fault, fail all pending work and try to recover the firmware.
+ ///
+ /// No culprit slot is known here, so every pending slot receives the same error.
+ fn handle_fault(&self) {
+ dev_err!(self.dev, " (\\________/) \n");
+ dev_err!(self.dev, " | | \n");
+ dev_err!(self.dev, "'.| \\ , / |.'\n");
+ dev_err!(self.dev, "--| / (( \\ |--\n");
+ dev_err!(self.dev, ".'| _-_- |'.\n");
+ dev_err!(self.dev, " |________| \n");
+ dev_err!(self.dev, "GPU fault nya~!!!!!\n");
+ // Without fault info from the hardware, the error is reported as unknown.
+ let error = match self.get_fault_info() {
+ Some(info) => workqueue::WorkError::Fault(info),
+ None => workqueue::WorkError::Unknown,
+ };
+ self.mark_pending_events(None, error);
+ self.recover();
+ }
+
+ /// Poll the firmware power status until it reports the powered-off state.
+ ///
+ /// Polls at most `timeout` times, sleeping for about one millisecond between polls, and
+ /// returns `ETIMEDOUT` if the state is never observed. A `timeout` of 0 fails immediately.
+ fn wait_for_poweroff(&self, timeout: usize) -> Result {
+ self.initdata.runtime_pointers.hwdata_a.with(|raw, _inner| {
+ for _i in 0..timeout {
+ // NOTE(review): 4 is presumably the "powered off" status value — confirm.
+ if raw.pwr_status.load(Ordering::Relaxed) == 4 {
+ return Ok(());
+ }
+ coarse_sleep(Duration::from_millis(1));
+ }
+ Err(ETIMEDOUT)
+ })
+ }
+
+ /// Send `msg` over the firmware control channel, ring its doorbell and wait for completion.
+ ///
+ /// Returns `ENODEV` if the firmware has crashed.
+ fn fwctl(&self, msg: fw::channels::FwCtlMsg) -> Result {
+ if self.is_crashed() {
+ return Err(ENODEV);
+ }
+
+ let mut fwctl = self.fwctl_channel.lock();
+ let token = fwctl.send(&msg);
+ // The RtKit lock is only held while ringing the doorbell, not while waiting.
+ {
+ let mut guard = self.rtkit.lock();
+ let rtk = guard.as_mut().unwrap();
+ rtk.send_message(EP_DOORBELL, MSG_FWCTL)?;
+ }
+ fwctl.wait_for(token)?;
+ Ok(())
+ }
+
+ /// Return the static hardware configuration this manager was created with.
+ fn get_cfg(&self) -> &'static hw::HwConfig {
+ self.cfg
+ }
+
+ /// Return the dynamic configuration owned by this manager.
+ fn get_dyncfg(&self) -> &hw::DynConfig {
+ &self.dyncfg
+ }
+}
+
+#[versions(AGX)]
+impl GpuManagerPriv for GpuManager::ver {
+ /// Decrement the pending submission counter in the firmware globals.
+ ///
+ /// This is the counterpart of the increment performed in `start_op()`.
+ fn end_op(&self) {
+ // `fetch_sub` returns the value before the decrement.
+ let val = self
+ .initdata
+ .globals
+ .with(|raw, _inner| raw.pending_submissions.fetch_sub(1, Ordering::Release));
+
+ mod_dev_dbg!(self.dev, "OP end (pending: {})\n", val - 1);
+ }
+}
diff --git a/drivers/gpu/drm/asahi/hw/mod.rs b/drivers/gpu/drm/asahi/hw/mod.rs
new file mode 100644
index 000000000000..a92bb70aeae8
--- /dev/null
+++ b/drivers/gpu/drm/asahi/hw/mod.rs
@@ -0,0 +1,522 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+//! Per-SoC hardware configuration structures
+//!
+//! This module contains the definitions used to store per-GPU and per-SoC configuration data.
+
+use crate::driver::AsahiDevice;
+use crate::fw::types::*;
+use alloc::vec::Vec;
+use kernel::c_str;
+use kernel::device::RawDevice;
+use kernel::prelude::*;
+
+/// Maximum number of power zones accepted from the `apple,power-zones` property
+/// (each zone takes three values).
+const MAX_POWERZONES: usize = 5;
+
+pub(crate) mod t600x;
+pub(crate) mod t8103;
+pub(crate) mod t8112;
+
+/// GPU generation enumeration. Note: Part of the UABI.
+///
+/// The discriminant is the numeric generation itself (e.g. `G13` is 13).
+#[derive(Debug, PartialEq, Copy, Clone)]
+#[repr(u32)]
+pub(crate) enum GpuGen {
+ G13 = 13,
+ G14 = 14,
+}
+
+/// GPU variant enumeration. Note: Part of the UABI.
+///
+/// The discriminant is the character code of the variant letter, as a `u32`.
+#[derive(Debug, PartialEq, Copy, Clone)]
+#[repr(u32)]
+pub(crate) enum GpuVariant {
+ P = 'P' as u32,
+ G = 'G' as u32,
+ S = 'S' as u32,
+ C = 'C' as u32,
+ D = 'D' as u32,
+}
+
+/// GPU revision enumeration. Note: Part of the UABI.
+///
+/// Values carry the stepping letter in the high nibble (A = 0, B = 1, C = 2) and the
+/// stepping number in the low nibble.
+#[derive(Debug, PartialEq, Copy, Clone)]
+#[repr(u32)]
+pub(crate) enum GpuRevision {
+ A0 = 0x00,
+ A1 = 0x01,
+ B0 = 0x10,
+ B1 = 0x11,
+ C0 = 0x20,
+ C1 = 0x21,
+}
+
+/// GPU core type enumeration. Note: Part of the firmware ABI.
+///
+/// Only the core types referenced by the hardware configurations in this driver are
+/// defined as variants; the commented-out entries appear to record the IDs of other core
+/// types for reference.
+#[derive(Debug, Copy, Clone)]
+#[repr(u32)]
+pub(crate) enum GpuCore {
+ // Unknown = 0,
+ // G5P = 1,
+ // G5G = 2,
+ // G9P = 3,
+ // G9G = 4,
+ // G10P = 5,
+ // G11P = 6,
+ // G11M = 7,
+ // G11G = 8,
+ // G12P = 9,
+ // G13P = 10,
+ G13G = 11,
+ G13S = 12,
+ G13C = 13,
+ // G14P = 14,
+ G14G = 15,
+}
+
+/// GPU revision ID. Note: Part of the firmware ABI.
+///
+/// Unlike [`GpuRevision`], these IDs are numbered sequentially starting at 1; 0 is the
+/// (undefined here) "Unknown" value.
+#[derive(Debug, PartialEq, Copy, Clone)]
+#[repr(u32)]
+pub(crate) enum GpuRevisionID {
+ // Unknown = 0,
+ A0 = 1,
+ A1 = 2,
+ B0 = 3,
+ B1 = 4,
+ C0 = 5,
+ C1 = 6,
+}
+
+/// GPU driver/hardware features, from the UABI.
+///
+/// The constants here are used to build the `gpu_feat_compat` / `gpu_feat_incompat`
+/// bitmasks in `HwConfig`.
+pub(crate) mod feat {
+ /// Backwards-compatible features.
+ ///
+ /// Currently empty: no compatible feature bits are defined.
+ pub(crate) mod compat {}
+
+ /// Backwards-incompatible features.
+ pub(crate) mod incompat {
+ use kernel::bindings;
+
+ /// Hardware requires Z/S compression to be mandatorily enabled.
+ pub(crate) const MANDATORY_ZS_COMPRESSION: u64 =
+ bindings::drm_asahi_feat_incompat_DRM_ASAHI_FEAT_MANDATORY_ZS_COMPRESSION as u64;
+ }
+}
+
+/// A single performance state of the GPU.
+///
+/// Built by `PwrConfig::load` from one child node of the `operating-points-v2` table.
+#[derive(Debug)]
+pub(crate) struct PState {
+ /// Voltage in millivolts, per GPU cluster.
+ pub(crate) volt_mv: Vec<u32>,
+ /// Frequency in hertz.
+ pub(crate) freq_hz: u32,
+ /// Maximum power consumption of the GPU at this pstate, in milliwatts.
+ pub(crate) pwr_mw: u32,
+}
+
+/// A power zone definition (we have no idea what this is but Apple puts them in the DT).
+///
+/// Each zone is parsed from three consecutive values of the `apple,power-zones` property,
+/// in the order `target`, `target_offset`, `filter_tc`.
+#[allow(missing_docs)]
+#[derive(Debug, Copy, Clone)]
+pub(crate) struct PowerZone {
+ pub(crate) target: u32,
+ pub(crate) target_offset: u32,
+ pub(crate) filter_tc: u32,
+}
+
+/// An MMIO mapping used by the firmware.
+///
+/// Per-SoC tables of these are stored in `HwConfig::io_mappings`.
+#[derive(Debug, Copy, Clone)]
+pub(crate) struct IOMapping {
+ /// Base physical address of the mapping.
+ pub(crate) base: usize,
+ /// Size of the mapping.
+ pub(crate) size: usize,
+ /// Range size of the mapping (for arrays?)
+ pub(crate) range_size: usize,
+ /// Whether the mapping should be writable.
+ pub(crate) writable: bool,
+}
+
+impl IOMapping {
+    /// Build an [`IOMapping`] from its four fields.
+    ///
+    /// This is a `const fn` so that mapping tables can be assembled in constants.
+    pub(crate) const fn new(
+        base: usize,
+        size: usize,
+        range_size: usize,
+        writable: bool,
+    ) -> IOMapping {
+        Self { base, size, range_size, writable }
+    }
+}
+
+/// Unknown HwConfigA fields that vary from SoC to SoC.
+///
+/// Field names follow the `unk_<hex>` pattern; presumably the hex suffix is the field's
+/// offset in the firmware structure (to be confirmed against the firmware definitions).
+#[allow(missing_docs)]
+#[derive(Debug, Copy, Clone)]
+pub(crate) struct HwConfigA {
+ pub(crate) unk_87c: i32,
+ pub(crate) unk_8cc: u32,
+ pub(crate) unk_e24: u32,
+}
+
+/// Unknown HwConfigB fields that vary from SoC to SoC.
+///
+/// Field names follow the `unk_<hex>` pattern; presumably the hex suffix is the field's
+/// offset in the firmware structure (to be confirmed against the firmware definitions).
+#[allow(missing_docs)]
+#[derive(Debug, Copy, Clone)]
+pub(crate) struct HwConfigB {
+ pub(crate) unk_4e0: u64,
+ pub(crate) unk_534: u32,
+ pub(crate) unk_ab8: u32,
+ pub(crate) unk_abc: u32,
+ pub(crate) unk_b30: u32,
+}
+
+/// Render command configs that vary from SoC to SoC.
+///
+/// Stored as the `render` field of `HwConfig`.
+#[derive(Debug, Copy, Clone)]
+pub(crate) struct HwRenderConfig {
+ /// Vertex/tiling-related configuration register (lsb: disable clustering)
+ pub(crate) tiling_control: u32,
+}
+
+/// Static hardware configuration for a given SoC model.
+///
+/// One constant of this type is defined per supported SoC in the `t600x`, `t8103` and
+/// `t8112` submodules.
+#[derive(Debug)]
+pub(crate) struct HwConfig {
+ /// Chip ID in hex format (e.g. 0x8103 for t8103).
+ pub(crate) chip_id: u32,
+ /// GPU generation.
+ pub(crate) gpu_gen: GpuGen,
+ /// GPU variant type.
+ pub(crate) gpu_variant: GpuVariant,
+ /// GPU core type ID (as known by the firmware).
+ pub(crate) gpu_core: GpuCore,
+ /// Compatible feature bitmask for this GPU.
+ pub(crate) gpu_feat_compat: u64,
+ /// Incompatible feature bitmask for this GPU.
+ pub(crate) gpu_feat_incompat: u64,
+
+ /// Base clock used for timekeeping.
+ pub(crate) base_clock_hz: u32,
+ /// Output address space for the UAT on this SoC.
+ pub(crate) uat_oas: usize,
+ /// Maximum number of clusters on this SoC.
+ pub(crate) max_num_clusters: u32,
+ /// Maximum number of cores per cluster for this GPU.
+ pub(crate) max_num_cores: u32,
+ /// Maximum number of frags per cluster for this GPU.
+ pub(crate) max_num_frags: u32,
+ /// Maximum number of GPs per cluster for this GPU.
+ pub(crate) max_num_gps: u32,
+
+ /// Required size of the first preemption buffer.
+ pub(crate) preempt1_size: usize,
+ /// Required size of the second preemption buffer.
+ pub(crate) preempt2_size: usize,
+ /// Required size of the third preemption buffer.
+ pub(crate) preempt3_size: usize,
+
+ /// Rendering-relevant configuration.
+ pub(crate) render: HwRenderConfig,
+
+ /// Misc HWDataA field values.
+ pub(crate) da: HwConfigA,
+ /// Misc HWDataB field values.
+ pub(crate) db: HwConfigB,
+ /// HwDataShared1.table.
+ pub(crate) shared1_tab: &'static [i32],
+ /// HwDataShared1.unk_a4.
+ pub(crate) shared1_a4: u32,
+ /// HwDataShared2.table.
+ pub(crate) shared2_tab: &'static [i32],
+ /// HwDataShared2.unk_508.
+ pub(crate) shared2_unk_508: u32,
+ /// Constant related to SRAM voltages.
+ pub(crate) sram_k: F32,
+ /// Unknown per-cluster coefficients 1.
+ pub(crate) unk_coef_a: &'static [&'static [F32]],
+ /// Unknown per-cluster coefficients 2.
+ pub(crate) unk_coef_b: &'static [&'static [F32]],
+ /// Unknown table in Global struct.
+ pub(crate) global_tab: Option<&'static [u8]>,
+
+ /// Temperature sensor list (8 bits per sensor).
+ pub(crate) fast_die0_sensor_mask: u64,
+ /// Temperature sensor list (alternate).
+ pub(crate) fast_die0_sensor_mask_alt: u64,
+ /// Temperature sensor present bitmask.
+ pub(crate) fast_die0_sensor_present: u32,
+ /// Required MMIO mappings for this GPU/firmware.
+ ///
+ /// `None` entries mark mapping slots that are not used on this SoC.
+ pub(crate) io_mappings: &'static [Option<IOMapping>],
+}
+
+/// Dynamic (fetched from hardware/DT) configuration.
+///
+/// Complements the static, per-SoC `HwConfig` with values specific to the running device.
+#[derive(Debug)]
+pub(crate) struct DynConfig {
+ /// Base physical address of the UAT TTB (from DT reserved memory region).
+ pub(crate) uat_ttb_base: u64,
+ /// GPU ID configuration read from hardware.
+ pub(crate) id: GpuIdConfig,
+ /// Power calibration configuration for this specific chip/device.
+ pub(crate) pwr: PwrConfig,
+}
+
+/// Specific GPU ID configuration fetched from SGX MMIO registers.
+///
+/// Stored as the `id` field of `DynConfig`.
+#[derive(Debug)]
+pub(crate) struct GpuIdConfig {
+ /// GPU generation (should match static config).
+ pub(crate) gpu_gen: GpuGen,
+ /// GPU variant type (should match static config).
+ pub(crate) gpu_variant: GpuVariant,
+ /// GPU silicon revision.
+ pub(crate) gpu_rev: GpuRevision,
+ /// GPU silicon revision ID (firmware enum).
+ pub(crate) gpu_rev_id: GpuRevisionID,
+ /// Maximum number of dies supported.
+ pub(crate) max_dies: u32,
+ /// Total number of GPU clusters.
+ pub(crate) num_clusters: u32,
+ /// Maximum number of GPU cores per cluster.
+ pub(crate) num_cores: u32,
+ /// Number of frags per cluster.
+ pub(crate) num_frags: u32,
+ /// Number of GPs per cluster.
+ pub(crate) num_gps: u32,
+ /// Total number of active cores for the whole GPU.
+ pub(crate) total_active_cores: u32,
+ /// Mask of active cores per cluster.
+ pub(crate) core_masks: Vec<u32>,
+ /// Packed mask of all active cores.
+ pub(crate) core_masks_packed: Vec<u32>,
+}
+
+/// Configurable GPU power settings from the device tree.
+///
+/// Populated by `PwrConfig::load`; most scalar fields map one-to-one onto an `apple,*`
+/// property of the GPU device tree node.
+#[derive(Debug)]
+pub(crate) struct PwrConfig {
+ /// GPU performance state list.
+ pub(crate) perf_states: Vec<PState>,
+ /// GPU power zone list.
+ pub(crate) power_zones: Vec<PowerZone>,
+
+ /// Core leakage coefficient per cluster.
+ pub(crate) core_leak_coef: Vec<F32>,
+ /// SRAM leakage coefficient per cluster.
+ pub(crate) sram_leak_coef: Vec<F32>,
+
+ /// Maximum total power of the GPU in milliwatts.
+ pub(crate) max_power_mw: u32,
+ /// Maximum frequency of the GPU in megahertz.
+ pub(crate) max_freq_mhz: u32,
+
+ /// Minimum performance state to start at.
+ pub(crate) perf_base_pstate: u32,
+ /// Maximum enabled performance state.
+ pub(crate) perf_max_pstate: u32,
+
+ /// Minimum voltage for the SRAM power domain in microvolts.
+ pub(crate) min_sram_microvolt: u32,
+
+ // Most of these fields are just named after Apple ADT property names and we don't fully
+ // understand them. They configure various power-related PID loops and filters.
+ /// Average power filter time constant in milliseconds.
+ pub(crate) avg_power_filter_tc_ms: u32,
+ /// Average power filter PID integral gain?
+ pub(crate) avg_power_ki_only: F32,
+ /// Average power filter PID proportional gain?
+ pub(crate) avg_power_kp: F32,
+ /// Value of `apple,avg-power-min-duty-cycle` (meaning not fully understood).
+ pub(crate) avg_power_min_duty_cycle: u32,
+ /// Average power target filter time constant in periods.
+ pub(crate) avg_power_target_filter_tc: u32,
+ /// "Fast die0" (temperature?) PID integral gain.
+ pub(crate) fast_die0_integral_gain: F32,
+ /// "Fast die0" (temperature?) PID proportional gain.
+ pub(crate) fast_die0_proportional_gain: F32,
+ /// Value of `apple,fast-die0-prop-tgt-delta` (meaning not fully understood).
+ pub(crate) fast_die0_prop_tgt_delta: u32,
+ /// Value of `apple,fast-die0-release-temp` (meaning not fully understood).
+ pub(crate) fast_die0_release_temp: u32,
+ /// Delay from the fender (?) becoming idle to powerdown
+ pub(crate) fender_idle_off_delay_ms: u32,
+ /// Timeout from firmware early wake to sleep if no work was submitted (?)
+ pub(crate) fw_early_wake_timeout_ms: u32,
+ /// Delay from the GPU becoming idle to powerdown
+ pub(crate) idle_off_delay_ms: u32,
+ /// Percent?
+ pub(crate) perf_boost_ce_step: u32,
+ /// Minimum utilization before performance state is increased in %.
+ pub(crate) perf_boost_min_util: u32,
+ /// Value of `apple,perf-filter-drop-threshold` (meaning not fully understood).
+ pub(crate) perf_filter_drop_threshold: u32,
+ /// Performance PID filter time constant? (periods?)
+ pub(crate) perf_filter_time_constant: u32,
+ /// Performance PID filter time constant 2? (periods?)
+ pub(crate) perf_filter_time_constant2: u32,
+ /// Performance PID integral gain.
+ pub(crate) perf_integral_gain: F32,
+ /// Performance PID integral gain 2 (?).
+ pub(crate) perf_integral_gain2: F32,
+ /// Value of `apple,perf-integral-min-clamp` (meaning not fully understood).
+ pub(crate) perf_integral_min_clamp: u32,
+ /// Performance PID proportional gain.
+ pub(crate) perf_proportional_gain: F32,
+ /// Performance PID proportional gain 2 (?).
+ pub(crate) perf_proportional_gain2: F32,
+ /// Value of `apple,perf-reset-iters` (meaning not fully understood).
+ pub(crate) perf_reset_iters: u32,
+ /// Target GPU utilization for the performance controller in %.
+ pub(crate) perf_tgt_utilization: u32,
+ /// Power sampling period in milliseconds.
+ pub(crate) power_sample_period: u32,
+ /// PPM (?) filter time constant in milliseconds.
+ pub(crate) ppm_filter_time_constant_ms: u32,
+ /// PPM (?) filter PID integral gain.
+ pub(crate) ppm_ki: F32,
+ /// PPM (?) filter PID proportional gain.
+ pub(crate) ppm_kp: F32,
+ /// Power consumption filter time constant (periods?)
+ pub(crate) pwr_filter_time_constant: u32,
+ /// Power consumption filter PID integral gain.
+ pub(crate) pwr_integral_gain: F32,
+ /// Value of `apple,pwr-integral-min-clamp` (meaning not fully understood).
+ pub(crate) pwr_integral_min_clamp: u32,
+ /// Value of `apple,pwr-min-duty-cycle` (meaning not fully understood).
+ pub(crate) pwr_min_duty_cycle: u32,
+ /// Value of `apple,pwr-proportional-gain`; presumably the power consumption filter PID
+ /// proportional gain, by analogy with `pwr_integral_gain`.
+ pub(crate) pwr_proportional_gain: F32,
+}
+
+impl PwrConfig {
+    /// Load the GPU power configuration from the device tree.
+    ///
+    /// Reads the OPP table referenced by `operating-points-v2`, the optional
+    /// `apple,power-zones` list, the per-cluster leakage coefficients and the `apple,*`
+    /// tuning properties of the GPU node.
+    ///
+    /// # Errors
+    ///
+    /// - `EIO` if the device has no OF node or no `operating-points-v2` phandle.
+    /// - `EINVAL` if the OPP table has no entries, or if `opp-microvolt`,
+    ///   `apple,power-zones`, `apple,core-leak-coef` or `apple,sram-leak-coef` do not have
+    ///   the expected number of elements.
+    /// - Any error returned while reading a property, converting an integer or growing a
+    ///   vector.
+    pub(crate) fn load(dev: &AsahiDevice, cfg: &HwConfig) -> Result<PwrConfig> {
+        let mut perf_states = Vec::new();
+
+        let node = dev.of_node().ok_or(EIO)?;
+        let opps = node
+            .parse_phandle(c_str!("operating-points-v2"), 0)
+            .ok_or(EIO)?;
+
+        let mut max_power_mw: u32 = 0;
+        let mut max_freq_mhz: u32 = 0;
+
+        // `prop!(name, default)` reads an optional property, falling back to `default`;
+        // `prop!(name)` reads a mandatory one. Both log the property name on read errors.
+        macro_rules! prop {
+            ($prop:expr, $default:expr) => {{
+                node.get_opt_property(c_str!($prop))
+                    .map_err(|e| {
+                        dev_err!(dev, "Error reading property {}: {:?}\n", $prop, e);
+                        e
+                    })?
+                    .unwrap_or($default)
+            }};
+            ($prop:expr) => {{
+                node.get_property(c_str!($prop)).map_err(|e| {
+                    dev_err!(dev, "Error reading property {}: {:?}\n", $prop, e);
+                    e
+                })?
+            }};
+        }
+
+        for opp in opps.children() {
+            let freq_hz: u64 = opp.get_property(c_str!("opp-hz"))?;
+            let mut volt_uv: Vec<u32> = opp.get_property(c_str!("opp-microvolt"))?;
+            let pwr_uw: u32 = opp.get_property(c_str!("opp-microwatt"))?;
+
+            if volt_uv.len() != cfg.max_num_clusters as usize {
+                dev_err!(
+                    dev,
+                    "Invalid opp-microvolt length (expected {}, got {})\n",
+                    cfg.max_num_clusters,
+                    volt_uv.len()
+                );
+                return Err(EINVAL);
+            }
+
+            // Convert microvolts to millivolts in place.
+            volt_uv.iter_mut().for_each(|a| *a /= 1000);
+            let volt_mv = volt_uv;
+
+            let pwr_mw = pwr_uw / 1000;
+            max_power_mw = max_power_mw.max(pwr_mw);
+
+            let freq_mhz: u32 = (freq_hz / 1_000_000).try_into()?;
+            max_freq_mhz = max_freq_mhz.max(freq_mhz);
+
+            perf_states.try_push(PState {
+                freq_hz: freq_hz.try_into()?,
+                volt_mv,
+                pwr_mw,
+            })?;
+        }
+
+        // `perf_max_pstate` below is `len - 1`, which would underflow on an empty table.
+        if perf_states.is_empty() {
+            dev_err!(dev, "No operating points found\n");
+            return Err(EINVAL);
+        }
+
+        let pz_data: Vec<u32> = prop!("apple,power-zones", Vec::new());
+
+        if pz_data.len() > 3 * MAX_POWERZONES || pz_data.len() % 3 != 0 {
+            dev_err!(dev, "Invalid apple,power-zones value\n");
+            return Err(EINVAL);
+        }
+
+        // The property is a flat list of (target, target_offset, filter_tc) triples, so walk
+        // it three values at a time. The length was checked to be a multiple of 3 above.
+        let mut power_zones = Vec::new();
+        for zone in pz_data.chunks_exact(3) {
+            power_zones.try_push(PowerZone {
+                target: zone[0],
+                target_offset: zone[1],
+                filter_tc: zone[2],
+            })?;
+        }
+
+        let core_leak_coef: Vec<F32> = prop!("apple,core-leak-coef");
+        let sram_leak_coef: Vec<F32> = prop!("apple,sram-leak-coef");
+
+        if core_leak_coef.len() != cfg.max_num_clusters as usize {
+            dev_err!(dev, "Invalid apple,core-leak-coef\n");
+            return Err(EINVAL);
+        }
+        if sram_leak_coef.len() != cfg.max_num_clusters as usize {
+            dev_err!(dev, "Invalid apple,sram-leak-coef\n");
+            return Err(EINVAL);
+        }
+
+        Ok(PwrConfig {
+            core_leak_coef,
+            sram_leak_coef,
+
+            max_power_mw,
+            max_freq_mhz,
+
+            perf_base_pstate: prop!("apple,perf-base-pstate", 1),
+            // Non-empty table guaranteed above, so this cannot underflow.
+            perf_max_pstate: perf_states.len() as u32 - 1,
+            min_sram_microvolt: prop!("apple,min-sram-microvolt"),
+
+            avg_power_filter_tc_ms: prop!("apple,avg-power-filter-tc-ms"),
+            avg_power_ki_only: prop!("apple,avg-power-ki-only"),
+            avg_power_kp: prop!("apple,avg-power-kp"),
+            avg_power_min_duty_cycle: prop!("apple,avg-power-min-duty-cycle"),
+            avg_power_target_filter_tc: prop!("apple,avg-power-target-filter-tc"),
+            fast_die0_integral_gain: prop!("apple,fast-die0-integral-gain"),
+            fast_die0_proportional_gain: prop!("apple,fast-die0-proportional-gain"),
+            fast_die0_prop_tgt_delta: prop!("apple,fast-die0-prop-tgt-delta", 0),
+            fast_die0_release_temp: prop!("apple,fast-die0-release-temp", 80),
+            fender_idle_off_delay_ms: prop!("apple,fender-idle-off-delay-ms", 40),
+            fw_early_wake_timeout_ms: prop!("apple,fw-early-wake-timeout-ms", 5),
+            idle_off_delay_ms: prop!("apple,idle-off-delay-ms", 2),
+            perf_boost_ce_step: prop!("apple,perf-boost-ce-step", 25),
+            perf_boost_min_util: prop!("apple,perf-boost-min-util", 100),
+            perf_filter_drop_threshold: prop!("apple,perf-filter-drop-threshold"),
+            perf_filter_time_constant2: prop!("apple,perf-filter-time-constant2"),
+            perf_filter_time_constant: prop!("apple,perf-filter-time-constant"),
+            perf_integral_gain2: prop!("apple,perf-integral-gain2"),
+            perf_integral_gain: prop!("apple,perf-integral-gain", f32!(7.8956833)),
+            perf_integral_min_clamp: prop!("apple,perf-integral-min-clamp"),
+            perf_proportional_gain2: prop!("apple,perf-proportional-gain2"),
+            perf_proportional_gain: prop!("apple,perf-proportional-gain", f32!(14.707963)),
+            perf_reset_iters: prop!("apple,perf-reset-iters", 6),
+            perf_tgt_utilization: prop!("apple,perf-tgt-utilization"),
+            power_sample_period: prop!("apple,power-sample-period"),
+            ppm_filter_time_constant_ms: prop!("apple,ppm-filter-time-constant-ms"),
+            ppm_ki: prop!("apple,ppm-ki"),
+            ppm_kp: prop!("apple,ppm-kp"),
+            pwr_filter_time_constant: prop!("apple,pwr-filter-time-constant", 313),
+            pwr_integral_gain: prop!("apple,pwr-integral-gain", f32!(0.0202129)),
+            pwr_integral_min_clamp: prop!("apple,pwr-integral-min-clamp", 0),
+            pwr_min_duty_cycle: prop!("apple,pwr-min-duty-cycle"),
+            pwr_proportional_gain: prop!("apple,pwr-proportional-gain", f32!(5.2831855)),
+
+            perf_states,
+            power_zones,
+        })
+    }
+
+    /// Frequency of the base performance state (`perf_base_pstate`), in kilohertz.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `perf_base_pstate` is not a valid index into `perf_states`.
+    pub(crate) fn min_frequency_khz(&self) -> u32 {
+        self.perf_states[self.perf_base_pstate as usize].freq_hz / 1000
+    }
+
+    /// Frequency of the maximum performance state (`perf_max_pstate`), in kilohertz.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `perf_max_pstate` is not a valid index into `perf_states`.
+    pub(crate) fn max_frequency_khz(&self) -> u32 {
+        self.perf_states[self.perf_max_pstate as usize].freq_hz / 1000
+    }
+}
diff --git a/drivers/gpu/drm/asahi/hw/t600x.rs b/drivers/gpu/drm/asahi/hw/t600x.rs
new file mode 100644
index 000000000000..8a8267a7e18a
--- /dev/null
+++ b/drivers/gpu/drm/asahi/hw/t600x.rs
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+//! Hardware configuration for t600x (M1 Pro/Max/Ultra) platforms.
+
+use crate::f32;
+
+use super::*;
+
+/// Build the firmware MMIO mapping table shared by the t600x SoCs.
+///
+/// `mcc_count` scales the size of the MCache register mapping, and `has_die1` selects
+/// whether the die 1 "NIA Special agent idle register" slot is populated.
+const fn iomaps(mcc_count: usize, has_die1: bool) -> [Option<IOMapping>; 20] {
+    // The only two entries that depend on the arguments are computed up front.
+    let mcache = IOMapping::new(0x200000000, mcc_count * 0xd8000, 0xd6400, true);
+    let nia_die1 = if has_die1 {
+        Some(IOMapping::new(0x22643c4000, 0x1000, 0x1000, true))
+    } else {
+        None
+    };
+
+    [
+        Some(IOMapping::new(0x404d00000, 0x1c000, 0x1c000, true)), // Fender
+        Some(IOMapping::new(0x20e100000, 0x4000, 0x4000, false)), // AICTimer
+        Some(IOMapping::new(0x28e104000, 0x4000, 0x4000, true)), // AICSWInt
+        Some(IOMapping::new(0x404000000, 0x20000, 0x20000, true)), // RGX
+        None, // UVD
+        None, // unused
+        None, // DisplayUnderrunWA
+        Some(IOMapping::new(0x28e494000, 0x1000, 0x1000, false)), // AnalogTempSensorControllerRegs
+        None, // PMPDoorbell
+        Some(IOMapping::new(0x404d80000, 0x8000, 0x8000, true)), // MetrologySensorRegs
+        Some(IOMapping::new(0x204d61000, 0x1000, 0x1000, true)), // GMGIFAFRegs
+        Some(mcache), // MCache registers
+        None, // AICBankedRegisters
+        None, // PMGRScratch
+        Some(IOMapping::new(0x2643c4000, 0x1000, 0x1000, true)), // NIA Special agent idle register die 0
+        nia_die1, // NIA Special agent idle register die 1
+        None, // CRE registers
+        None, // Streaming codec registers
+        Some(IOMapping::new(0x28e3d0000, 0x1000, 0x1000, true)), // ?
+        Some(IOMapping::new(0x28e3c0000, 0x1000, 0x1000, false)), // ?
+    ]
+}
+
+/// Static hardware configuration for chip ID 0x6002.
+///
+/// This is the fully spelled-out t600x configuration (8 clusters, die 1 mappings
+/// enabled); `HWCONFIG_T6001` is derived from it via struct update syntax.
+pub(crate) const HWCONFIG_T6002: super::HwConfig = HwConfig {
+ chip_id: 0x6002,
+ gpu_gen: GpuGen::G13,
+ gpu_variant: GpuVariant::D,
+ gpu_core: GpuCore::G13C,
+ gpu_feat_compat: 0,
+ gpu_feat_incompat: feat::incompat::MANDATORY_ZS_COMPRESSION,
+
+ base_clock_hz: 24_000_000,
+ uat_oas: 42,
+ max_num_clusters: 8,
+ max_num_cores: 8,
+ max_num_frags: 8,
+ max_num_gps: 4,
+
+ preempt1_size: 0x540,
+ preempt2_size: 0x280,
+ preempt3_size: 0x20,
+
+ render: HwRenderConfig {
+ tiling_control: 0xa540,
+ },
+
+ da: HwConfigA {
+ unk_87c: 900,
+ unk_8cc: 11000,
+ unk_e24: 125,
+ },
+ db: HwConfigB {
+ unk_4e0: 4,
+ unk_534: 1,
+ unk_ab8: 0x2084,
+ unk_abc: 0x80,
+ unk_b30: 0,
+ },
+ shared1_tab: &[
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ ],
+ shared1_a4: 0xffff,
+ shared2_tab: &[-1, -1, -1, -1, 0x2aa, 0xaaa, -1, -1, 0, 0],
+ shared2_unk_508: 0xcc00001,
+ sram_k: f32!(1.02),
+ // One single-element coefficient slice per cluster (8 entries).
+ unk_coef_a: &[
+ &f32!([9.838]),
+ &f32!([9.819]),
+ &f32!([9.826]),
+ &f32!([9.799]),
+ &f32!([9.799]),
+ &f32!([9.826]),
+ &f32!([9.819]),
+ &f32!([9.838]),
+ ],
+ unk_coef_b: &[
+ &f32!([13.0]),
+ &f32!([13.0]),
+ &f32!([13.0]),
+ &f32!([13.0]),
+ &f32!([13.0]),
+ &f32!([13.0]),
+ &f32!([13.0]),
+ &f32!([13.0]),
+ ],
+ global_tab: Some(&[
+ 0, 1, 2, 1, 1, 90, 75, 1, 1, 1, 2, 90, 75, 1, 1, 1, 1, 90, 75, 1, 1,
+ ]),
+ fast_die0_sensor_mask: 0x8080808080808080,
+ fast_die0_sensor_mask_alt: 0x9090909090909090,
+ fast_die0_sensor_present: 0xff,
+ // 16 MCCs, with the die 1 NIA mapping present.
+ io_mappings: &iomaps(16, true),
+};
+
+/// Static hardware configuration for chip ID 0x6001.
+///
+/// Only the fields listed here differ from `HWCONFIG_T6002`; everything else is inherited
+/// through the struct update syntax at the end.
+pub(crate) const HWCONFIG_T6001: super::HwConfig = HwConfig {
+ chip_id: 0x6001,
+ gpu_variant: GpuVariant::C,
+ gpu_core: GpuCore::G13C,
+
+ max_num_clusters: 4,
+ fast_die0_sensor_mask: 0x80808080,
+ fast_die0_sensor_mask_alt: 0x90909090,
+ fast_die0_sensor_present: 0x0f,
+ // 8 MCCs, no die 1 NIA mapping.
+ io_mappings: &iomaps(8, false),
+ ..HWCONFIG_T6002
+};
+
+/// Static hardware configuration for chip ID 0x6000.
+///
+/// Only the fields listed here differ from `HWCONFIG_T6001` (and, transitively,
+/// `HWCONFIG_T6002`); everything else is inherited through the struct update syntax.
+pub(crate) const HWCONFIG_T6000: super::HwConfig = HwConfig {
+ chip_id: 0x6000,
+ gpu_variant: GpuVariant::S,
+ gpu_core: GpuCore::G13S,
+
+ max_num_clusters: 2,
+ fast_die0_sensor_mask: 0x8080,
+ fast_die0_sensor_mask_alt: 0x9090,
+ fast_die0_sensor_present: 0x03,
+ // 4 MCCs, no die 1 NIA mapping.
+ io_mappings: &iomaps(4, false),
+ ..HWCONFIG_T6001
+};
diff --git a/drivers/gpu/drm/asahi/hw/t8103.rs b/drivers/gpu/drm/asahi/hw/t8103.rs
new file mode 100644
index 000000000000..3d38b088a0f5
--- /dev/null
+++ b/drivers/gpu/drm/asahi/hw/t8103.rs
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+//! Hardware configuration for t8103 platforms (M1).
+
+use crate::f32;
+
+use super::*;
+
+/// Static hardware configuration for chip ID 0x8103 (single cluster, G13G core).
+pub(crate) const HWCONFIG: super::HwConfig = HwConfig {
+ chip_id: 0x8103,
+ gpu_gen: GpuGen::G13,
+ gpu_variant: GpuVariant::G,
+ gpu_core: GpuCore::G13G,
+ gpu_feat_compat: 0,
+ gpu_feat_incompat: 0,
+
+ base_clock_hz: 24_000_000,
+ uat_oas: 40,
+ max_num_clusters: 1,
+ max_num_cores: 8,
+ max_num_frags: 8,
+ max_num_gps: 4,
+
+ preempt1_size: 0x540,
+ preempt2_size: 0x280,
+ preempt3_size: 0x20,
+
+ render: HwRenderConfig {
+ // bit 0: disable clustering (always)
+ tiling_control: 0xa041,
+ },
+
+ da: HwConfigA {
+ unk_87c: -220,
+ unk_8cc: 9880,
+ unk_e24: 112,
+ },
+ db: HwConfigB {
+ unk_4e0: 0,
+ unk_534: 0,
+ unk_ab8: 0x48,
+ unk_abc: 0x8,
+ unk_b30: 0,
+ },
+ shared1_tab: &[
+ -1, 0x7282, 0x50ea, 0x370a, 0x25be, 0x1c1f, 0x16fb, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ ],
+ shared1_a4: 0xffff,
+ shared2_tab: &[0x800, 0x1555, -1, -1, -1, -1, -1, -1, 0, 0],
+ shared2_unk_508: 0xc00007,
+ sram_k: f32!(1.02),
+ // No per-cluster coefficient tables or global table on this SoC.
+ unk_coef_a: &[],
+ unk_coef_b: &[],
+ global_tab: None,
+ fast_die0_sensor_mask: 0x12,
+ fast_die0_sensor_mask_alt: 0x12,
+ fast_die0_sensor_present: 0x01,
+ // Slot order matches the other SoC tables; `None` marks an unused slot.
+ io_mappings: &[
+ Some(IOMapping::new(0x204d00000, 0x1c000, 0x1c000, true)), // Fender
+ Some(IOMapping::new(0x20e100000, 0x4000, 0x4000, false)), // AICTimer
+ Some(IOMapping::new(0x23b104000, 0x4000, 0x4000, true)), // AICSWInt
+ Some(IOMapping::new(0x204000000, 0x20000, 0x20000, true)), // RGX
+ None, // UVD
+ None, // unused
+ None, // DisplayUnderrunWA
+ Some(IOMapping::new(0x23b2e8000, 0x1000, 0x1000, false)), // AnalogTempSensorControllerRegs
+ Some(IOMapping::new(0x23bc00000, 0x1000, 0x1000, true)), // PMPDoorbell
+ Some(IOMapping::new(0x204d80000, 0x5000, 0x5000, true)), // MetrologySensorRegs
+ Some(IOMapping::new(0x204d61000, 0x1000, 0x1000, true)), // GMGIFAFRegs
+ Some(IOMapping::new(0x200000000, 0xd6400, 0xd6400, true)), // MCache registers
+ None, // AICBankedRegisters
+ Some(IOMapping::new(0x23b738000, 0x1000, 0x1000, true)), // PMGRScratch
+ None, // NIA Special agent idle register die 0
+ None, // NIA Special agent idle register die 1
+ None, // CRE registers
+ None, // Streaming codec registers
+ None, //
+ None, //
+ ],
+};
diff --git a/drivers/gpu/drm/asahi/hw/t8112.rs b/drivers/gpu/drm/asahi/hw/t8112.rs
new file mode 100644
index 000000000000..5624dca130be
--- /dev/null
+++ b/drivers/gpu/drm/asahi/hw/t8112.rs
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+//! Hardware configuration for t8112 platforms (M2).
+
+use crate::f32;
+
+use super::*;
+
+/// Static hardware configuration for chip ID 0x8112 (single cluster, G14G core).
+pub(crate) const HWCONFIG: super::HwConfig = HwConfig {
+ chip_id: 0x8112,
+ gpu_gen: GpuGen::G14,
+ gpu_variant: GpuVariant::G,
+ gpu_core: GpuCore::G14G,
+ gpu_feat_compat: 0,
+ gpu_feat_incompat: 0,
+
+ base_clock_hz: 24_000_000,
+ uat_oas: 40,
+ max_num_clusters: 1,
+ max_num_cores: 10,
+ max_num_frags: 10,
+ max_num_gps: 4,
+
+ preempt1_size: 0x540,
+ preempt2_size: 0x280,
+ preempt3_size: 0x20,
+
+ render: HwRenderConfig {
+ // TODO: this is unused here, may be present in newer FW
+ tiling_control: 0xa041,
+ },
+
+ da: HwConfigA {
+ unk_87c: 900,
+ unk_8cc: 11000,
+ unk_e24: 125,
+ },
+ db: HwConfigB {
+ unk_4e0: 4,
+ unk_534: 0,
+ unk_ab8: 0x2048,
+ unk_abc: 0x4000,
+ unk_b30: 1,
+ },
+ shared1_tab: &[
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff,
+ ],
+ shared1_a4: 0,
+ shared2_tab: &[-1, -1, -1, -1, -1, -1, -1, -1, 0xaa5aa, 0],
+ shared2_unk_508: 0xc00000,
+ sram_k: f32!(1.02),
+ // 13.2: last coef changed from 6.6 to 5.3, assuming that was a fix we can backport
+ // A single 8-element coefficient slice for the one cluster.
+ unk_coef_a: &[&f32!([0.0, 0.0, 0.0, 0.0, 5.3, 0.0, 5.3, /*6.6*/ 5.3])],
+ unk_coef_b: &[&f32!([0.0, 0.0, 0.0, 0.0, 5.3, 0.0, 5.3, /*6.6*/ 5.3])],
+ global_tab: None,
+ fast_die0_sensor_mask: 0x6800,
+ fast_die0_sensor_mask_alt: 0x6800,
+ fast_die0_sensor_present: 0x02,
+ // Slot order matches the other SoC tables; `None` marks an unused slot.
+ io_mappings: &[
+ Some(IOMapping::new(0x204d00000, 0x14000, 0x14000, true)), // Fender
+ Some(IOMapping::new(0x20e100000, 0x4000, 0x4000, false)), // AICTimer
+ Some(IOMapping::new(0x23b0c4000, 0x4000, 0x4000, true)), // AICSWInt
+ Some(IOMapping::new(0x204000000, 0x20000, 0x20000, true)), // RGX
+ None, // UVD
+ None, // unused
+ None, // DisplayUnderrunWA
+ Some(IOMapping::new(0x23b2c0000, 0x1000, 0x1000, false)), // AnalogTempSensorControllerRegs
+ None, // PMPDoorbell
+ Some(IOMapping::new(0x204d80000, 0x8000, 0x8000, true)), // MetrologySensorRegs
+ Some(IOMapping::new(0x204d61000, 0x1000, 0x1000, true)), // GMGIFAFRegs
+ Some(IOMapping::new(0x200000000, 0xd6400, 0xd6400, true)), // MCache registers
+ None, // AICBankedRegisters
+ None, // PMGRScratch
+ None, // NIA Special agent idle register die 0
+ None, // NIA Special agent idle register die 1
+ Some(IOMapping::new(0x204e00000, 0x10000, 0x10000, true)), // CRE registers
+ Some(IOMapping::new(0x27d050000, 0x4000, 0x4000, true)), // Streaming codec registers
+ Some(IOMapping::new(0x23b3d0000, 0x1000, 0x1000, true)), //
+ Some(IOMapping::new(0x23b3c0000, 0x1000, 0x1000, true)), //
+ ],
+};
diff --git a/drivers/gpu/drm/asahi/initdata.rs b/drivers/gpu/drm/asahi/initdata.rs
new file mode 100644
index 000000000000..472c42169130
--- /dev/null
+++ b/drivers/gpu/drm/asahi/initdata.rs
@@ -0,0 +1,777 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+#![allow(clippy::unusual_byte_groupings)]
+
+//! GPU initialization data builder.
+//!
+//! The root of all interaction between the GPU firmware and the host driver is a complex set of
+//! nested structures that we call InitData. This includes both GPU hardware/firmware configuration
+//! and the pointers to the ring buffers and global data fields that are used for communication at
+//! runtime.
+//!
+//! Many of these structures are poorly understood, so there are lots of hardcoded unknown values
+//! derived from observing the InitData structures that macOS generates.
+
+use crate::fw::initdata::*;
+use crate::fw::types::*;
+use crate::{box_in_place, f32, place};
+use crate::{gpu, hw, mmu};
+use kernel::error::Result;
+use kernel::macros::versions;
+
+/// Builder helper for the global GPU InitData.
+///
+/// Borrows the kernel allocators and the two hardware configuration structures for `'a`; the
+/// builder methods read from the configs and allocate the firmware structures from `alloc`.
+#[versions(AGX)]
+pub(crate) struct InitDataBuilder<'a> {
+ /// Allocators used to create every GPU object that makes up the InitData tree.
+ alloc: &'a mut gpu::KernelAllocators,
+ /// Hardware configuration (chip ID, base clock, sensor masks, shared tables, ...).
+ cfg: &'a hw::HwConfig,
+ /// Dynamic configuration (power/perf-state data, GPU ID info, UAT TTB base).
+ dyncfg: &'a hw::DynConfig,
+}
+
+#[versions(AGX)]
+impl<'a> InitDataBuilder::ver<'a> {
+ /// Construct an InitData builder from the kernel allocators and the hardware configuration.
+ ///
+ /// The three references are stored as-is; nothing is allocated here.
+ pub(crate) fn new(
+     alloc: &'a mut gpu::KernelAllocators,
+     cfg: &'a hw::HwConfig,
+     dyncfg: &'a hw::DynConfig,
+ ) -> InitDataBuilder::ver<'a> {
+     Self { alloc, cfg, dyncfg }
+ }
+
+ /// Build the HwDataShared1 structure, which appears in two places in InitData.
+ ///
+ /// `unk_a4` comes from `cfg.shared1_a4` and the leading table entries from `cfg.shared1_tab`;
+ /// everything else keeps its default value.
+ #[inline(never)]
+ fn hw_shared1(cfg: &hw::HwConfig) -> raw::HwDataShared1 {
+     let mut shared = raw::HwDataShared1 {
+         unk_a4: cfg.shared1_a4,
+         ..Default::default()
+     };
+     for (idx, &entry) in cfg.shared1_tab.iter().enumerate() {
+         shared.table[idx] = entry;
+     }
+     shared
+ }
+
+ /// Fill in one `HwDataShared2Curve`.
+ ///
+ /// `t1` and `t2` are copied to the front of the corresponding curve tables and the remaining
+ /// slots are padded with the first element of the respective input. Every row of `curve.t3` is
+ /// first filled with `0x3ffffff`; rows that have a counterpart in `t3` then get their leading
+ /// entries overwritten with it.
+ fn init_curve(
+     curve: &mut raw::HwDataShared2Curve,
+     unk_0: u32,
+     unk_4: u32,
+     t1: &[i16],
+     t2: &[i16],
+     t3: &[&[i32]],
+ ) {
+     curve.unk_0 = unk_0;
+     curve.unk_4 = unk_4;
+
+     let (t1_head, t1_tail) = (*curve.t1).split_at_mut(t1.len());
+     t1_head.copy_from_slice(t1);
+     t1_tail.fill(t1[0]);
+
+     let (t2_head, t2_tail) = (*curve.t2).split_at_mut(t2.len());
+     t2_head.copy_from_slice(t2);
+     t2_tail.fill(t2[0]);
+
+     for (row, dst) in curve.t3.iter_mut().enumerate() {
+         dst.fill(0x3ffffff);
+         if let Some(&src) = t3.get(row) {
+             (**dst)[..src.len()].copy_from_slice(src);
+         }
+     }
+ }
+
+ /// Build the HwDataShared2 structure, which appears in two places in InitData.
+ ///
+ /// The table is seeded from `cfg.shared2_tab`. The `t8112` section is only populated when
+ /// `cfg.chip_id` is 0x8112; for every other chip it keeps its default value.
+ #[inline(never)]
+ fn hw_shared2(cfg: &hw::HwConfig) -> Result<Box<raw::HwDataShared2>> {
+     let mut shared = box_in_place!(raw::HwDataShared2 {
+         unk_28: Array::new([0xff; 16]),
+         t8112: Default::default(),
+         unk_508: cfg.shared2_unk_508,
+         ..Default::default()
+     })?;
+
+     for (idx, &entry) in cfg.shared2_tab.iter().enumerate() {
+         shared.table[idx] = entry;
+     }
+
+     // Nothing more to do unless this is a T8112.
+     if cfg.chip_id != 0x8112 {
+         return Ok(shared);
+     }
+
+     let t8112 = &mut shared.t8112;
+     t8112.unk_14 = 0x6000000;
+     Self::init_curve(&mut t8112.curve1, 0, 0x20000000, &[-1], &[0x0f07], &[]);
+     Self::init_curve(
+         &mut t8112.curve2,
+         7,
+         0x80000000,
+         &[-1, 25740, 17429, 12550, 9597, 7910, 6657, 5881, 5421],
+         &[
+             0x0f07, 0x04c0, 0x06c0, 0x08c0, 0x0ac0, 0x0c40, 0x0dc0, 0x0ec0, 0x0f80,
+         ],
+         &[
+             &[0x3ffffff, 107, 101, 94, 87, 82, 77, 73, 71],
+             &[
+                 0x3ffffff, 38240, 36251, 33562, 31368, 29379, 27693, 26211, 25370,
+             ],
+             &[
+                 0x3ffffff, 123933, 117485, 108771, 101661, 95217, 89751, 84948, 82222,
+             ],
+         ],
+     );
+
+     Ok(shared)
+ }
+
+ /// Build the HwDataShared3 structure, which appears in two places in InitData.
+ ///
+ /// The structure is left at its default value unless `cfg.chip_id` is 0x8112, in which case a
+ /// handful of fields and the 16-entry table are filled in.
+ #[inline(never)]
+ fn hw_shared3(cfg: &hw::HwConfig) -> Result<Box<raw::HwDataShared3>> {
+     let mut shared = box_in_place!(raw::HwDataShared3 {
+         ..Default::default()
+     })?;
+
+     // Nothing more to do unless this is a T8112.
+     if cfg.chip_id != 0x8112 {
+         return Ok(shared);
+     }
+
+     shared.unk_0 = 1;
+     shared.unk_4 = 500;
+     shared.unk_8 = 5;
+     shared.table.copy_from_slice(&[
+         10700, 10700, 10700, 10700, 10700, 6000, 1000, 1000, 1000, 10700, 10700, 10700,
+         10700, 10700, 10700, 10700,
+     ]);
+     shared.unk_4c = 1;
+
+     Ok(shared)
+ }
+
+ /// Build the (poorly understood) T81xx-specific data structure.
+ ///
+ /// All values are hardcoded except `max_pstate_scaled`, which is 100 times the maximum
+ /// performance state from `dyncfg`.
+ fn t81xx_data(dyncfg: &'a hw::DynConfig) -> raw::T81xxData {
+     let max_pstate_scaled = 100 * dyncfg.pwr.perf_max_pstate;
+
+     raw::T81xxData {
+         unk_d8c: 0x80000000,
+         unk_d90: 4,
+         unk_d9c: f32!(0.6),
+         unk_da4: f32!(0.4),
+         unk_dac: f32!(0.38552),
+         unk_db8: f32!(65536.0),
+         unk_dbc: f32!(13.56),
+         max_pstate_scaled,
+         ..Default::default()
+     }
+ }
+
+ /// Create the HwDataA structure. This mostly contains power-related configuration.
+ ///
+ /// The object is allocated from the private allocator and initialized in place. Most fields
+ /// are derived from `dyncfg.pwr`, with a few chip-specific values taken from `cfg`.
+ #[inline(never)]
+ fn hwdata_a(&mut self) -> Result<GpuObject<HwDataA::ver>> {
+ self.alloc
+ .private
+ .new_inplace(Default::default(), |_inner, ptr| {
+ let pwr = &self.dyncfg.pwr;
+ // Each `*_filter_a` coefficient below is 1 / (time constant); the structure also
+ // stores the complementary `1 - a` value in the matching `*_a_neg` field.
+ let period_ms = pwr.power_sample_period;
+ let period_s = F32::from(period_ms) / f32!(1000.0);
+ // NOTE(review): the divisions by `period_ms` look like integer divisions, which would
+ // truncate to whole sample periods and fail on a zero sample period — confirm the
+ // field types.
+ let ppm_filter_tc_periods = pwr.ppm_filter_time_constant_ms / period_ms;
+ #[ver(V >= V13_0B4)]
+ let ppm_filter_tc_ms_rounded = ppm_filter_tc_periods * period_ms;
+ let ppm_filter_a = f32!(1.0) / ppm_filter_tc_periods.into();
+ let perf_filter_a = f32!(1.0) / pwr.perf_filter_time_constant.into();
+ let perf_filter_a2 = f32!(1.0) / pwr.perf_filter_time_constant2.into();
+ let avg_power_target_filter_a = f32!(1.0) / pwr.avg_power_target_filter_tc.into();
+ let avg_power_filter_tc_periods = pwr.avg_power_filter_tc_ms / period_ms;
+ #[ver(V >= V13_0B4)]
+ let avg_power_filter_tc_ms_rounded = avg_power_filter_tc_periods * period_ms;
+ let avg_power_filter_a = f32!(1.0) / avg_power_filter_tc_periods.into();
+ let pwr_filter_a = f32!(1.0) / pwr.pwr_filter_time_constant.into();
+
+ // Performance states are written to the structure scaled by 100.
+ let base_ps = pwr.perf_base_pstate;
+ let base_ps_scaled = 100 * base_ps;
+ let max_ps = pwr.perf_max_pstate;
+ let max_ps_scaled = 100 * max_ps;
+ let boost_ps_count = max_ps - base_ps;
+
+ // Number of base-clock cycles in one power sample period (kHz * ms).
+ let base_clock_khz = self.cfg.base_clock_hz / 1000;
+ let clocks_per_period = base_clock_khz * period_ms;
+
+ let raw = place!(
+ ptr,
+ raw::HwDataA::ver {
+ clocks_per_period: clocks_per_period,
+ #[ver(V >= V13_0B4)]
+ clocks_per_period_2: clocks_per_period,
+ pwr_status: AtomicU32::new(4),
+ unk_10: f32!(1.0),
+ actual_pstate: 1,
+ tgt_pstate: 1,
+ base_pstate_scaled: base_ps_scaled,
+ unk_40: 1,
+ max_pstate_scaled: max_ps_scaled,
+ min_pstate_scaled: 100,
+ unk_64c: 625,
+ pwr_filter_a_neg: f32!(1.0) - pwr_filter_a,
+ pwr_filter_a: pwr_filter_a,
+ pwr_integral_gain: pwr.pwr_integral_gain,
+ pwr_integral_min_clamp: pwr.pwr_integral_min_clamp.into(),
+ max_power_1: pwr.max_power_mw.into(),
+ pwr_proportional_gain: pwr.pwr_proportional_gain,
+ pwr_pstate_related_k: -F32::from(max_ps_scaled) / pwr.max_power_mw.into(),
+ pwr_pstate_max_dc_offset: pwr.pwr_min_duty_cycle as i32
+ - max_ps_scaled as i32,
+ max_pstate_scaled_2: max_ps_scaled,
+ max_power_2: pwr.max_power_mw,
+ max_pstate_scaled_3: max_ps_scaled,
+ ppm_filter_tc_periods_x4: ppm_filter_tc_periods * 4,
+ ppm_filter_a_neg: f32!(1.0) - ppm_filter_a,
+ ppm_filter_a: ppm_filter_a,
+ ppm_ki_dt: pwr.ppm_ki * period_s,
+ unk_6fc: f32!(65536.0),
+ ppm_kp: pwr.ppm_kp,
+ pwr_min_duty_cycle: pwr.pwr_min_duty_cycle,
+ max_pstate_scaled_4: max_ps_scaled,
+ unk_71c: f32!(0.0),
+ max_power_3: pwr.max_power_mw,
+ cur_power_mw_2: 0x0,
+ ppm_filter_tc_ms: pwr.ppm_filter_time_constant_ms,
+ #[ver(V >= V13_0B4)]
+ ppm_filter_tc_clks: ppm_filter_tc_ms_rounded * base_clock_khz,
+ perf_tgt_utilization: pwr.perf_tgt_utilization,
+ perf_boost_min_util: pwr.perf_boost_min_util,
+ perf_boost_ce_step: pwr.perf_boost_ce_step,
+ perf_reset_iters: pwr.perf_reset_iters,
+ unk_774: 6,
+ unk_778: 1,
+ perf_filter_drop_threshold: pwr.perf_filter_drop_threshold,
+ perf_filter_a_neg: f32!(1.0) - perf_filter_a,
+ perf_filter_a2_neg: f32!(1.0) - perf_filter_a2,
+ perf_filter_a: perf_filter_a,
+ perf_filter_a2: perf_filter_a2,
+ perf_ki: pwr.perf_integral_gain,
+ perf_ki2: pwr.perf_integral_gain2,
+ perf_integral_min_clamp: pwr.perf_integral_min_clamp.into(),
+ unk_79c: f32!(95.0),
+ perf_kp: pwr.perf_proportional_gain,
+ perf_kp2: pwr.perf_proportional_gain2,
+ boost_state_unk_k: F32::from(boost_ps_count) / f32!(0.95),
+ base_pstate_scaled_2: base_ps_scaled,
+ max_pstate_scaled_5: max_ps_scaled,
+ base_pstate_scaled_3: base_ps_scaled,
+ perf_tgt_utilization_2: pwr.perf_tgt_utilization,
+ base_pstate_scaled_4: base_ps_scaled,
+ unk_7fc: f32!(65536.0),
+ pwr_min_duty_cycle_2: pwr.pwr_min_duty_cycle.into(),
+ max_pstate_scaled_6: max_ps_scaled.into(),
+ max_freq_mhz: pwr.max_freq_mhz,
+ pwr_min_duty_cycle_3: pwr.pwr_min_duty_cycle,
+ min_pstate_scaled_4: f32!(100.0),
+ max_pstate_scaled_7: max_ps_scaled,
+ unk_alpha_neg: f32!(0.8),
+ unk_alpha: f32!(0.2),
+ fast_die0_sensor_mask: U64(self.cfg.fast_die0_sensor_mask),
+ fast_die0_release_temp_cc: 100 * pwr.fast_die0_release_temp,
+ unk_87c: self.cfg.da.unk_87c,
+ unk_880: 0x4,
+ unk_894: f32!(1.0),
+
+ fast_die0_ki_dt: pwr.fast_die0_integral_gain * period_s,
+ unk_8a8: f32!(65536.0),
+ fast_die0_kp: pwr.fast_die0_proportional_gain,
+ pwr_min_duty_cycle_4: pwr.pwr_min_duty_cycle,
+ max_pstate_scaled_8: max_ps_scaled,
+ max_pstate_scaled_9: max_ps_scaled,
+ fast_die0_prop_tgt_delta: 100 * pwr.fast_die0_prop_tgt_delta,
+ unk_8cc: self.cfg.da.unk_8cc,
+ max_pstate_scaled_10: max_ps_scaled,
+ max_pstate_scaled_11: max_ps_scaled,
+ unk_c2c: 1,
+ power_zone_count: pwr.power_zones.len() as u32,
+ max_power_4: pwr.max_power_mw,
+ max_power_5: pwr.max_power_mw,
+ max_power_6: pwr.max_power_mw,
+ avg_power_target_filter_a_neg: f32!(1.0) - avg_power_target_filter_a,
+ avg_power_target_filter_a: avg_power_target_filter_a,
+ avg_power_target_filter_tc_x4: 4 * pwr.avg_power_target_filter_tc,
+ avg_power_target_filter_tc_xperiod: period_ms
+ * pwr.avg_power_target_filter_tc,
+ #[ver(V >= V13_0B4)]
+ avg_power_target_filter_tc_clks: period_ms
+ * pwr.avg_power_target_filter_tc
+ * base_clock_khz,
+ avg_power_filter_tc_periods_x4: 4 * avg_power_filter_tc_periods,
+ avg_power_filter_a_neg: f32!(1.0) - avg_power_filter_a,
+ avg_power_filter_a: avg_power_filter_a,
+ avg_power_ki_dt: pwr.avg_power_ki_only * period_s,
+ unk_d20: f32!(65536.0),
+ avg_power_kp: pwr.avg_power_kp,
+ avg_power_min_duty_cycle: pwr.avg_power_min_duty_cycle,
+ max_pstate_scaled_12: max_ps_scaled,
+ max_pstate_scaled_13: max_ps_scaled,
+ max_power_7: pwr.max_power_mw.into(),
+ max_power_8: pwr.max_power_mw,
+ avg_power_filter_tc_ms: pwr.avg_power_filter_tc_ms,
+ #[ver(V >= V13_0B4)]
+ avg_power_filter_tc_clks: avg_power_filter_tc_ms_rounded * base_clock_khz,
+ max_pstate_scaled_14: max_ps_scaled,
+ // Only the T8103 and T8112 get the T81xx-specific block; other chips use defaults.
+ t81xx_data: match self.cfg.chip_id {
+ 0x8103 | 0x8112 => Self::t81xx_data(self.dyncfg),
+ _ => Default::default(),
+ },
+ #[ver(V >= V13_0B4)]
+ unk_e10_0: raw::HwDataA130Extra {
+ unk_38: 4,
+ unk_3c: 8000,
+ unk_40: 2500,
+ unk_48: 0xffffffff,
+ unk_4c: 50,
+ unk_54: 50,
+ unk_58: 0x1,
+ unk_60: f32!(0.8888889),
+ unk_64: f32!(0.6666667),
+ unk_68: f32!(0.11111111),
+ unk_6c: f32!(0.33333333),
+ unk_70: f32!(-0.4),
+ unk_74: f32!(-0.8),
+ unk_7c: f32!(65536.0),
+ unk_80: f32!(-5.0),
+ unk_84: f32!(-10.0),
+ unk_8c: 40,
+ max_pstate_scaled_1: max_ps_scaled,
+ unk_9c: f32!(8000.0),
+ unk_a0: 1400,
+ unk_a8: 72,
+ unk_ac: 24,
+ unk_b0: 1728000,
+ unk_b8: 576000,
+ unk_c4: f32!(65536.0),
+ unk_114: f32!(65536.0),
+ unk_124: 40,
+ max_pstate_scaled_2: max_ps_scaled,
+ ..Default::default()
+ },
+ fast_die0_sensor_mask_2: U64(self.cfg.fast_die0_sensor_mask),
+ unk_e24: self.cfg.da.unk_e24,
+ unk_e28: 1,
+ fast_die0_sensor_mask_alt: U64(self.cfg.fast_die0_sensor_mask_alt),
+ #[ver(V < V13_0B4)]
+ fast_die0_sensor_present: U64(self.cfg.fast_die0_sensor_present as u64),
+ unk_163c: 1,
+ unk_3644: 0,
+ // The shared structures are built on the heap and copied in by value.
+ hws1: Self::hw_shared1(self.cfg),
+ hws2: *Self::hw_shared2(self.cfg)?,
+ hws3: *Self::hw_shared3(self.cfg)?,
+ unk_3ce8: 1,
+ ..Default::default()
+ }
+ );
+
+ // Same SRAM coefficient for every performance state.
+ for i in 0..self.dyncfg.pwr.perf_states.len() {
+ raw.sram_k[i] = self.cfg.sram_k;
+ }
+
+ for (i, coef) in pwr.core_leak_coef.iter().enumerate() {
+ raw.core_leak_coef[i] = *coef;
+ }
+
+ for (i, coef) in pwr.sram_leak_coef.iter().enumerate() {
+ raw.sram_leak_coef[i] = *coef;
+ }
+
+ // Each per-cluster coefficient table is written to two fields of the structure;
+ // clusters without an entry in the config keep their default contents.
+ for i in 0..self.dyncfg.id.num_clusters as usize {
+ if let Some(coef_a) = self.cfg.unk_coef_a.get(i) {
+ (*raw.unk_coef_a1[i])[..coef_a.len()].copy_from_slice(coef_a);
+ (*raw.unk_coef_a2[i])[..coef_a.len()].copy_from_slice(coef_a);
+ }
+ if let Some(coef_b) = self.cfg.unk_coef_b.get(i) {
+ (*raw.unk_coef_b1[i])[..coef_b.len()].copy_from_slice(coef_b);
+ (*raw.unk_coef_b2[i])[..coef_b.len()].copy_from_slice(coef_b);
+ }
+ }
+
+ // Power zones use the same 1 / tc filter coefficient scheme as above.
+ for (i, pz) in pwr.power_zones.iter().enumerate() {
+ raw.power_zones[i].target = pz.target;
+ raw.power_zones[i].target_off = pz.target - pz.target_offset;
+ raw.power_zones[i].filter_tc_x4 = 4 * pz.filter_tc;
+ raw.power_zones[i].filter_tc_xperiod = period_ms * pz.filter_tc;
+ let filter_a = f32!(1.0) / pz.filter_tc.into();
+ raw.power_zones[i].filter_a = filter_a;
+ raw.power_zones[i].filter_a_neg = f32!(1.0) - filter_a;
+ #[ver(V >= V13_0B4)]
+ raw.power_zones[i].unk_10 = 1320000000;
+ }
+
+ Ok(raw)
+ })
+ }
+
+ /// Create the HwDataB structure. This mostly contains GPU-related configuration.
+ ///
+ /// The object is allocated from the private allocator and initialized in place; afterwards
+ /// the per-performance-state tables (frequencies, voltages, relative powers) are filled in
+ /// from `dyncfg.pwr.perf_states`.
+ #[inline(never)]
+ fn hwdata_b(&mut self) -> Result<GpuObject<HwDataB::ver>> {
+ self.alloc
+ .private
+ .new_inplace(Default::default(), |_inner, ptr| {
+ let raw = place!(
+ ptr,
+ raw::HwDataB::ver {
+ // Userspace VA map related
+ #[ver(V < V13_0B4)]
+ unk_0: U64(0x13_00000000),
+ unk_8: U64(0x14_00000000),
+ #[ver(V < V13_0B4)]
+ unk_10: U64(0x1_00000000),
+ unk_18: U64(0xffc00000),
+ unk_20: U64(0x11_00000000),
+ unk_28: U64(0x11_00000000),
+ // userspace address?
+ unk_30: U64(0x6f_ffff8000),
+ // unmapped?
+ unkptr_38: U64(0xffffffa0_11800000),
+ // TODO: yuv matrices
+ chip_id: self.cfg.chip_id,
+ unk_454: 0x1,
+ unk_458: 0x1,
+ unk_460: 0x1,
+ unk_464: 0x1,
+ unk_468: 0x1,
+ unk_47c: 0x1,
+ unk_484: 0x1,
+ unk_48c: 0x1,
+ base_clock_khz: self.cfg.base_clock_hz / 1000,
+ power_sample_period: self.dyncfg.pwr.power_sample_period,
+ unk_49c: 0x1,
+ unk_4a0: 0x1,
+ unk_4a4: 0x1,
+ unk_4c0: 0x1f,
+ unk_4e0: U64(self.cfg.db.unk_4e0),
+ unk_4f0: 0x1,
+ unk_4f4: 0x1,
+ unk_504: 0x31,
+ unk_524: 0x1, // use_secure_cache_flush
+ unk_534: self.cfg.db.unk_534,
+ // Totals across all clusters.
+ num_frags: self.dyncfg.id.num_frags * self.dyncfg.id.num_clusters,
+ unk_554: 0x1,
+ uat_ttb_base: U64(self.dyncfg.uat_ttb_base),
+ gpu_core_id: self.cfg.gpu_core as u32,
+ gpu_rev_id: self.dyncfg.id.gpu_rev_id as u32,
+ num_cores: self.dyncfg.id.num_cores * self.dyncfg.id.num_clusters,
+ // NOTE(review): this subtraction would underflow for an empty perf_states list.
+ max_pstate: self.dyncfg.pwr.perf_states.len() as u32 - 1,
+ #[ver(V < V13_0B4)]
+ num_pstates: self.dyncfg.pwr.perf_states.len() as u32,
+ #[ver(V < V13_0B4)]
+ min_sram_volt: self.dyncfg.pwr.min_sram_microvolt / 1000,
+ #[ver(V < V13_0B4)]
+ unk_ab8: self.cfg.db.unk_ab8,
+ #[ver(V < V13_0B4)]
+ unk_abc: self.cfg.db.unk_abc,
+ #[ver(V < V13_0B4)]
+ unk_ac0: 0x1020,
+
+ #[ver(V >= V13_0B4)]
+ unk_ae4: Array::new([0x0, 0x3, 0x7, 0x7]),
+ #[ver(V < V13_0B4)]
+ unk_ae4: Array::new([0x0, 0xf, 0x3f, 0x3f]),
+ unk_b10: 0x1,
+ unk_b24: 0x1,
+ unk_b28: 0x1,
+ unk_b2c: 0x1,
+ unk_b30: self.cfg.db.unk_b30,
+ #[ver(V >= V13_0B4)]
+ unk_b38_0: 1,
+ #[ver(V >= V13_0B4)]
+ unk_b38_4: 1,
+ unk_b38: Array::new([0xffffffff; 12]),
+ #[ver(V >= V13_0B4)]
+ unk_c3c: 0x19,
+ ..Default::default()
+ }
+ );
+
+ let base_ps = self.dyncfg.pwr.perf_base_pstate as usize;
+ let max_ps = self.dyncfg.pwr.perf_max_pstate as usize;
+ let base_freq = self.dyncfg.pwr.perf_states[base_ps].freq_hz;
+ let max_freq = self.dyncfg.pwr.perf_states[max_ps].freq_hz;
+
+ for (i, ps) in self.dyncfg.pwr.perf_states.iter().enumerate() {
+ // Frequencies are stored in MHz.
+ raw.frequencies[i] = ps.freq_hz / 1000000;
+ for (j, mv) in ps.volt_mv.iter().enumerate() {
+ // The SRAM voltage is the state's voltage, raised to the minimum SRAM
+ // voltage (converted from microvolts) where necessary.
+ let sram_mv = (*mv).max(self.dyncfg.pwr.min_sram_microvolt / 1000);
+ raw.voltages[i][j] = *mv;
+ raw.voltages_sram[i][j] = sram_mv;
+ }
+ raw.sram_k[i] = self.cfg.sram_k;
+ // Power of this state as a percentage of the maximum power.
+ raw.rel_max_powers[i] = ps.pwr_mw * 100 / self.dyncfg.pwr.max_power_mw;
+ // For states above the base state: position between the base and maximum
+ // frequency, in percent. States at or below the base state get 0.
+ // NOTE(review): the divisor is zero if max_freq - base_freq is below 100 —
+ // presumably never the case with real frequency tables; confirm.
+ raw.rel_boost_freqs[i] = if i > base_ps {
+ (ps.freq_hz - base_freq) / ((max_freq - base_freq) / 100)
+ } else {
+ 0
+ };
+ }
+
+ Ok(raw)
+ })
+ }
+
+ /// Create the Globals structure, which contains global firmware config including more power
+ /// configuration data and globals used to exchange state between the firmware and driver.
+ ///
+ /// The object is allocated from the shared allocator and initialized in place.
+ #[inline(never)]
+ fn globals(&mut self) -> Result<GpuObject<Globals::ver>> {
+ self.alloc
+ .shared
+ .new_inplace(Default::default(), |_inner, ptr| {
+ let pwr = &self.dyncfg.pwr;
+ let period_ms = pwr.power_sample_period;
+ let period_s = F32::from(period_ms) / f32!(1000.0);
+ let avg_power_filter_tc_periods = pwr.avg_power_filter_tc_ms / period_ms;
+
+ // Performance states are written to the structure scaled by 100.
+ let max_ps = pwr.perf_max_pstate;
+ let max_ps_scaled = 100 * max_ps;
+
+ let raw = place!(
+ ptr,
+ raw::Globals::ver {
+ //ktrace_enable: 0xffffffff,
+ ktrace_enable: 0,
+ #[ver(V >= V13_2)]
+ unk_24_0: 3000,
+ unk_24: 0,
+ #[ver(V >= V13_0B4)]
+ unk_28_0: 0, // debug
+ unk_28: 1,
+ #[ver(V >= V13_0B4)]
+ unk_2c_0: 0,
+ unk_2c: 1,
+ unk_30: 0,
+ unk_34: 120,
+ sub: raw::GlobalsSub::ver {
+ unk_54: 0xffff,
+ unk_56: 40,
+ unk_58: 0xffff,
+ unk_5e: U32(1),
+ unk_66: U32(1),
+ ..Default::default()
+ },
+ unk_8900: 1,
+ pending_submissions: AtomicU32::new(0),
+ max_power: pwr.max_power_mw,
+ max_pstate_scaled: max_ps_scaled,
+ max_pstate_scaled_2: max_ps_scaled,
+ max_pstate_scaled_3: max_ps_scaled,
+ power_zone_count: pwr.power_zones.len() as u32,
+ avg_power_filter_tc_periods: avg_power_filter_tc_periods,
+ avg_power_ki_dt: pwr.avg_power_ki_only * period_s,
+ avg_power_kp: pwr.avg_power_kp,
+ avg_power_min_duty_cycle: pwr.avg_power_min_duty_cycle,
+ avg_power_target_filter_tc: pwr.avg_power_target_filter_tc,
+ // These two take the same config values as HwDataA's unk_8cc / unk_87c.
+ unk_89bc: self.cfg.da.unk_8cc,
+ fast_die0_release_temp: 100 * pwr.fast_die0_release_temp,
+ unk_89c4: self.cfg.da.unk_87c,
+ fast_die0_prop_tgt_delta: 100 * pwr.fast_die0_prop_tgt_delta,
+ fast_die0_kp: pwr.fast_die0_proportional_gain,
+ fast_die0_ki_dt: pwr.fast_die0_integral_gain * period_s,
+ unk_89e0: 1,
+ max_power_2: pwr.max_power_mw,
+ ppm_kp: pwr.ppm_kp,
+ ppm_ki_dt: pwr.ppm_ki * period_s,
+ #[ver(V >= V13_0B4)]
+ unk_89f4_8: 1,
+ unk_89f4: 0,
+ // The shared structures are built on the heap and copied in by value.
+ hws1: Self::hw_shared1(self.cfg),
+ hws2: *Self::hw_shared2(self.cfg)?,
+ hws3: *Self::hw_shared3(self.cfg)?,
+ unk_900c: 1,
+ #[ver(V >= V13_0B4)]
+ unk_9010_0: 1,
+ #[ver(V >= V13_0B4)]
+ unk_903c: 1,
+ #[ver(V < V13_0B4)]
+ unk_903c: 0,
+ fault_control: *crate::fault_control.read(),
+ do_init: 1,
+ unk_11020: 40,
+ unk_11024: 10,
+ unk_11028: 250,
+ #[ver(V >= V13_0B4)]
+ unk_1102c_0: 1,
+ #[ver(V >= V13_0B4)]
+ unk_1102c_4: 1,
+ #[ver(V >= V13_0B4)]
+ unk_1102c_8: 100,
+ #[ver(V >= V13_0B4)]
+ unk_1102c_c: 1,
+ idle_off_delay_ms: AtomicU32::new(pwr.idle_off_delay_ms),
+ fender_idle_off_delay_ms: pwr.fender_idle_off_delay_ms,
+ fw_early_wake_timeout_ms: pwr.fw_early_wake_timeout_ms,
+ unk_118e0: 40,
+ #[ver(V >= V13_0B4)]
+ unk_118e4_0: 50,
+ #[ver(V >= V13_0B4)]
+ unk_11edc: 0,
+ #[ver(V >= V13_0B4)]
+ unk_11efc: 0,
+ ..Default::default()
+ }
+ );
+
+ // Unlike HwDataA, only the raw filter time constant is stored per power zone here.
+ for (i, pz) in pwr.power_zones.iter().enumerate() {
+ raw.power_zones[i].target = pz.target;
+ raw.power_zones[i].target_off = pz.target - pz.target_offset;
+ raw.power_zones[i].filter_tc = pz.filter_tc;
+ }
+
+ // Optional chip-specific table; `unk_118e8` is only set when one is configured.
+ if let Some(tab) = self.cfg.global_tab.as_ref() {
+ for (i, x) in tab.iter().enumerate() {
+ raw.unk_118ec[i] = *x;
+ }
+ raw.unk_118e8 = 1;
+ }
+
+ Ok(raw)
+ })
+ }
+
+ /// Create the RuntimePointers structure, which contains pointers to most of the other
+ /// structures including the ring buffer channels, statistics structures, and HwDataA/HwDataB.
+ ///
+ /// This first allocates all the pointed-to objects (HwDataA/HwDataB, statistics, and several
+ /// unknown buffers) into a boxed `RuntimePointers`, then allocates the firmware-visible
+ /// structure from the private allocator and fills it with the GPU pointers of those objects.
+ /// Any allocation failure is propagated to the caller.
+ #[inline(never)]
+ fn runtime_pointers(&mut self) -> Result<GpuObject<RuntimePointers::ver>> {
+ let hwa = self.hwdata_a()?;
+ let hwb = self.hwdata_b()?;
+
+ let pointers: Box<RuntimePointers::ver> = box_in_place!(RuntimePointers::ver {
+ stats: Stats::ver {
+ vtx: self.alloc.private.new_default::<GpuGlobalStatsVtx::ver>()?,
+ // The fragment stats start out with two fields set to -1 instead of the default.
+ frag: self.alloc.private.new_inplace(
+ Default::default(),
+ |_inner, ptr: &mut MaybeUninit<raw::GpuGlobalStatsFrag::ver>| {
+ Ok(place!(
+ ptr,
+ raw::GpuGlobalStatsFrag::ver {
+ stats: raw::GpuStatsFrag::ver {
+ cur_stamp_id: -1,
+ unk_118: -1,
+ ..Default::default()
+ },
+ ..Default::default()
+ }
+ ))
+ },
+ )?,
+ comp: self.alloc.private.new_default::<GpuStatsComp>()?,
+ },
+
+ hwdata_a: hwa,
+ unkptr_190: self.alloc.private.array_empty(0x80)?,
+ unkptr_198: self.alloc.private.array_empty(0xc0)?,
+ hwdata_b: hwb,
+
+ unkptr_1b8: self.alloc.private.array_empty(0x1000)?,
+ unkptr_1c0: self.alloc.private.array_empty(0x300)?,
+ unkptr_1c8: self.alloc.private.array_empty(0x1000)?,
+
+ buffer_mgr_ctl: self.alloc.gpu.array_empty(127)?,
+ })?;
+
+ self.alloc.private.new_boxed(pointers, |inner, ptr| {
+ Ok(place!(
+ ptr,
+ raw::RuntimePointers::ver {
+ // Left at their defaults here.
+ // NOTE(review): presumably filled in later by other code — confirm.
+ pipes: Default::default(),
+ device_control: Default::default(),
+ event: Default::default(),
+ fw_log: Default::default(),
+ ktrace: Default::default(),
+ stats: Default::default(),
+
+ stats_vtx: inner.stats.vtx.gpu_pointer(),
+ stats_frag: inner.stats.frag.gpu_pointer(),
+ stats_comp: inner.stats.comp.gpu_pointer(),
+
+ hwdata_a: inner.hwdata_a.gpu_pointer(),
+ unkptr_190: inner.unkptr_190.gpu_pointer(),
+ unkptr_198: inner.unkptr_198.gpu_pointer(),
+ // Both HwDataB pointers refer to the same object.
+ hwdata_b: inner.hwdata_b.gpu_pointer(),
+ hwdata_b_2: inner.hwdata_b.gpu_pointer(),
+
+ fwlog_buf: None,
+
+ unkptr_1b8: inner.unkptr_1b8.gpu_pointer(),
+ unkptr_1c0: inner.unkptr_1c0.gpu_pointer(),
+ unkptr_1c8: inner.unkptr_1c8.gpu_pointer(),
+
+ // Both buffer manager control pointers refer to the same array.
+ buffer_mgr_ctl: inner.buffer_mgr_ctl.gpu_pointer(),
+ buffer_mgr_ctl_2: inner.buffer_mgr_ctl.gpu_pointer(),
+
+ __pad0: Default::default(),
+ unk_160: U64(0),
+ unk_168: U64(0),
+ unk_1d0: 0,
+ unk_1d4: 0,
+ unk_1d8: Default::default(),
+
+ __pad1: Default::default(),
+ gpu_scratch: raw::RuntimeScratch {
+ unk_6b38: 0xff,
+ ..Default::default()
+ },
+ }
+ ))
+ })
+ }
+
+ /// Allocate the FwStatus structure, through which the firmware and the driver coordinate the
+ /// firmware halt state.
+ ///
+ /// The object comes from the shared allocator and starts out with default contents.
+ #[inline(never)]
+ fn fw_status(&mut self) -> Result<GpuObject<FwStatus>> {
+     let shared = &mut self.alloc.shared;
+     shared.new_object(Default::default(), |_inner| Default::default())
+ }
+
+ /// Describe a single translation level of the UAT MMU.
+ ///
+ /// `index_shift` is the bit position of this level's index within an address and
+ /// `num_entries` the number of entries in a table at this level; `index_mask` is derived from
+ /// both. `unk_10` is a mask covering the output address bits (`cfg.uat_oas`) above the page
+ /// offset.
+ #[inline(never)]
+ fn uat_level_info(
+     cfg: &hw::HwConfig,
+     index_shift: usize,
+     num_entries: usize,
+ ) -> raw::UatLevelInfo {
+     let oas_mask = (1u64 << cfg.uat_oas) - 1;
+     let page_offset_mask = mmu::UAT_PGMSK as u64;
+     let index_mask = ((num_entries - 1) << index_shift) as u64;
+
+     raw::UatLevelInfo {
+         index_shift: index_shift as _,
+         unk_1: 14,
+         unk_2: 14,
+         unk_3: 8,
+         unk_4: 0x4000,
+         num_entries: num_entries as _,
+         unk_8: U64(1),
+         unk_10: U64(oas_mask & !page_offset_mask),
+         index_mask: U64(index_mask),
+     }
+ }
+
+ /// Build the top-level InitData object.
+ ///
+ /// Allocates the sub-objects (an unknown 0x4000-element buffer, the runtime pointers, the
+ /// globals and the firmware status), then allocates the firmware-visible InitData structure
+ /// from the read-only shared allocator and fills it with their GPU pointers and the UAT
+ /// page table layout. Any allocation failure is propagated to the caller.
+ #[inline(never)]
+ pub(crate) fn build(&mut self) -> Result<Box<GpuObject<InitData::ver>>> {
+ let inner: Box<InitData::ver> = box_in_place!(InitData::ver {
+ unk_buf: self.alloc.shared_ro.array_empty(0x4000)?,
+ runtime_pointers: self.runtime_pointers()?,
+ globals: self.globals()?,
+ fw_status: self.fw_status()?,
+ })?;
+
+ Ok(Box::try_new(self.alloc.shared_ro.new_boxed(
+ inner,
+ |inner, ptr| {
+ Ok(place!(
+ ptr,
+ raw::InitData::ver {
+ #[ver(V >= V13_0B4)]
+ ver_info: Array::new([1, 1, 16, 1]),
+ unk_buf: inner.unk_buf.gpu_pointer(),
+ unk_8: 0,
+ unk_c: 0,
+ runtime_pointers: inner.runtime_pointers.gpu_pointer(),
+ globals: inner.globals.gpu_pointer(),
+ fw_status: inner.fw_status.gpu_pointer(),
+ // 16K pages, three levels of translation.
+ uat_page_size: 0x4000,
+ uat_page_bits: 14,
+ uat_num_levels: 3,
+ // (index shift, number of entries) per level, top level first.
+ uat_level_info: Array::new([
+ Self::uat_level_info(self.cfg, 36, 8),
+ Self::uat_level_info(self.cfg, 25, 2048),
+ Self::uat_level_info(self.cfg, 14, 2048),
+ ]),
+ __pad0: Default::default(),
+ host_mapped_fw_allocations: 1,
+ unk_ac: 0,
+ unk_b0: 0,
+ unk_b4: 0,
+ unk_b8: 0,
+ }
+ ))
+ },
+ )?)?)
+ }
+}
diff --git a/drivers/gpu/drm/asahi/mem.rs b/drivers/gpu/drm/asahi/mem.rs
new file mode 100644
index 000000000000..491d4f8a4016
--- /dev/null
+++ b/drivers/gpu/drm/asahi/mem.rs
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+//! ARM64 low level memory operations.
+//!
+//! This GPU uses CPU-side `tlbi` outer-shareable instructions to manage its TLBs.
+//! Yes, really. Even though the VA address spaces are unrelated.
+//!
+//! Right now we pick our own ASIDs and don't coordinate with the CPU. This might result
+//! in needless TLB shootdowns on the CPU side... TODO: fix this.
+
+use core::arch::asm;
+use core::cmp::min;
+
+use crate::debug::*;
+use crate::mmu;
+
+/// Address space identifier used to tag TLB invalidations. The functions in this file place it
+/// in bits 63:48 of the `tlbi` operand.
+type Asid = u8;
+
+/// Invalidate the entire GPU TLB.
+///
+/// Issues a single `tlbi vmalle1os`. No barrier is issued here; the conservative debug paths in
+/// this file follow the call with `sync()`.
+#[inline(always)]
+pub(crate) fn tlbi_all() {
+ // SAFETY: NOTE(review): the instruction takes no operands and names no Rust-visible memory;
+ // soundness presumably rests on TLB maintenance not affecting Rust state — confirm.
+ unsafe {
+ asm!(".arch armv8.4-a", "tlbi vmalle1os",);
+ }
+}
+
+/// Invalidate every TLB entry tagged with the given ASID.
+///
+/// When the `ConservativeTlbi` debug flag is enabled, the whole TLB is invalidated instead and a
+/// barrier is issued afterwards.
+#[inline(always)]
+pub(crate) fn tlbi_asid(asid: Asid) {
+    if debug_enabled(DebugFlags::ConservativeTlbi) {
+        tlbi_all();
+        sync();
+    } else {
+        // The ASID lives in bits 63:48 of the operand.
+        let operand = (asid as u64) << 48;
+        unsafe {
+            asm!(
+                ".arch armv8.4-a",
+                "tlbi aside1os, {x}",
+                x = in(reg) operand
+            );
+        }
+    }
+}
+
+/// Invalidate the TLB entry for one page of the given ASID.
+///
+/// When the `ConservativeTlbi` debug flag is enabled, the whole TLB is invalidated instead and a
+/// barrier is issued afterwards.
+#[inline(always)]
+pub(crate) fn tlbi_page(asid: Asid, va: usize) {
+    if debug_enabled(DebugFlags::ConservativeTlbi) {
+        tlbi_all();
+        sync();
+    } else {
+        // Operand layout: ASID in bits 63:48, VA >> 12 in the low bits. The two lowest bits of
+        // the shifted address are masked off.
+        let asid_bits = (asid as u64) << 48;
+        let addr_bits = ((va as u64) >> 12) & 0xffffffffffc;
+        let operand: u64 = asid_bits | addr_bits;
+        unsafe {
+            asm!(
+                ".arch armv8.4-a",
+                "tlbi vae1os, {x}",
+                x = in(reg) operand
+            );
+        }
+    }
+}
+
+/// Invalidate a range of pages for a given ASID.
+///
+/// `va` is the start address and `len` the length in bytes; the range is widened to whole UAT
+/// pages. An empty range is a no-op and a single-page range is delegated to `tlbi_page()`. When
+/// the `ConservativeTlbi` debug flag is enabled, the whole TLB is invalidated instead and a
+/// barrier is issued afterwards.
+///
+/// Ranges are rounded up to what the range operand can express, so more pages than requested
+/// may be invalidated. Ranges too large for one operation are split into several operations,
+/// each starting where the previous one ended.
+#[inline(always)]
+pub(crate) fn tlbi_range(asid: Asid, va: usize, len: usize) {
+    /// Width of the base page field in the range operand (low 37 bits).
+    const BASE_PAGE_MASK: u64 = 0x1fffffffff;
+
+    if debug_enabled(DebugFlags::ConservativeTlbi) {
+        tlbi_all();
+        sync();
+        return;
+    }
+
+    if len == 0 {
+        return;
+    }
+
+    let start_pg = va >> mmu::UAT_PGBIT;
+    let end_pg = (va + len + mmu::UAT_PGMSK) >> mmu::UAT_PGBIT;
+    let pages = end_pg - start_pg;
+
+    if pages == 1 {
+        tlbi_page(asid, va);
+        return;
+    }
+
+    // Page count is always in units of 2
+    let num = ((pages + 1) >> 1) as u64;
+    // base: 5 bits
+    // exp: 2 bits
+    // pages = (base + 1) << (5 * exp + 1)
+    // 0:00000 -> 2 pages = 2 << 0
+    // 0:11111 -> 32 * 2 pages = 2 << 5
+    // 1:00000 -> 1 * 32 * 2 pages = 2 << 5
+    // 1:11111 -> 32 * 32 * 2 pages = 2 << 10
+    // 2:00000 -> 1 * 32 * 32 * 2 pages = 2 << 10
+    // 2:11111 -> 32 * 32 * 32 * 2 pages = 2 << 15
+    // 3:00000 -> 1 * 32 * 32 * 32 * 2 pages = 2 << 15
+    // 3:11111 -> 32 * 32 * 32 * 32 * 2 pages = 2 << 20
+    let exp = min(3, (64 - num.leading_zeros()) / 5);
+    let bits = 5 * exp;
+    let mut base = (num + (1 << bits) - 1) >> bits;
+
+    // Everything in the operand except the page count and the base page: ASID in bits 63:48,
+    // the constant 2 in bits 47:46 and the exponent in bits 45:44.
+    let ctl: u64 = ((asid as u64) << 48) | (2 << 46) | ((exp as u64) << 44);
+    // Number of pages covered by one maximal operation (count field = 31).
+    let full_op_pages: u64 = 32 << (bits + 1);
+    let mut pg = start_pg as u64;
+
+    // Ranges that do not fit in one operation are split into maximal operations. The base page
+    // has to advance after each one, otherwise the same leading chunk would be invalidated over
+    // and over and the remainder of the range would be left untouched.
+    while base > 32 {
+        unsafe {
+            asm!(
+                ".arch armv8.4-a",
+                "tlbi rvae1os, {x}",
+                x = in(reg) ctl | (31 << 39) | (pg & BASE_PAGE_MASK)
+            );
+        }
+        pg += full_op_pages;
+        base -= 32;
+    }
+
+    unsafe {
+        asm!(
+            ".arch armv8.4-a",
+            "tlbi rvae1os, {x}",
+            x = in(reg) ctl | ((base - 1) << 39) | (pg & BASE_PAGE_MASK)
+        );
+    }
+}
+
+/// Issue a memory barrier (`dsb sy`).
+///
+/// The conservative debug paths in this file call this after `tlbi_all()`.
+#[inline(always)]
+pub(crate) fn sync() {
+ // SAFETY: NOTE(review): the barrier takes no operands and names no Rust-visible memory;
+ // presumably always sound to execute — confirm.
+ unsafe {
+ asm!("dsb sy");
+ }
+}
diff --git a/drivers/gpu/drm/asahi/microseq.rs b/drivers/gpu/drm/asahi/microseq.rs
new file mode 100644
index 000000000000..dca94ebc53a1
--- /dev/null
+++ b/drivers/gpu/drm/asahi/microseq.rs
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+//! GPU Micro operation sequence builder
+//!
+//! As part of a single job submission to the GPU, the GPU firmware interprets a sequence of
+//! commands that we call a "microsequence". These are responsible for setting up the job execution,
+//! timestamping the process, waiting for completion, tearing down any resources, and signaling
+//! completion to the driver via the event stamp mechanism.
+//!
+//! Although the microsequences used by the macOS driver are usually quite uniform and simple, the
+//! firmware actually implements enough operations to make this interpreter Turing-complete (!).
+//! Most of those aren't implemented yet, since we don't need them, but they could come in handy in
+//! the future to do strange things or work around firmware bugs...
+//!
+//! This module simply implements a collection of microsequence operations that can be appended to
+//! and later concatenated into one buffer, ready for firmware execution.
+
+use crate::fw::microseq;
+pub(crate) use crate::fw::microseq::*;
+use crate::fw::types::*;
+use kernel::prelude::*;
+
+/// MicroSequence object type, which is just an opaque byte array.
+pub(crate) type MicroSequence = GpuArray<u8>;
+
+/// MicroSequence builder.
+///
+/// Operations are appended with `add()` and turned into a GPU-side array with `build()`.
+pub(crate) struct Builder {
+ /// Raw bytes of all operations added so far, in order.
+ ops: Vec<u8>,
+}
+
+impl Builder {
+    /// Create an empty builder.
+    pub(crate) fn new() -> Builder {
+        Self { ops: Vec::new() }
+    }
+
+    /// Compute the offset of `target` relative to the current end of the sequence.
+    ///
+    /// Used for relative jumps.
+    pub(crate) fn offset_to(&self, target: i32) -> i32 {
+        let here = self.ops.len() as i32;
+        target - here
+    }
+
+    /// Append an operation to the sequence.
+    ///
+    /// Returns the byte offset at which the operation was placed, or an error if growing the
+    /// buffer fails.
+    pub(crate) fn add<T: microseq::Operation>(&mut self, op: T) -> Result<i32> {
+        let start = self.ops.len();
+        // SAFETY: `op` is a live value owned by this function, so the pointer is valid for
+        // `size_of::<T>()` bytes for as long as `bytes` is used.
+        // NOTE(review): viewing it as `u8` assumes `Operation` types contain no uninitialized
+        // padding — confirm against the trait's contract.
+        let bytes: &[u8] = unsafe {
+            core::slice::from_raw_parts(&op as *const T as *const u8, core::mem::size_of::<T>())
+        };
+        self.ops.try_extend_from_slice(bytes)?;
+        Ok(start as i32)
+    }
+
+    /// Copy all the operations added so far into a newly allocated GPU array.
+    pub(crate) fn build(self, alloc: &mut Allocator) -> Result<MicroSequence> {
+        let mut seq = alloc.array_empty::<u8>(self.ops.len())?;
+
+        seq.as_mut_slice().copy_from_slice(&self.ops);
+        Ok(seq)
+    }
+}
diff --git a/drivers/gpu/drm/asahi/mmu.rs b/drivers/gpu/drm/asahi/mmu.rs
new file mode 100644
index 000000000000..226ca0b7c1d7
--- /dev/null
+++ b/drivers/gpu/drm/asahi/mmu.rs
@@ -0,0 +1,1249 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+//! GPU UAT (MMU) management
+//!
+//! AGX GPUs use an MMU called the UAT, which is largely compatible with the ARM64 page table
+//! format. This module manages the global MMU structures, including a shared handoff structure
+//! that is used to coordinate VM management operations with the firmware, the TTBAT which points
+//! to currently active GPU VM contexts, as well as the individual `Vm` operations to map and
+//! unmap buffer objects into a single user or kernel address space.
+//!
+//! The actual page table management is delegated to the common kernel `io_pgtable` code.
+
+use core::fmt::Debug;
+use core::mem::size_of;
+use core::ptr::{addr_of_mut, NonNull};
+use core::sync::atomic::{fence, AtomicU32, AtomicU64, AtomicU8, Ordering};
+use core::time::Duration;
+
+use kernel::{
+ bindings, c_str, delay, device,
+ drm::mm,
+ error::{to_result, Result},
+ io_pgtable,
+ io_pgtable::{prot, AppleUAT, IoPageTable},
+ prelude::*,
+ sync::{smutex::Mutex, Guard},
+ sync::{Arc, LockClassKey, UniqueArc},
+ time,
+ types::ForeignOwnable,
+};
+
+use crate::debug::*;
+use crate::no_debug;
+use crate::{driver, fw, gem, hw, mem, slotalloc};
+
+/// Debug flag class for this module.
+// NOTE(review): presumably picked up implicitly by the `mod_*` debug macros -- confirm in debug.rs.
+const DEBUG_CLASS: DebugFlags = DebugFlags::Mmu;
+
+/// PPL magic number for the handoff region
+const PPL_MAGIC: u64 = 0x4b1d000000000002;
+
+/// Number of supported context entries in the TTBAT
+const UAT_NUM_CTX: usize = 64;
+/// First context available for users
+const UAT_USER_CTX_START: usize = 1;
+/// Number of available user contexts
+const UAT_USER_CTX: usize = UAT_NUM_CTX - UAT_USER_CTX_START;
+
+/// Number of bits in a page offset.
+pub(crate) const UAT_PGBIT: usize = 14;
+/// UAT page size.
+pub(crate) const UAT_PGSZ: usize = 1 << UAT_PGBIT;
+/// UAT page offset mask.
+pub(crate) const UAT_PGMSK: usize = UAT_PGSZ - 1;
+
+/// A single page table entry.
+type Pte = AtomicU64;
+
+/// Number of PTEs per page.
+const UAT_NPTE: usize = UAT_PGSZ / size_of::<Pte>();
+
+/// UAT input address space (user)
+pub(crate) const UAT_IAS: usize = 39;
+/// "Fake" kernel UAT input address space (one page level lower)
+pub(crate) const UAT_IAS_KERN: usize = 36;
+
+/// Lower/user base VA
+const IOVA_USER_BASE: usize = UAT_PGSZ;
+/// Lower/user top VA (inclusive)
+const IOVA_USER_TOP: usize = (1 << UAT_IAS) - 1;
+// Upper/kernel base VA (currently unused; kept as a plain comment so that it does not become
+// part of the documentation of the next constant).
+// const IOVA_TTBR1_BASE: usize = 0xffffff8000000000;
+/// Driver-managed kernel base VA
+const IOVA_KERN_BASE: usize = 0xffffffa000000000;
+/// Driver-managed kernel top VA (inclusive)
+const IOVA_KERN_TOP: usize = 0xffffffafffffffff;
+
+/// Valid bit OR-ed into a translation table base before it is stored in the TTBAT.
+const TTBR_VALID: u64 = 0x1; // BIT(0)
+/// Shift applied to the slot/ASID index when it is OR-ed into a translation table base.
+const TTBR_ASID_SHIFT: usize = 48;
+
+/// Low bits OR-ed into a PTE that points at a next-level table.
+const PTE_TABLE: u64 = 0x3; // BIT(0) | BIT(1)
+
+// Mapping protection types
+
+// Note: prot::CACHE means "cache coherency", which for UAT means *uncached*,
+// since uncached mappings from the GFX ASC side are cache coherent with the AP cache.
+// Not having that flag means *cached noncoherent*.
+
+/// Firmware MMIO R/W
+pub(crate) const PROT_FW_MMIO_RW: u32 =
+ prot::PRIV | prot::READ | prot::WRITE | prot::CACHE | prot::MMIO;
+/// Firmware MMIO R/O
+pub(crate) const PROT_FW_MMIO_RO: u32 = prot::PRIV | prot::READ | prot::CACHE | prot::MMIO;
+/// Firmware shared (uncached) RW
+pub(crate) const PROT_FW_SHARED_RW: u32 = prot::PRIV | prot::READ | prot::WRITE | prot::CACHE;
+/// Firmware shared (uncached) RO
+pub(crate) const PROT_FW_SHARED_RO: u32 = prot::PRIV | prot::READ | prot::CACHE;
+/// Firmware private (cached) RW
+pub(crate) const PROT_FW_PRIV_RW: u32 = prot::PRIV | prot::READ | prot::WRITE;
+/*
+/// Firmware private (cached) RO
+pub(crate) const PROT_FW_PRIV_RO: u32 = prot::PRIV | prot::READ;
+*/
+/// Firmware/GPU shared (uncached) RW
+pub(crate) const PROT_GPU_FW_SHARED_RW: u32 = prot::READ | prot::WRITE | prot::CACHE;
+/// Firmware/GPU private (cached) RW
+///
+/// Unlike [`PROT_GPU_FW_SHARED_RW`] this omits `prot::CACHE`, i.e. it is cached/noncoherent.
+pub(crate) const PROT_GPU_FW_PRIV_RW: u32 = prot::READ | prot::WRITE;
+/// GPU shared/coherent RW
+pub(crate) const PROT_GPU_SHARED_RW: u32 = prot::READ | prot::WRITE | prot::CACHE | prot::NOEXEC;
+/// GPU shared/coherent RO
+pub(crate) const PROT_GPU_SHARED_RO: u32 = prot::READ | prot::CACHE | prot::NOEXEC;
+/// GPU shared/coherent WO
+pub(crate) const PROT_GPU_SHARED_WO: u32 = prot::WRITE | prot::CACHE | prot::NOEXEC;
+/*
+/// GPU private/noncoherent RW
+pub(crate) const PROT_GPU_PRIV_RW: u32 = prot::READ | prot::WRITE | prot::NOEXEC;
+/// GPU private/noncoherent RO
+pub(crate) const PROT_GPU_PRIV_RO: u32 = prot::READ | prot::NOEXEC;
+*/
+
+/// Physical address type, as defined by the kernel bindings.
+type PhysAddr = bindings::phys_addr_t;
+
+/// A pre-allocated memory region for UAT management
+struct UatRegion {
+ /// Physical base address of the region.
+ base: PhysAddr,
+ /// Kernel virtual mapping of the region; it is released with `memunmap()` when the
+ /// `UatRegion` is dropped.
+ map: NonNull<core::ffi::c_void>,
+}
+
+// SAFETY: It's safe to share UAT region records across threads.
+// NOTE(review): the struct itself is only an address plus a mapping pointer; whoever
+// dereferences `map` is responsible for synchronizing access to the mapped memory.
+unsafe impl Send for UatRegion {}
+// SAFETY: See the `Send` implementation above.
+unsafe impl Sync for UatRegion {}
+
+/// Handoff region flush info structure
+///
+/// One of these exists per flush slot in [`Handoff::flush`]; it is accessed through
+/// `HandoffFlush`.
+#[repr(C)]
+struct FlushInfo {
+ /// Flush state: 0 when idle, set to 1 by the driver when it requests a flush, and expected
+ /// to read back as 2 when the driver completes the flush (presumably updated by the firmware).
+ state: AtomicU64,
+ /// Start address of the range to flush.
+ addr: AtomicU64,
+ /// Size in bytes of the range to flush.
+ size: AtomicU64,
+}
+
+/// UAT Handoff region layout
+///
+/// This structure lives in a memory region shared with the firmware, so the field order and
+/// the implicit padding noted below are part of the layout.
+#[repr(C)]
+struct Handoff {
+ /// Written with `PPL_MAGIC` by the driver during `init()`.
+ magic_ap: AtomicU64,
+ /// Polled by the driver during `init()` until it reads `PPL_MAGIC`.
+ magic_fw: AtomicU64,
+
+ /// Driver-side lock flag, set and cleared by `lock()` / `unlock()`.
+ lock_ap: AtomicU8,
+ /// Lock flag the driver waits on in `lock()` (presumably the firmware-side flag).
+ lock_fw: AtomicU8,
+ // Implicit padding: 2 bytes
+ /// Turn indicator: `lock()` backs off while it is non-zero, `unlock()` sets it to 1.
+ turn: AtomicU32,
+ /// Slot currently in use for lower/unprivileged access; 0 and `u32::MAX` mean "none".
+ cur_slot: AtomicU32,
+ // Implicit padding: 4 bytes
+ /// Flush request slots: one per context, plus one extra at index `UAT_NUM_CTX` that is
+ /// used for kernel mappings.
+ flush: [FlushInfo; UAT_NUM_CTX + 1],
+
+ /// Unknown; never touched by this module.
+ unk2: AtomicU8,
+ // Implicit padding: 7 bytes
+ /// Unknown; cleared to 0 during `init()`.
+ unk3: AtomicU64,
+}
+
+/// Minimum size required of the bootloader-provided "handoff" region.
+const HANDOFF_SIZE: usize = size_of::<Handoff>();
+
+/// One VM slot in the TTBAT
+#[repr(C)]
+struct SlotTTBS {
+ /// Translation table base for the lower half, OR-ed with `TTBR_VALID` and the slot index
+ /// shifted by `TTBR_ASID_SHIFT`; 0 when the slot is unused.
+ ttb0: AtomicU64,
+ /// Second translation table base. The driver only sets it for slot 0 (to the "pagetables"
+ /// region base); it is written as 0 for every other slot.
+ ttb1: AtomicU64,
+}
+
+/// Minimum size required of the bootloader-provided "ttbs" region.
+const SLOTS_SIZE: usize = UAT_NUM_CTX * size_of::<SlotTTBS>();
+
+/// Minimum size required of the bootloader-provided "pagetables" region.
+// We need at least page 0 (ttb0)
+const PAGETABLES_SIZE: usize = UAT_PGSZ;
+
+/// Inner data for a Vm instance. This is reference-counted by the outer Vm object.
+struct VmInner {
+ /// Device this Vm belongs to (used for logging and for firmware control requests).
+ dev: driver::AsahiDevice,
+ /// Whether this is the kernel (upper half) address space.
+ is_kernel: bool,
+ /// Lowest valid IOVA in this address space.
+ min_va: usize,
+ /// Highest valid IOVA in this address space (inclusive).
+ max_va: usize,
+ /// The io_pgtable instance backing this address space.
+ page_table: AppleUAT<Uat>,
+ /// VA range allocator for mappings in this address space.
+ mm: mm::Allocator<(), MappingInner>,
+ /// Shared UAT state (handoff region, TTBAT, flush slots).
+ uat_inner: Arc<UatInner>,
+ /// Number of live `VmBind` objects referring to this Vm.
+ active_users: usize,
+ /// Slot guard held while the Vm is bound; reset to `None` when `active_users` drops to 0.
+ binding: Option<slotalloc::Guard<SlotInner>>,
+ /// Token of the most recently used slot, kept after unbinding so that it can be offered
+ /// back to the slot allocator on the next bind.
+ bind_token: Option<slotalloc::SlotToken>,
+ /// Unique ID of this Vm (used in debug output).
+ id: u64,
+}
+
+impl VmInner {
+    /// Returns the slot index (ASID) associated with this VM, if any.
+    ///
+    /// For user VMs this is the slot the VM was *last* bound to, which may since have been
+    /// handed to another VM (see the comment in the body).
+    fn slot(&self) -> Option<u32> {
+        if self.is_kernel {
+            // The GFX ASC does not care about the ASID. Pick an arbitrary one.
+            // TODO: This needs to be a persistently reserved ASID once we integrate
+            // with the ARM64 kernel ASID machinery to avoid overlap.
+            return Some(0);
+        }
+
+        // We don't check whether we lost the slot, which could cause unnecessary
+        // invalidations against another Vm. However, this situation should be very
+        // rare (e.g. a Vm lost its slot, which means 63 other Vms bound in the
+        // interim, and then it gets killed / drops its mappings without doing any
+        // final rendering). Anything doing active maps/unmaps is probably also
+        // rendering and therefore likely bound.
+        self.bind_token
+            .as_ref()
+            .map(|token| token.last_slot() + UAT_USER_CTX_START as u32)
+    }
+
+    /// Returns the translation table base for this Vm
+    fn ttb(&self) -> u64 {
+        self.page_table.cfg().ttbr
+    }
+
+    /// Map an IOVA to the shifted address the underlying io_pgtable uses.
+    ///
+    /// Returns `EINVAL` if `[iova, iova + size)` is not fully contained in
+    /// `[min_va, max_va]`, including the case where the end of the range is not
+    /// representable. Kernel addresses are rebased to start at 0; user addresses are passed
+    /// through unchanged.
+    fn map_iova(&self, iova: usize, size: usize) -> Result<usize> {
+        // Inclusive end of the range, computed with checked arithmetic so that an oversized
+        // request is rejected instead of wrapping around (or panicking on overflow).
+        let last = iova
+            .checked_add(size)
+            .and_then(|end| end.checked_sub(1))
+            .ok_or(EINVAL)?;
+
+        if iova < self.min_va || last > self.max_va {
+            Err(EINVAL)
+        } else if self.is_kernel {
+            Ok(iova - self.min_va)
+        } else {
+            Ok(iova)
+        }
+    }
+
+    /// Map a contiguous range of virtual->physical pages.
+    ///
+    /// The page table backend may map fewer pages than requested per call, so this loops
+    /// until all `pgcount` pages are mapped. Returns the total number of bytes mapped.
+    fn map_pages(
+        &mut self,
+        mut iova: usize,
+        mut paddr: usize,
+        pgsize: usize,
+        pgcount: usize,
+        prot: u32,
+    ) -> Result<usize> {
+        let mut left = pgcount;
+        while left > 0 {
+            let mapped_iova = self.map_iova(iova, pgsize * left)?;
+            let mapped = self
+                .page_table
+                .map_pages(mapped_iova, paddr, pgsize, left, prot)?;
+            assert!(mapped <= left * pgsize);
+
+            left -= mapped / pgsize;
+            paddr += mapped;
+            iova += mapped;
+        }
+        Ok(pgcount * pgsize)
+    }
+
+    /// Unmap a contiguous range of pages.
+    ///
+    /// The page table backend may unmap fewer pages than requested per call, so this loops
+    /// until the whole range has been processed. Returns the size of the range in bytes.
+    fn unmap_pages(&mut self, mut iova: usize, pgsize: usize, pgcount: usize) -> Result<usize> {
+        let mut left = pgcount;
+        while left > 0 {
+            let mapped_iova = self.map_iova(iova, pgsize * left)?;
+            let mut unmapped = self.page_table.unmap_pages(mapped_iova, pgsize, left);
+            if unmapped == 0 {
+                // The backend made no progress (e.g. the page was not mapped). Without this
+                // check the loop would spin forever with the Vm lock held. Report it, skip
+                // one page and carry on so that the rest of the range still gets unmapped.
+                dev_err!(
+                    self.dev,
+                    "MMU: unmap_pages {:#x}:{:#x} returned 0\n",
+                    mapped_iova,
+                    left
+                );
+                unmapped = pgsize;
+            }
+            assert!(unmapped <= left * pgsize);
+
+            left -= unmapped / pgsize;
+            iova += unmapped;
+        }
+
+        Ok(pgcount * pgsize)
+    }
+
+    /// Map an `mm::Node` representing a mapping in VA space.
+    ///
+    /// Walks the node's scatter-gather table and maps each DMA range back to back, starting
+    /// at the node's start address. Returns `EINVAL` if the node has no scatter-gather table
+    /// or if any range is not page-aligned.
+    fn map_node(&mut self, node: &mm::Node<(), MappingInner>, prot: u32) -> Result {
+        let mut iova = node.start() as usize;
+        let sgt = node.sgt.as_ref().ok_or(EINVAL)?;
+
+        for range in sgt.iter() {
+            let addr = range.dma_address();
+            let len = range.dma_len();
+
+            // Address, length and target VA must all be multiples of the UAT page size.
+            if (addr | len | iova) & UAT_PGMSK != 0 {
+                dev_err!(
+                    self.dev,
+                    "MMU: Mapping {:#x}:{:#x} -> {:#x} is not page-aligned\n",
+                    addr,
+                    len,
+                    iova
+                );
+                return Err(EINVAL);
+            }
+
+            mod_dev_dbg!(
+                self.dev,
+                "MMU: map: {:#x}:{:#x} -> {:#x}\n",
+                addr,
+                len,
+                iova
+            );
+
+            self.map_pages(iova, addr, UAT_PGSZ, len >> UAT_PGBIT, prot)?;
+
+            iova += len;
+        }
+        Ok(())
+    }
+}
+
+/// Shared reference to a virtual memory address space ([`Vm`]).
+///
+/// Cloning a `Vm` only clones the reference; all clones share the same `VmInner`.
+#[derive(Clone)]
+pub(crate) struct Vm {
+ /// Unique ID of this Vm.
+ id: u64,
+ /// Unique ID of the file that owns this Vm.
+ file_id: u64,
+ /// Shared, lock-protected state of the address space.
+ inner: Arc<Mutex<VmInner>>,
+}
+no_debug!(Vm);
+
+/// Slot data for a [`Vm`] slot (nothing, we only care about the indices).
+pub(crate) struct SlotInner();
+
+// No per-allocator data is needed either, hence the unit `Data` type.
+impl slotalloc::SlotItem for SlotInner {
+ type Data = ();
+}
+
+/// Represents a single user of a binding of a [`Vm`] to a slot.
+///
+/// The number of users is counted, and the slot will be freed when it drops to 0.
+///
+/// The first field is the bound `Vm`; the second is the slot index, already offset by
+/// `UAT_USER_CTX_START`.
+#[derive(Debug)]
+pub(crate) struct VmBind(Vm, u32);
+
+impl VmBind {
+ /// Returns the slot that this `Vm` is bound to.
+ pub(crate) fn slot(&self) -> u32 {
+ self.1
+ }
+}
+
+impl Drop for VmBind {
+    /// Drops one user of the binding, releasing the slot guard when the last user goes away.
+    fn drop(&mut self) {
+        let mut guard = self.0.inner.lock();
+
+        assert_ne!(guard.active_users, 0);
+        let remaining = guard.active_users - 1;
+        guard.active_users = remaining;
+        mod_pr_debug!("MMU: slot {} active users {}\n", self.1, remaining);
+
+        // Nobody is using the binding any more: give the slot guard back.
+        if remaining == 0 {
+            guard.binding = None;
+        }
+    }
+}
+
+impl Clone for VmBind {
+    /// Registers one more user of the binding and returns a new handle to the same slot.
+    fn clone(&self) -> VmBind {
+        let vm = &self.0;
+        let slot = self.1;
+
+        let mut guard = vm.inner.lock();
+        guard.active_users += 1;
+        mod_pr_debug!("MMU: slot {} active users {}\n", slot, guard.active_users);
+
+        VmBind(vm.clone(), slot)
+    }
+}
+
+/// Inner data required for an object mapping into a [`Vm`].
+pub(crate) struct MappingInner {
+ /// The address space this mapping lives in.
+ owner: Arc<Mutex<VmInner>>,
+ /// Shared UAT state, needed to flush the mapping when it is dropped.
+ uat_inner: Arc<UatInner>,
+ /// Protection flags the mapping was created with.
+ prot: u32,
+ /// Size of the mapped range in bytes (this excludes any guard page reserved after it).
+ mapped_size: usize,
+ /// Scatter-gather table backing the mapping; `None` for direct MMIO mappings.
+ sgt: Option<gem::SGTable>,
+}
+
+/// An object mapping into a [`Vm`], which reserves the address range from use by other mappings.
+pub(crate) struct Mapping(mm::Node<(), MappingInner>);
+
+impl Mapping {
+ /// Returns the IOVA base of this mapping
+ pub(crate) fn iova(&self) -> usize {
+ self.0.start() as usize
+ }
+
+ /// Returns the size of this mapping in bytes
+ ///
+ /// This is the mapped size only; it does not include any guard page reserved after it.
+ pub(crate) fn size(&self) -> usize {
+ self.0.mapped_size
+ }
+
+ /// Remap a cached mapping as uncached, then synchronously flush that range of VAs from the
+ /// coprocessor cache. This is required to safely unmap cached/private mappings.
+ ///
+ /// Failures are logged but not propagated, since this runs as part of `Drop`.
+ fn remap_uncached_and_flush(&mut self) {
+ let mut owner = self.0.owner.lock();
+ mod_dev_dbg!(
+ owner.dev,
+ "MMU: remap as uncached {:#x}:{:#x}\n",
+ self.iova(),
+ self.size()
+ );
+
+ // The IOMMU API does not allow us to remap things in-place...
+ // just do an unmap and map again for now.
+ // Do not try to unmap guard page (-1)
+ if owner
+ .unmap_pages(self.iova(), UAT_PGSZ, self.size() >> UAT_PGBIT)
+ .is_err()
+ {
+ dev_err!(
+ owner.dev,
+ "MMU: unmap for remap {:#x}:{:#x} failed\n",
+ self.iova(),
+ self.size()
+ );
+ }
+
+ // Adding prot::CACHE makes the mapping coherent, which for UAT means *uncached*.
+ let prot = self.0.prot | prot::CACHE;
+ if owner.map_node(&self.0, prot).is_err() {
+ dev_err!(
+ owner.dev,
+ "MMU: remap {:#x}:{:#x} failed\n",
+ self.iova(),
+ self.size()
+ );
+ }
+
+ // If we don't have (and have never had) a VM slot, just return
+ let slot = match owner.slot() {
+ None => return,
+ Some(slot) => slot,
+ };
+
+ let flush_slot = if owner.is_kernel {
+ // If this is a kernel mapping, always flush on index 64
+ UAT_NUM_CTX as u32
+ } else {
+ // Otherwise, check if this slot is the active one, otherwise return
+ // Also check that we actually own this slot
+ let ttb = owner.ttb() | TTBR_VALID | (slot as u64) << TTBR_ASID_SHIFT;
+
+ // The handoff lock is only held while sampling the current slot and the TTB entry.
+ let uat_inner = self.0.uat_inner.lock();
+ uat_inner.handoff().lock();
+ let cur_slot = uat_inner.handoff().current_slot();
+ let ttb_cur = uat_inner.ttbs()[slot as usize].ttb0.load(Ordering::Relaxed);
+ uat_inner.handoff().unlock();
+ if cur_slot == Some(slot) && ttb_cur == ttb {
+ slot
+ } else {
+ return;
+ }
+ };
+
+ // FIXME: There is a race here, though it'll probably never happen in practice.
+ // In theory, it's possible for the ASC to finish using our slot, whatever command
+ // it was processing to complete, the slot to be lost to another context, and the ASC
+ // to begin using it again with a different page table, thus faulting when it gets a
+ // flush request here. In practice, the chance of this happening is probably vanishingly
+ // small, as all 62 other slots would have to be recycled or in use before that slot can
+ // be reused, and the ASC using user contexts at all is very rare.
+
+ // Still, the locking around UAT/Handoff/TTBs should probably be redesigned to better
+ // model the interactions with the firmware and avoid these races.
+ // Possibly TTB changes should be tied to slot locks:
+
+ // Flush:
+ // - Can early check handoff here (no need to lock).
+ // If user slot and it doesn't match the active ASC slot,
+ // we can elide the flush as the ASC guarantees it flushes
+ // TLBs/caches when it switches context. We just need a
+ // barrier to ensure ordering.
+ // - Lock TTB slot
+ // - If user ctx:
+ // - Lock handoff AP-side
+ // - Lock handoff dekker
+ // - Check TTB & handoff cur ctx
+ // - Perform flush if necessary
+ // - This implies taking the fwring lock
+ //
+ // TTB change:
+ // - lock TTB slot
+ // - lock handoff AP-side
+ // - lock handoff dekker
+ // change TTB
+
+ // Lock this flush slot, and write the range to it
+ let flush = self.0.uat_inner.lock_flush(flush_slot);
+ let pages = self.size() >> UAT_PGBIT;
+ flush.begin_flush(self.iova() as u64, self.size() as u64);
+ // NOTE(review): an oversized flush is only logged; the request below is still sent with
+ // `pages` truncated to 16 bits -- confirm that this is the intended fallback.
+ if pages >= 0x10000 {
+ dev_err!(owner.dev, "MMU: Flush too big ({:#x} pages))\n", pages);
+ }
+
+ let cmd = fw::channels::FwCtlMsg {
+ addr: fw::types::U64(self.iova() as u64),
+ unk_8: 0,
+ slot: flush_slot,
+ page_count: pages as u16,
+ unk_12: 2, // ?
+ };
+
+ // Tell the firmware to do a cache flush
+ if let Err(e) = owner.dev.data().gpu.fwctl(cmd) {
+ dev_err!(
+ owner.dev,
+ "MMU: ASC cache flush {:#x}:{:#x} failed (err: {:?})\n",
+ self.iova(),
+ self.size(),
+ e
+ );
+ }
+
+ // Finish the flush
+ flush.end_flush();
+
+ // Slot is unlocked here
+ }
+}
+
+impl Drop for Mapping {
+ /// Unmaps the mapping from its `Vm` and invalidates the corresponding TLB range.
+ fn drop(&mut self) {
+ // This is the main unmap function for UAT mappings.
+ // The sequence of operations here is finicky, due to the interaction
+ // between cached GFX ASC mappings and the page tables. These mappings
+ // always have to be flushed from the cache before being unmapped.
+
+ // For uncached mappings, just unmapping and flushing the TLB is sufficient.
+
+ // For cached mappings, this is the required sequence:
+ // 1. Remap it as uncached
+ // 2. Flush the TLB range
+ // 3. If kernel VA mapping OR user VA mapping and handoff.current_slot() == slot:
+ // a. Take a lock for this slot
+ // b. Write the flush range to the right context slot in handoff area
+ // c. Issue a cache invalidation request via FwCtl queue
+ // d. Poll for completion via queue
+ // e. Check for completion flag in the handoff area
+ // f. Drop the lock
+ // 4. Unmap
+ // 5. Flush the TLB range again
+ //
+ // NOTE(review): `remap_uncached_and_flush()` as written does not issue a TLB invalidate
+ // between the remap and the firmware cache flush (step 2 above) -- confirm whether that
+ // is intentional or handled elsewhere.
+
+ // prot::CACHE means "cache coherent" which means *uncached* here.
+ if self.0.prot & prot::CACHE == 0 {
+ self.remap_uncached_and_flush();
+ }
+
+ // The owner lock is taken only now: `remap_uncached_and_flush()` takes it by itself.
+ let mut owner = self.0.owner.lock();
+ mod_dev_dbg!(
+ owner.dev,
+ "MMU: unmap {:#x}:{:#x}\n",
+ self.iova(),
+ self.size()
+ );
+
+ // Failure to unmap is logged only; there is no way to report it from `drop()`.
+ if owner
+ .unmap_pages(self.iova(), UAT_PGSZ, self.size() >> UAT_PGBIT)
+ .is_err()
+ {
+ dev_err!(
+ owner.dev,
+ "MMU: unmap {:#x}:{:#x} failed\n",
+ self.iova(),
+ self.size()
+ );
+ }
+
+ // Only invalidate if this Vm has (or has ever had) a slot; its index is used as the ASID.
+ if let Some(asid) = owner.slot() {
+ mem::tlbi_range(asid as u8, self.iova(), self.size());
+ mod_dev_dbg!(
+ owner.dev,
+ "MMU: flush range: asid={:#x} start={:#x} len={:#x}\n",
+ asid,
+ self.iova(),
+ self.size()
+ );
+ mem::sync();
+ }
+ }
+}
+
+/// Shared UAT global data structures
+struct UatShared {
+    /// Mapping of the firmware handoff region.
+    handoff_rgn: UatRegion,
+    /// Mapping of the TTBAT (translation table base array) region.
+    ttbs_rgn: UatRegion,
+}
+
+impl UatShared {
+    /// Returns the handoff region area
+    fn handoff(&self) -> &Handoff {
+        let handoff = self.handoff_rgn.map.cast::<Handoff>();
+        // SAFETY: the pointer is non-null per the type invariant
+        unsafe { handoff.as_ref() }
+    }
+
+    /// Returns the TTBAT area
+    fn ttbs(&self) -> &[SlotTTBS; UAT_NUM_CTX] {
+        let ttbs = self.ttbs_rgn.map.cast::<[SlotTTBS; UAT_NUM_CTX]>();
+        // SAFETY: the pointer is non-null per the type invariant
+        unsafe { ttbs.as_ref() }
+    }
+}
+
+// SAFETY: Nothing here is unsafe to send across threads.
+unsafe impl Send for UatShared {}
+
+/// Inner data for the top-level UAT instance.
+struct UatInner {
+    /// Handoff and TTBAT regions, protected by a single lock.
+    shared: Mutex<UatShared>,
+    /// One independently lockable flush slot per context, plus one extra slot.
+    handoff_flush: [Mutex<HandoffFlush>; UAT_NUM_CTX + 1],
+}
+
+impl UatInner {
+    /// Take the lock on the shared data and return the guard.
+    fn lock(&self) -> Guard<'_, Mutex<UatShared>> {
+        let shared = &self.shared;
+        shared.lock()
+    }
+
+    /// Take a lock on a handoff flush slot and return the guard.
+    fn lock_flush(&self, slot: u32) -> Guard<'_, Mutex<HandoffFlush>> {
+        let flush_slot = &self.handoff_flush[slot as usize];
+        flush_slot.lock()
+    }
+}
+
+/// Top-level UAT manager object
+pub(crate) struct Uat {
+ /// Device this UAT belongs to.
+ dev: driver::AsahiDevice,
+ /// Hardware configuration, passed on to every `Vm` created from this UAT.
+ cfg: &'static hw::HwConfig,
+ /// Mapping of the bootloader-provided "pagetables" region (root kernel page table).
+ pagetables_rgn: UatRegion,
+
+ /// Shared state (handoff region, TTBAT, flush slots), also referenced by every `Vm`.
+ inner: Arc<UatInner>,
+ /// Allocator for the user context slots.
+ slots: slotalloc::SlotAllocator<SlotInner>,
+
+ /// The global kernel (upper half) address space.
+ kernel_vm: Vm,
+ /// Lower-half address space installed as `ttb0` of slot 0; held only to keep it alive.
+ _kernel_lower_vm: Vm,
+}
+
+impl Drop for UatRegion {
+ /// Releases the kernel mapping of the region.
+ fn drop(&mut self) {
+ // SAFETY: the pointer is valid by the type invariant
+ // NOTE(review): this relies on `map` having been obtained from `memremap()`, as done in
+ // `Uat::map_region()`.
+ unsafe { bindings::memunmap(self.map.as_ptr()) };
+ }
+}
+
+impl Handoff {
+    /// Lock the handoff region from firmware access
+    ///
+    /// This is a Dekker-style lock: the driver raises `lock_ap`, then waits for `lock_fw` to
+    /// clear. While `turn` is non-zero the driver backs off (drops `lock_ap`) and waits for
+    /// `turn` to clear before retrying.
+    fn lock(&self) {
+        self.lock_ap.store(1, Ordering::Relaxed);
+        fence(Ordering::SeqCst);
+
+        while self.lock_fw.load(Ordering::Relaxed) != 0 {
+            if self.turn.load(Ordering::Relaxed) != 0 {
+                self.lock_ap.store(0, Ordering::Relaxed);
+                while self.turn.load(Ordering::Relaxed) != 0 {
+                    // Busy-wait politely instead of hammering the shared cache line.
+                    core::hint::spin_loop();
+                }
+                self.lock_ap.store(1, Ordering::Relaxed);
+                fence(Ordering::SeqCst);
+            } else {
+                core::hint::spin_loop();
+            }
+        }
+        fence(Ordering::Acquire);
+    }
+
+    /// Unlock the handoff region, allowing firmware access
+    fn unlock(&self) {
+        self.turn.store(1, Ordering::Relaxed);
+        self.lock_ap.store(0, Ordering::Release);
+    }
+
+    /// Returns the current Vm slot mapped by the firmware for lower/unprivileged access, if any.
+    ///
+    /// The raw values 0 and `u32::MAX` both mean that no slot is in use.
+    fn current_slot(&self) -> Option<u32> {
+        match self.cur_slot.load(Ordering::Relaxed) {
+            0 | u32::MAX => None,
+            slot => Some(slot),
+        }
+    }
+
+    /// Initialize the handoff region
+    ///
+    /// Publishes the driver-side magic, then waits up to one second for the firmware-side
+    /// magic to appear. Returns `EIO` if it does not. On success, all flush slots are reset.
+    fn init(&self) -> Result {
+        self.magic_ap.store(PPL_MAGIC, Ordering::Relaxed);
+        self.cur_slot.store(0, Ordering::Relaxed);
+        self.unk3.store(0, Ordering::Relaxed);
+        fence(Ordering::SeqCst);
+
+        let timeout = time::ktime_get() + Duration::from_millis(1000);
+
+        // The lock is held whenever `magic_fw` is sampled and released while sleeping.
+        self.lock();
+        while time::ktime_get() < timeout {
+            if self.magic_fw.load(Ordering::Relaxed) == PPL_MAGIC {
+                break;
+            } else {
+                self.unlock();
+                delay::coarse_sleep(Duration::from_millis(10));
+                self.lock();
+            }
+        }
+
+        // Re-check after the loop: it may have ended because of the timeout.
+        if self.magic_fw.load(Ordering::Relaxed) != PPL_MAGIC {
+            self.unlock();
+            pr_err!("Handoff: Failed to initialize (firmware not running?)\n");
+            return Err(EIO);
+        }
+
+        self.unlock();
+
+        // Reset every flush slot, including the extra one at index `UAT_NUM_CTX`.
+        for info in self.flush.iter() {
+            info.state.store(0, Ordering::Relaxed);
+            info.addr.store(0, Ordering::Relaxed);
+            info.size.store(0, Ordering::Relaxed);
+        }
+        fence(Ordering::SeqCst);
+        Ok(())
+    }
+}
+
+/// Represents a single flush info slot in the handoff region.
+///
+/// # Invariants
+/// The pointer is valid and there is no aliasing HandoffFlush instance.
+struct HandoffFlush(*const FlushInfo);
+
+// SAFETY: These pointers are safe to send across threads.
+unsafe impl Send for HandoffFlush {}
+
+impl HandoffFlush {
+ /// Set up a flush operation for the coprocessor
+ ///
+ /// Writes the range to flush and then sets the slot state to 1. A slot that is not idle
+ /// (state 0) on entry is reported but used anyway.
+ fn begin_flush(&self, start: u64, size: u64) {
+ // SAFETY: the pointer is valid per the type invariant.
+ let flush = unsafe { self.0.as_ref().unwrap() };
+
+ let state = flush.state.load(Ordering::Relaxed);
+ if state != 0 {
+ pr_err!("Handoff: expected flush state 0, got {}\n", state);
+ }
+ flush.addr.store(start, Ordering::Relaxed);
+ flush.size.store(size, Ordering::Relaxed);
+ // NOTE(review): all three stores are relaxed, so nothing here orders `addr`/`size` before
+ // `state`; presumably the firmware request issued by the caller provides the barrier --
+ // confirm.
+ flush.state.store(1, Ordering::Relaxed);
+ }
+
+ /// Complete a flush operation for the coprocessor
+ ///
+ /// Expects the slot state to be 2 (reporting any other value), then resets it to 0.
+ fn end_flush(&self) {
+ // SAFETY: the pointer is valid per the type invariant.
+ let flush = unsafe { self.0.as_ref().unwrap() };
+ let state = flush.state.load(Ordering::Relaxed);
+ if state != 2 {
+ pr_err!("Handoff: expected flush state 2, got {}\n", state);
+ }
+ flush.state.store(0, Ordering::Relaxed);
+ }
+}
+
+// We do not implement FlushOps, since we flush manually in this module after
+// page table operations. Just provide dummy implementations.
+impl io_pgtable::FlushOps for Uat {
+ // No per-page-table data is needed by the (empty) callbacks.
+ type Data = ();
+
+ /// Intentionally a no-op.
+ fn tlb_flush_all(_data: <Self::Data as ForeignOwnable>::Borrowed<'_>) {}
+ /// Intentionally a no-op.
+ fn tlb_flush_walk(
+ _data: <Self::Data as ForeignOwnable>::Borrowed<'_>,
+ _iova: usize,
+ _size: usize,
+ _granule: usize,
+ ) {
+ }
+ /// Intentionally a no-op.
+ fn tlb_add_page(
+ _data: <Self::Data as ForeignOwnable>::Borrowed<'_>,
+ _iova: usize,
+ _granule: usize,
+ ) {
+ }
+}
+
+/// Lock class key handed to the `mm::Allocator` of every `Vm`.
+static LOCK_KEY: LockClassKey = LockClassKey::new();
+
+impl Vm {
+    /// Create a new virtual memory address space
+    ///
+    /// Kernel and user address spaces differ only in their input address size and in the VA
+    /// window handed to the range allocator.
+    fn new(
+        dev: driver::AsahiDevice,
+        uat_inner: Arc<UatInner>,
+        cfg: &'static hw::HwConfig,
+        is_kernel: bool,
+        id: u64,
+        file_id: u64,
+    ) -> Result<Vm> {
+        let (ias, min_va, max_va) = if is_kernel {
+            (UAT_IAS_KERN, IOVA_KERN_BASE, IOVA_KERN_TOP)
+        } else {
+            (UAT_IAS, IOVA_USER_BASE, IOVA_USER_TOP)
+        };
+
+        let pt_config = io_pgtable::Config {
+            pgsize_bitmap: UAT_PGSZ,
+            ias,
+            oas: cfg.uat_oas,
+            coherent_walk: true,
+            quirks: 0,
+        };
+        let page_table = AppleUAT::new(&dev, pt_config, ())?;
+
+        // `max_va` is inclusive, hence the `+ 1` when computing the window size.
+        let va_size = max_va - min_va + 1;
+        let mm = mm::Allocator::new(
+            min_va as u64,
+            va_size as u64,
+            (),
+            c_str!("asahi Vm"),
+            &LOCK_KEY,
+        )?;
+
+        let vm_inner = VmInner {
+            dev,
+            min_va,
+            max_va,
+            is_kernel,
+            page_table,
+            mm,
+            uat_inner,
+            binding: None,
+            bind_token: None,
+            active_users: 0,
+            id,
+        };
+
+        Ok(Vm {
+            id,
+            file_id,
+            inner: Arc::try_new(Mutex::new(vm_inner))?,
+        })
+    }
+
+    /// Get the translation table base for this Vm
+    fn ttb(&self) -> u64 {
+        let vm = self.inner.lock();
+        vm.ttb()
+    }
+
+    /// Map a GEM object (using its `SGTable`) into this Vm at a free address.
+    ///
+    /// The mapping is created with `PROT_FW_SHARED_RW`, and one extra page of VA space is
+    /// reserved after it as a guard page.
+    pub(crate) fn map(&self, size: usize, sgt: gem::SGTable) -> Result<Mapping> {
+        let prot = PROT_FW_SHARED_RW;
+        let mut vm = self.inner.lock();
+
+        let uat_inner = vm.uat_inner.clone();
+        let mapping = MappingInner {
+            owner: self.inner.clone(),
+            uat_inner,
+            prot,
+            sgt: Some(sgt),
+            mapped_size: size,
+        };
+        // Add guard page
+        let node = vm.mm.insert_node(mapping, (size + UAT_PGSZ) as u64)?;
+
+        vm.map_node(&node, prot)?;
+        Ok(Mapping(node))
+    }
+
+    /// Map a GEM object (using its `SGTable`) into this Vm at a free address in a given range.
+    ///
+    /// When `guard` is set, one extra page of VA space is reserved after the mapping.
+    #[allow(clippy::too_many_arguments)]
+    pub(crate) fn map_in_range(
+        &self,
+        size: usize,
+        sgt: gem::SGTable,
+        alignment: u64,
+        start: u64,
+        end: u64,
+        prot: u32,
+        guard: bool,
+    ) -> Result<Mapping> {
+        let mut vm = self.inner.lock();
+
+        let uat_inner = vm.uat_inner.clone();
+        let mapping = MappingInner {
+            owner: self.inner.clone(),
+            uat_inner,
+            prot,
+            sgt: Some(sgt),
+            mapped_size: size,
+        };
+        // Add guard page
+        let guard_size = if guard { UAT_PGSZ } else { 0 };
+        let node = vm.mm.insert_node_in_range(
+            mapping,
+            (size + guard_size) as u64,
+            alignment,
+            0,
+            start,
+            end,
+            mm::InsertMode::Best,
+        )?;
+
+        vm.map_node(&node, prot)?;
+        Ok(Mapping(node))
+    }
+
+    /// Map a GEM object (using its `SGTable`) into this Vm at a specific address.
+    ///
+    /// When `guard` is set, one extra page of VA space is reserved after the mapping.
+    #[allow(clippy::too_many_arguments)]
+    pub(crate) fn map_at(
+        &self,
+        addr: u64,
+        size: usize,
+        sgt: gem::SGTable,
+        prot: u32,
+        guard: bool,
+    ) -> Result<Mapping> {
+        let mut vm = self.inner.lock();
+
+        let uat_inner = vm.uat_inner.clone();
+        let mapping = MappingInner {
+            owner: self.inner.clone(),
+            uat_inner,
+            prot,
+            sgt: Some(sgt),
+            mapped_size: size,
+        };
+        // Add guard page
+        let guard_size = if guard { UAT_PGSZ } else { 0 };
+        let node = vm
+            .mm
+            .reserve_node(mapping, addr, (size + guard_size) as u64, 0)?;
+
+        vm.map_node(&node, prot)?;
+        Ok(Mapping(node))
+    }
+
+    /// Add a direct MMIO mapping to this Vm at a free address.
+    ///
+    /// `rw` selects between a read/write and a read-only firmware MMIO mapping. One extra
+    /// page of VA space is reserved after the mapping as a guard page. Returns `EINVAL` if
+    /// `phys`, `size` or the allocated address is not page-aligned.
+    pub(crate) fn map_io(&self, phys: usize, size: usize, rw: bool) -> Result<Mapping> {
+        let prot = if rw { PROT_FW_MMIO_RW } else { PROT_FW_MMIO_RO };
+        let mut vm = self.inner.lock();
+
+        let uat_inner = vm.uat_inner.clone();
+        let mapping = MappingInner {
+            owner: self.inner.clone(),
+            uat_inner,
+            prot,
+            sgt: None,
+            mapped_size: size,
+        };
+        // Add guard page
+        let node = vm.mm.insert_node(mapping, (size + UAT_PGSZ) as u64)?;
+
+        let iova = node.start() as usize;
+
+        let misaligned = (phys | size | iova) & UAT_PGMSK != 0;
+        if misaligned {
+            dev_err!(
+                vm.dev,
+                "MMU: Mapping {:#x}:{:#x} -> {:#x} is not page-aligned\n",
+                phys,
+                size,
+                iova
+            );
+            return Err(EINVAL);
+        }
+
+        dev_info!(
+            vm.dev,
+            "MMU: IO map: {:#x}:{:#x} -> {:#x}\n",
+            phys,
+            size,
+            iova
+        );
+
+        vm.map_pages(iova, phys, UAT_PGSZ, size >> UAT_PGBIT, prot)?;
+
+        Ok(Mapping(node))
+    }
+
+    /// Returns the unique ID of this Vm
+    pub(crate) fn id(&self) -> u64 {
+        self.id
+    }
+
+    /// Returns the unique File ID of the owner of this Vm
+    pub(crate) fn file_id(&self) -> u64 {
+        self.file_id
+    }
+}
+
+impl Drop for VmInner {
+ /// Detaches the Vm from the TTBAT (if it still owns its last slot) and invalidates the
+ /// corresponding ASID.
+ fn drop(&mut self) {
+ // All `VmBind` users must be gone by the time the Vm itself is destroyed.
+ assert_eq!(self.active_users, 0);
+
+ mod_pr_debug!(
+ "VmInner::Drop [{}]: bind_token={:?}\n",
+ self.id,
+ self.bind_token
+ );
+
+ // Make sure this VM is not mapped to a TTB if it was
+ if let Some(token) = self.bind_token.take() {
+ let idx = (token.last_slot() as usize) + UAT_USER_CTX_START;
+ // The value that `Uat::bind()` would have stored in the TTBAT for this Vm and slot.
+ let ttb = self.ttb() | TTBR_VALID | (idx as u64) << TTBR_ASID_SHIFT;
+
+ let uat_inner = self.uat_inner.lock();
+ uat_inner.handoff().lock();
+ let handoff_cur = uat_inner.handoff().current_slot();
+ let ttb_cur = uat_inner.ttbs()[idx].ttb0.load(Ordering::SeqCst);
+ // Only clear the slot if it still holds our TTB, i.e. it was not taken over by another Vm.
+ let inval = ttb_cur == ttb;
+ if inval {
+ if handoff_cur == Some(idx as u32) {
+ pr_err!(
+ "VmInner::drop owning slot {}, but it is currently in use by the ASC?\n",
+ idx
+ );
+ }
+ uat_inner.ttbs()[idx].ttb0.store(0, Ordering::SeqCst);
+ }
+ uat_inner.handoff().unlock();
+ // Release the shared UAT lock before doing the TLB invalidation below.
+ core::mem::drop(uat_inner);
+
+ // In principle we dropped all the Mappings already, but we might as
+ // well play it safe and invalidate the whole ASID.
+ if inval {
+ mod_pr_debug!(
+ "VmInner::Drop [{}]: need inval for ASID {:#x}\n",
+ self.id,
+ idx
+ );
+ mem::tlbi_asid(idx as u8);
+ mem::sync();
+ }
+ }
+ }
+}
+
+impl Uat {
+ /// Map a bootloader-preallocated memory region
+ ///
+ /// Looks up `name` in the device node's `memory-region-names` property, resolves the
+ /// matching `memory-region` phandle to a resource and maps the whole resource into the
+ /// kernel, write-back if `cached` is set and write-combining otherwise.
+ ///
+ /// Returns `EINVAL` if the region node is missing, `ENOMEM` if the region is smaller than
+ /// `size` or cannot be mapped, or the error reported by the device tree helpers.
+ fn map_region(
+ dev: &dyn device::RawDevice,
+ name: &CStr,
+ size: usize,
+ cached: bool,
+ ) -> Result<UatRegion> {
+ let rdev = dev.raw_device();
+
+ let mut res = core::mem::MaybeUninit::<bindings::resource>::uninit();
+
+ // SAFETY: NOTE(review): this assumes `rdev` is a valid device pointer for the duration of
+ // the call, and that `of_address_to_resource()` fully initializes `res` when it succeeds
+ // (`assume_init()` is only reached in that case) -- confirm against the bindings.
+ let res = unsafe {
+ let idx = bindings::of_property_match_string(
+ (*rdev).of_node,
+ c_str!("memory-region-names").as_char_ptr(),
+ name.as_char_ptr(),
+ );
+ to_result(idx)?;
+
+ let np = bindings::of_parse_phandle(
+ (*rdev).of_node,
+ c_str!("memory-region").as_char_ptr(),
+ idx,
+ );
+ if np.is_null() {
+ dev_err!(dev, "Missing {} region\n", name);
+ return Err(EINVAL);
+ }
+ let ret = bindings::of_address_to_resource(np, 0, res.as_mut_ptr());
+ // The node reference is dropped on both the success and the failure path.
+ bindings::of_node_put(np);
+
+ if ret < 0 {
+ dev_err!(dev, "Failed to get {} region\n", name);
+ to_result(ret)?
+ }
+
+ res.assume_init()
+ };
+
+ // SAFETY: `res` was initialized above.
+ let rgn_size: usize = unsafe { bindings::resource_size(&res) } as usize;
+
+ if size > rgn_size {
+ dev_err!(
+ dev,
+ "Region {} is too small (expected {}, got {})\n",
+ name,
+ size,
+ rgn_size
+ );
+ return Err(ENOMEM);
+ }
+
+ let flags = if cached {
+ bindings::MEMREMAP_WB
+ } else {
+ bindings::MEMREMAP_WC
+ };
+ // The whole region is mapped, not just the first `size` bytes.
+ // SAFETY: NOTE(review): assumes the resource describes memory that may be remapped with
+ // these flags; a failed mapping is handled through the null check below.
+ let map = unsafe { bindings::memremap(res.start, rgn_size, flags.into()) };
+ let map = NonNull::new(map);
+
+ match map {
+ None => {
+ dev_err!(dev, "Failed to remap {} region\n", name);
+ Err(ENOMEM)
+ }
+ Some(map) => Ok(UatRegion {
+ base: res.start,
+ map,
+ }),
+ }
+ }
+
+    /// Returns a view into the root kernel (upper half) page table
+    fn kpt0(&self) -> &[Pte; UAT_NPTE] {
+        let table = self.pagetables_rgn.map.cast::<[Pte; UAT_NPTE]>();
+        // SAFETY: the pointer is non-null per the type invariant
+        unsafe { table.as_ref() }
+    }
+
+ /// Returns a reference to the global kernel (upper half) `Vm`
+ ///
+ /// The returned `Vm` is the one created with `is_kernel` set in `Uat::new()`.
+ pub(crate) fn kernel_vm(&self) -> &Vm {
+ &self.kernel_vm
+ }
+
+    /// Returns the base physical address of the TTBAT region.
+    pub(crate) fn ttb_base(&self) -> u64 {
+        let shared = self.inner.lock();
+        let base = shared.ttbs_rgn.base;
+
+        base
+    }
+
+ /// Binds a `Vm` to a slot, preferring the last used one.
+ ///
+ /// If the `Vm` is already bound, only its user count is incremented. Otherwise a slot is
+ /// requested from the slot allocator; if that slot changed hands, the TTBAT entry is
+ /// rewritten and the TLB entries of its ASID are invalidated.
+ ///
+ /// Returns a `VmBind` that represents one user of the binding.
+ pub(crate) fn bind(&self, vm: &Vm) -> Result<VmBind> {
+ let mut inner = vm.inner.lock();
+
+ if inner.binding.is_none() {
+ assert_eq!(inner.active_users, 0);
+
+ // Offer the previous token back to the allocator so that it can return the same slot.
+ let slot = self.slots.get(inner.bind_token)?;
+ if slot.changed() {
+ mod_pr_debug!("Vm Bind [{}]: bind_token={:?}\n", vm.id, slot.token(),);
+ let idx = (slot.slot() as usize) + UAT_USER_CTX_START;
+ let ttb = inner.ttb() | TTBR_VALID | (idx as u64) << TTBR_ASID_SHIFT;
+
+ // The TTBAT entry is only modified while holding the handoff lock.
+ let uat_inner = self.inner.lock();
+ let ttbs = uat_inner.ttbs();
+ uat_inner.handoff().lock();
+ if uat_inner.handoff().current_slot() == Some(idx as u32) {
+ pr_err!(
+ "Vm::bind to slot {}, but it is currently in use by the ASC?\n",
+ idx
+ );
+ }
+ ttbs[idx].ttb0.store(ttb, Ordering::Relaxed);
+ ttbs[idx].ttb1.store(0, Ordering::Relaxed);
+ uat_inner.handoff().unlock();
+ core::mem::drop(uat_inner);
+
+ // Make sure all TLB entries from the previous owner of this ASID are gone
+ mem::tlbi_asid(idx as u8);
+ mem::sync();
+ }
+
+ inner.bind_token = Some(slot.token());
+ inner.binding = Some(slot);
+ }
+
+ inner.active_users += 1;
+
+ // The slot reported to users is offset past the context(s) reserved before the user range.
+ let slot = inner.binding.as_ref().unwrap().slot() + UAT_USER_CTX_START as u32;
+ mod_pr_debug!("MMU: slot {} active users {}\n", slot, inner.active_users);
+ Ok(VmBind(vm.clone(), slot))
+ }
+
+ /// Creates a new `Vm` linked to this UAT.
+ ///
+ /// The new `Vm` is always a user (non-kernel) address space.
+ pub(crate) fn new_vm(&self, id: u64, file_id: u64) -> Result<Vm> {
+ Vm::new(
+ self.dev.clone(),
+ self.inner.clone(),
+ self.cfg,
+ false, // is_kernel
+ id,
+ file_id,
+ )
+ }
+
+ /// Creates the reference-counted inner data for a new `Uat` instance.
+ ///
+ /// Maps the "handoff" and "ttbs" regions and initializes the `UatInner` in place, with one
+ /// `HandoffFlush` per entry of the handoff region's flush array.
+ #[inline(never)]
+ fn make_inner(dev: &driver::AsahiDevice) -> Result<Arc<UatInner>> {
+ let handoff_rgn = Self::map_region(dev, c_str!("handoff"), HANDOFF_SIZE, false)?;
+ let ttbs_rgn = Self::map_region(dev, c_str!("ttbs"), SLOTS_SIZE, false)?;
+
+ dev_info!(dev, "MMU: Initializing kernel page table\n");
+
+ let mut inner = UniqueArc::<UatInner>::try_new_uninit()?;
+ let ptr = inner.as_mut_ptr();
+
+ // SAFETY: every field of `UatInner` (each element of `handoff_flush`, then `shared`) is
+ // written through `ptr` before `assume_init()` is called. The `HandoffFlush` pointers
+ // point into the handoff mapping, which is moved into the same `UatInner` below.
+ Ok(unsafe {
+ let handoff = &(handoff_rgn.map.as_ptr() as *mut Handoff).as_ref().unwrap();
+
+ for i in 0..UAT_NUM_CTX + 1 {
+ addr_of_mut!((*ptr).handoff_flush[i])
+ .write(Mutex::new(HandoffFlush(&handoff.flush[i])));
+ }
+
+ addr_of_mut!((*ptr).shared).write(Mutex::new(UatShared {
+ handoff_rgn,
+ ttbs_rgn,
+ }));
+
+ inner.assume_init()
+ }
+ .into())
+ }
+
+ /// Creates a new `Uat` instance given the relevant hardware config.
+ ///
+ /// Maps the shared regions, creates the two kernel address spaces, performs the handoff
+ /// handshake with the firmware, fills in the TTBAT and finally hooks the kernel `Vm` into
+ /// the root kernel page table.
+ #[inline(never)]
+ pub(crate) fn new(dev: &driver::AsahiDevice, cfg: &'static hw::HwConfig) -> Result<Self> {
+ dev_info!(dev, "MMU: Initializing...\n");
+
+ let inner = Self::make_inner(dev)?;
+
+ let pagetables_rgn = Self::map_region(dev, c_str!("pagetables"), PAGETABLES_SIZE, true)?;
+
+ dev_info!(dev, "MMU: Creating kernel page tables\n");
+ let kernel_lower_vm = Vm::new(dev.clone(), inner.clone(), cfg, false, 1, 0)?;
+ let kernel_vm = Vm::new(dev.clone(), inner.clone(), cfg, true, 0, 0)?;
+
+ dev_info!(dev, "MMU: Kernel page tables created\n");
+
+ let ttb0 = kernel_lower_vm.ttb();
+ let ttb1 = kernel_vm.ttb();
+
+ let uat = Self {
+ dev: dev.clone(),
+ cfg,
+ pagetables_rgn,
+ kernel_vm,
+ _kernel_lower_vm: kernel_lower_vm,
+ inner,
+ slots: slotalloc::SlotAllocator::new(UAT_USER_CTX as u32, (), |_inner, _slot| {
+ SlotInner()
+ })?,
+ };
+
+ let inner = uat.inner.lock();
+
+ // Fails with EIO if the firmware side of the handoff does not show up in time.
+ inner.handoff().init()?;
+
+ dev_info!(dev, "MMU: Initializing TTBs\n");
+
+ inner.handoff().lock();
+
+ let ttbs = inner.ttbs();
+
+ // Slot 0 is the kernel context: lower half from `kernel_lower_vm`, upper half from the
+ // root table in the "pagetables" region.
+ ttbs[0].ttb0.store(ttb0 | TTBR_VALID, Ordering::Relaxed);
+ ttbs[0]
+ .ttb1
+ .store(uat.pagetables_rgn.base | TTBR_VALID, Ordering::Relaxed);
+
+ // All user slots start out unused.
+ for ctx in &ttbs[1..] {
+ ctx.ttb0.store(0, Ordering::Relaxed);
+ ctx.ttb1.store(0, Ordering::Relaxed);
+ }
+
+ inner.handoff().unlock();
+
+ core::mem::drop(inner);
+
+ // Hook the kernel Vm's table into the root kernel page table.
+ // NOTE(review): index 2 matches (IOVA_KERN_BASE - 0xffffff8000000000) >> UAT_IAS_KERN --
+ // confirm that this is where the constant comes from.
+ uat.kpt0()[2].store(ttb1 | PTE_TABLE, Ordering::Relaxed);
+
+ dev_info!(dev, "MMU: initialized\n");
+
+ Ok(uat)
+ }
+}
+
+impl Drop for Uat {
+ /// Unhooks the kernel `Vm` from the root kernel page table and flushes all TLBs.
+ fn drop(&mut self) {
+ // Unmap what we mapped
+ // (this clears the root table entry that `Uat::new()` installed at index 2)
+ self.kpt0()[2].store(0, Ordering::Relaxed);
+
+ // Make sure we flush the TLBs
+ // The fence orders the store above before the invalidation.
+ fence(Ordering::SeqCst);
+ mem::tlbi_all();
+ mem::sync();
+ }
+}
diff --git a/drivers/gpu/drm/asahi/object.rs b/drivers/gpu/drm/asahi/object.rs
new file mode 100644
index 000000000000..449899b88181
--- /dev/null
+++ b/drivers/gpu/drm/asahi/object.rs
@@ -0,0 +1,704 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+//! Asahi GPU object model
+//!
+//! The AGX GPU includes a coprocessor that uses a large number of shared memory structures to
+//! communicate with the driver. These structures contain GPU VA pointers to each other, which are
+//! directly dereferenced by the firmware and are expected to always be valid for the usage
+//! lifetime of the containing struct (which is an implicit contract, not explicitly managed).
+//! Any faults cause an unrecoverable firmware crash, requiring a full system reboot.
+//!
+//! In order to manage this complexity safely, we implement a GPU object model using Rust's type
+//! system to enforce GPU object lifetime relationships. GPU objects represent an allocated piece
+//! of memory of a given type, mapped to the GPU (and usually also the CPU). On the CPU side,
+//! these objects are associated with a pure Rust structure that contains the objects it depends
+//! on (or references to them). This allows us to map Rust lifetimes into the GPU object model
+//! system. Then, GPU VA pointers also inherit those lifetimes, which means the Rust borrow checker
+ //! can ensure that every pointer is assigned the address of an object that is guaranteed to
+ //! outlive the GPU structure that contains the pointer.
+//!
+//! Since the firmware object model does have self-referencing pointers (and there is of course no
+//! underlying revocability mechanism to make it safe), we must have an escape hatch. GPU pointers
+//! can be weak pointers, which do not enforce lifetimes. In those cases, it is the user's
+//! responsibility to ensure that lifetime requirements are met.
+//!
+//! In other words, the model is necessarily leaky and there is no way to fully map Rust safety to
+//! GPU firmware object safety. The goal of the model is to make it easy to model the lifetimes of
+//! GPU objects and have the compiler help in avoiding mistakes, rather than to guarantee safety
+//! 100% of the time as would be the case for CPU-side Rust code.
+
+// TODO: There is a fundamental soundness issue with sharing memory with the GPU (that even affects
+// C code too). Since the GPU is free to mutate that memory at any time, normal reference invariants
+// cannot be enforced on the CPU side. For example, the compiler could perform an optimization that
+// assumes that a given memory location does not change between two reads, and causes UB otherwise,
+// and then the GPU could mutate that memory out from under the CPU.
+//
+// For cases where we *expect* this to happen, we use atomic types, which avoid this issue. However,
+// doing so for every single field of every type is a non-starter. Right now, there seems to be no
+// good solution for this that does not come with significant performance or ergonomics downsides.
+//
+// In *practice* we are almost always only writing GPU memory, and only reading from atomics, so the
+// chances of this actually triggering UB (e.g. a security issue that can be triggered from the GPU
+// side) due to a compiler optimization are very slim.
+//
+// Further discussion: https://github.com/rust-lang/unsafe-code-guidelines/issues/152
+
+use kernel::{error::code::*, prelude::*};
+
+use alloc::boxed::Box;
+use core::fmt;
+use core::fmt::Debug;
+use core::fmt::Formatter;
+use core::marker::PhantomData;
+use core::mem::MaybeUninit;
+use core::num::NonZeroU64;
+use core::ops::{Deref, DerefMut, Index, IndexMut};
+use core::{mem, ptr, slice};
+
+use crate::alloc::Allocation;
+use crate::debug::*;
+use crate::fw::types::Zeroed;
+
+const DEBUG_CLASS: DebugFlags = DebugFlags::Object;
+
+/// A GPU-side strong pointer, which is a 64-bit non-zero VA with an associated lifetime.
+///
+/// In rare cases these pointers are not aligned, so this is `packed(1)`.
+#[repr(C, packed(1))]
+pub(crate) struct GpuPointer<'a, T: ?Sized>(NonZeroU64, PhantomData<&'a T>);
+
+impl<'a, T: ?Sized> GpuPointer<'a, T> {
+ /// Logical OR the pointer with an arbitrary `u64`. This is used when GPU struct fields contain
+ /// misc flag fields in the upper bits. The lifetime is retained. This is GPU-unsafe in
+ /// principle, but we assert that only non-implemented address bits are touched, which is safe
+ /// for pointers used by the GPU (not by firmware).
+ pub(crate) fn or(&self, other: u64) -> GpuPointer<'a, T> {
+ // This will fail for kernel-half pointers, which should not be ORed.
+ assert_eq!(self.0.get() & other, 0);
+ // Assert that we only touch the high bits.
+ assert_eq!(other & 0xffffffffff, 0);
+ GpuPointer(self.0 | other, PhantomData)
+ }
+
+ /// Add an arbitrary offset to the pointer. This is not safe (from the GPU perspective), and
+ /// should only be used via the `inner_ptr` macro to get pointers to inner fields, hence we mark
+ /// it `unsafe` to discourage direct use.
+ // NOTE: The third argument is a type inference hack.
+ pub(crate) unsafe fn offset<U>(&self, off: usize, _: *const U) -> GpuPointer<'a, U> {
+ GpuPointer::<'a, U>(
+ NonZeroU64::new(self.0.get() + (off as u64)).unwrap(),
+ PhantomData,
+ )
+ }
+}
+
+impl<'a, T: ?Sized> Debug for GpuPointer<'a, T> {
+ fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+ let val = self.0;
+ f.write_fmt(format_args!("{:#x} ({})", val, core::any::type_name::<T>()))
+ }
+}
+
+/// Take a pointer to a sub-field within a structure pointed to by a GpuPointer, keeping the
+/// lifetime.
+#[macro_export]
+macro_rules! inner_ptr {
+ ($gpuva:expr, $($f:tt)*) => ({
+ // This mirrors kernel::offset_of(), except we use type inference to avoid having to know
+ // the type of the pointer explicitly.
+ fn uninit_from<'a, T: GpuStruct>(_: GpuPointer<'a, T>) -> core::mem::MaybeUninit<T::Raw<'static>> {
+ core::mem::MaybeUninit::uninit()
+ }
+ let tmp = uninit_from($gpuva);
+ let outer = tmp.as_ptr();
+ // SAFETY: The pointer is valid and aligned, just not initialised; `addr_of` ensures that
+ // we don't actually read from `outer` (which would be UB) nor create an intermediate
+ // reference.
+ let p: *const _ = unsafe { core::ptr::addr_of!((*outer).$($f)*) };
+ let inner = p as *const u8;
+ // SAFETY: The two pointers are within the same allocation block.
+ let off = unsafe { inner.offset_from(outer as *const u8) };
+ // SAFETY: The resulting pointer is guaranteed to point to valid memory within the outer
+ // object.
+ unsafe { $gpuva.offset(off.try_into().unwrap(), p) }
+ })
+}
+
+/// A GPU-side weak pointer, which is a 64-bit non-zero VA with no lifetime.
+///
+/// In rare cases these pointers are not aligned, so this is `packed(1)`.
+#[repr(C, packed(1))]
+pub(crate) struct GpuWeakPointer<T: ?Sized>(NonZeroU64, PhantomData<*const T>);
+
+/// SAFETY: GPU weak pointers are always safe to share between threads.
+unsafe impl<T: ?Sized> Send for GpuWeakPointer<T> {}
+unsafe impl<T: ?Sized> Sync for GpuWeakPointer<T> {}
+
+// Weak pointers can be copied/cloned regardless of their target type.
+impl<T: ?Sized> Copy for GpuWeakPointer<T> {}
+
+impl<T: ?Sized> Clone for GpuWeakPointer<T> {
+ fn clone(&self) -> Self {
+ *self
+ }
+}
+
+impl<T: ?Sized> GpuWeakPointer<T> {
+ /// Add an arbitrary offset to the pointer. This is not safe (from the GPU perspective), and
+ /// should only be used via the `inner_ptr` macro to get pointers to inner fields, hence we mark
+ /// it `unsafe` to discourage direct use.
+ // NOTE: The third argument is a type inference hack.
+ pub(crate) unsafe fn offset<U>(&self, off: usize, _: *const U) -> GpuWeakPointer<U> {
+ GpuWeakPointer::<U>(
+ NonZeroU64::new(self.0.get() + (off as u64)).unwrap(),
+ PhantomData,
+ )
+ }
+
+ /// Upgrade a weak pointer into a strong pointer. This is not considered safe from the GPU
+ /// perspective.
+ pub(crate) unsafe fn upgrade<'a>(&self) -> GpuPointer<'a, T> {
+ GpuPointer(self.0, PhantomData)
+ }
+}
+
+impl<T: ?Sized> Debug for GpuWeakPointer<T> {
+ fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+ let val = self.0;
+ f.write_fmt(format_args!("{:#x} ({})", val, core::any::type_name::<T>()))
+ }
+}
+
+/// Take a pointer to a sub-field within a structure pointed to by a GpuWeakPointer.
+#[macro_export]
+macro_rules! inner_weak_ptr {
+ ($gpuva:expr, $($f:tt)*) => ({
+ // See inner_ptr()
+ fn uninit_from<T: GpuStruct>(_: GpuWeakPointer<T>) -> core::mem::MaybeUninit<T::Raw<'static>> {
+ core::mem::MaybeUninit::uninit()
+ }
+ let tmp = uninit_from($gpuva);
+ let outer = tmp.as_ptr();
+ // SAFETY: The pointer is valid and aligned, just not initialised; `addr_of` ensures that
+ // we don't actually read from `outer` (which would be UB) nor create an intermediate
+ // reference.
+ let p: *const _ = unsafe { core::ptr::addr_of!((*outer).$($f)*) };
+ let inner = p as *const u8;
+ // SAFETY: The two pointers are within the same allocation block.
+ let off = unsafe { inner.offset_from(outer as *const u8) };
+ // SAFETY: The resulting pointer is guaranteed to point to valid memory within the outer
+ // object.
+ unsafe { $gpuva.offset(off.try_into().unwrap(), p) }
+ })
+}
+
+/// Types that implement this trait represent a GPU structure from the CPU side.
+///
+/// The `Raw` type represents the actual raw structure definition on the GPU side.
+///
+/// Types implementing [`GpuStruct`] must have fields owning any objects (or strong references
+/// to them) that GPU pointers in the `Raw` structure point to. This mechanism is used to enforce
+/// lifetimes.
+pub(crate) trait GpuStruct: 'static {
+ /// The type of the GPU-side structure definition representing the firmware struct layout.
+ type Raw<'a>;
+}
+
+/// An instance of a GPU object in memory.
+///
+/// # Invariants
+/// `raw` must point to a valid mapping of the `T::Raw` type associated with the `alloc` allocation.
+/// `gpu_ptr` must be the GPU address of the same object.
+pub(crate) struct GpuObject<T: GpuStruct, U: Allocation<T>> {
+ raw: *mut T::Raw<'static>,
+ alloc: U,
+ gpu_ptr: GpuWeakPointer<T>,
+ inner: Box<T>,
+}
+
+impl<T: GpuStruct, U: Allocation<T>> GpuObject<T, U> {
+ /// Create a new GpuObject given an allocator and the inner data (a type implementing
+ /// GpuStruct).
+ ///
+ /// The caller passes a closure that constructs the `T::Raw` type given a reference to the
+ /// `GpuStruct`. This is the mechanism used to enforce lifetimes.
+ pub(crate) fn new(
+ alloc: U,
+ inner: T,
+ callback: impl for<'a> FnOnce(&'a T) -> T::Raw<'a>,
+ ) -> Result<Self> {
+ let size = mem::size_of::<T::Raw<'static>>();
+ if size > 0x1000 {
+ dev_crit!(
+ alloc.device(),
+ "Allocating {} of size {:#x}, with new, please use new_boxed!\n",
+ core::any::type_name::<T>(),
+ size
+ );
+ }
+ if alloc.size() < size {
+ return Err(ENOMEM);
+ }
+ let gpu_ptr =
+ GpuWeakPointer::<T>(NonZeroU64::new(alloc.gpu_ptr()).ok_or(EINVAL)?, PhantomData);
+ mod_dev_dbg!(
+ alloc.device(),
+ "Allocating {} @ {:#x}\n",
+ core::any::type_name::<T>(),
+ alloc.gpu_ptr()
+ );
+ let p = alloc.ptr().ok_or(EINVAL)?.as_ptr() as *mut T::Raw<'static>;
+ let mut raw = callback(&inner);
+ // SAFETY: `p` is guaranteed to be valid per the Allocation invariant, and the type is
+ // identical to the type of `raw` other than the lifetime.
+ unsafe { p.copy_from(&mut raw as *mut _ as *mut u8 as *mut _, 1) };
+ mem::forget(raw);
+ Ok(Self {
+ raw: p,
+ gpu_ptr,
+ alloc,
+ inner: Box::try_new(inner)?,
+ })
+ }
+
+ /// Create a new GpuObject given an allocator and the boxed inner data (a type implementing
+ /// GpuStruct).
+ ///
+ /// The caller passes a closure that initializes the `T::Raw` type given a reference to the
+ /// `GpuStruct` and a `MaybeUninit<T::Raw>`. This is intended to be used with the place!()
+ /// macro to avoid constructing the whole `T::Raw` object on the stack.
+ pub(crate) fn new_boxed(
+ alloc: U,
+ inner: Box<T>,
+ callback: impl for<'a> FnOnce(
+ &'a T,
+ &'a mut MaybeUninit<T::Raw<'a>>,
+ ) -> Result<&'a mut T::Raw<'a>>,
+ ) -> Result<Self> {
+ if alloc.size() < mem::size_of::<T::Raw<'static>>() {
+ return Err(ENOMEM);
+ }
+ let gpu_ptr =
+ GpuWeakPointer::<T>(NonZeroU64::new(alloc.gpu_ptr()).ok_or(EINVAL)?, PhantomData);
+ mod_dev_dbg!(
+ alloc.device(),
+ "Allocating {} @ {:#x}\n",
+ core::any::type_name::<T>(),
+ alloc.gpu_ptr()
+ );
+ let p = alloc.ptr().ok_or(EINVAL)?.as_ptr() as *mut MaybeUninit<T::Raw<'_>>;
+ // SAFETY: `p` is guaranteed to be valid per the Allocation invariant.
+ let raw = callback(&inner, unsafe { &mut *p })?;
+ if p as *mut T::Raw<'_> != raw as *mut _ {
+ dev_err!(
+ alloc.device(),
+ "Allocation callback returned a mismatched reference ({})\n",
+ core::any::type_name::<T>(),
+ );
+ return Err(EINVAL);
+ }
+ Ok(Self {
+ raw: p as *mut u8 as *mut T::Raw<'static>,
+ gpu_ptr,
+ alloc,
+ inner,
+ })
+ }
+
+ /// Create a new GpuObject given an allocator and the inner data (a type implementing
+ /// GpuStruct).
+ ///
+ /// The caller passes a closure that initializes the `T::Raw` type given a reference to the
+ /// `GpuStruct` and a `MaybeUninit<T::Raw>`. This is intended to be used with the place!()
+ /// macro to avoid constructing the whole `T::Raw` object on the stack.
+ pub(crate) fn new_inplace(
+ alloc: U,
+ inner: T,
+ callback: impl for<'a> FnOnce(
+ &'a T,
+ &'a mut MaybeUninit<T::Raw<'a>>,
+ ) -> Result<&'a mut T::Raw<'a>>,
+ ) -> Result<Self> {
+ GpuObject::<T, U>::new_boxed(alloc, Box::try_new(inner)?, callback)
+ }
+
+ /// Create a new GpuObject given an allocator, with callback-based initialization.
+ ///
+ /// This is used when the construction of the `T` type requires knowing the GPU VA address of
+ /// the structure that is being constructed ahead of time. The first callback constructs a
+ /// `Box<T>` given the pointer to the about-to-be-initialized GPU structure, and the second
+ /// callback initializes that structure as in `new_boxed`.
+ pub(crate) fn new_prealloc(
+ alloc: U,
+ inner_cb: impl FnOnce(GpuWeakPointer<T>) -> Result<Box<T>>,
+ raw_cb: impl for<'a> FnOnce(
+ &'a T,
+ &'a mut MaybeUninit<T::Raw<'a>>,
+ ) -> Result<&'a mut T::Raw<'a>>,
+ ) -> Result<Self> {
+ if alloc.size() < mem::size_of::<T::Raw<'static>>() {
+ return Err(ENOMEM);
+ }
+ let gpu_ptr =
+ GpuWeakPointer::<T>(NonZeroU64::new(alloc.gpu_ptr()).ok_or(EINVAL)?, PhantomData);
+ mod_dev_dbg!(
+ alloc.device(),
+ "Allocating {} @ {:#x}\n",
+ core::any::type_name::<T>(),
+ alloc.gpu_ptr()
+ );
+ let inner = inner_cb(gpu_ptr)?;
+ let p = alloc.ptr().ok_or(EINVAL)?.as_ptr() as *mut MaybeUninit<T::Raw<'_>>;
+ // SAFETY: `p` is guaranteed to be valid per the Allocation invariant.
+ let raw = raw_cb(&*inner, unsafe { &mut *p })?;
+ if p as *mut T::Raw<'_> != raw as *mut _ {
+ dev_err!(
+ alloc.device(),
+ "Allocation callback returned a mismatched reference ({})\n",
+ core::any::type_name::<T>(),
+ );
+ return Err(EINVAL);
+ }
+ Ok(Self {
+ raw: p as *mut u8 as *mut T::Raw<'static>,
+ gpu_ptr,
+ alloc,
+ inner,
+ })
+ }
+
+ /// Returns the GPU VA of this object (as a raw [`NonZeroU64`])
+ pub(crate) fn gpu_va(&self) -> NonZeroU64 {
+ self.gpu_ptr.0
+ }
+
+ /// Returns a strong GPU pointer to this object, with a lifetime.
+ pub(crate) fn gpu_pointer(&self) -> GpuPointer<'_, T> {
+ GpuPointer(self.gpu_ptr.0, PhantomData)
+ }
+
+ /// Returns a weak GPU pointer to this object, with no lifetime.
+ pub(crate) fn weak_pointer(&self) -> GpuWeakPointer<T> {
+ GpuWeakPointer(self.gpu_ptr.0, PhantomData)
+ }
+
+ /// Perform a mutation to the inner `Raw` data given a user-supplied callback.
+ ///
+ /// The callback gets mutable references to the `Raw` data and to the `GpuStruct` value.
+ pub(crate) fn with_mut<RetVal>(
+ &mut self,
+ callback: impl for<'a> FnOnce(&'a mut <T as GpuStruct>::Raw<'a>, &'a mut T) -> RetVal,
+ ) -> RetVal {
+ // SAFETY: `self.raw` is valid per the type invariant, and the second half is just
+ // converting lifetimes.
+ unsafe { callback(&mut *self.raw, &mut *(&mut *self.inner as *mut _)) }
+ }
+
+ /// Access the inner `Raw` data given a user-supplied callback.
+ ///
+ /// The callback gets shared references to the `Raw` data and to the `GpuStruct` value.
+ pub(crate) fn with<RetVal>(
+ &self,
+ callback: impl for<'a> FnOnce(&'a <T as GpuStruct>::Raw<'a>, &'a T) -> RetVal,
+ ) -> RetVal {
+ // SAFETY: `self.raw` is valid per the type invariant, and the second half is just
+ // converting lifetimes.
+ unsafe { callback(&*self.raw, &*(&*self.inner as *const _)) }
+ }
+}
+
+impl<T: GpuStruct, U: Allocation<T>> Deref for GpuObject<T, U> {
+ type Target = T;
+
+ fn deref(&self) -> &Self::Target {
+ &self.inner
+ }
+}
+
+impl<T: GpuStruct, U: Allocation<T>> DerefMut for GpuObject<T, U> {
+ fn deref_mut(&mut self) -> &mut Self::Target {
+ &mut self.inner
+ }
+}
+
+impl<T: GpuStruct + Debug, U: Allocation<T>> Debug for GpuObject<T, U>
+where
+ <T as GpuStruct>::Raw<'static>: Debug,
+{
+ fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+ f.debug_struct(core::any::type_name::<T>())
+ // SAFETY: `self.raw` is valid per the type invariant.
+ .field("raw", &format_args!("{:#X?}", unsafe { &*self.raw }))
+ .field("inner", &format_args!("{:#X?}", &self.inner))
+ .field("alloc", &format_args!("{:?}", &self.alloc))
+ .finish()
+ }
+}
+
+impl<T: GpuStruct + Default, U: Allocation<T>> GpuObject<T, U>
+where
+ for<'a> <T as GpuStruct>::Raw<'a>: Default + Zeroed,
+{
+ /// Create a new GpuObject with default data. `T` must implement `Default` and `T::Raw` must
+ /// implement `Zeroed`, since the GPU-side memory is initialized by zeroing.
+ pub(crate) fn new_default(alloc: U) -> Result<Self> {
+ GpuObject::<T, U>::new_inplace(alloc, Default::default(), |_inner, raw| {
+ // SAFETY: `raw` is valid here, and `T::Raw` implements `Zeroed`.
+ Ok(unsafe {
+ ptr::write_bytes(raw, 0, 1);
+ (*raw).assume_init_mut()
+ })
+ })
+ }
+}
+
+impl<T: GpuStruct, U: Allocation<T>> Drop for GpuObject<T, U> {
+ fn drop(&mut self) {
+ mod_dev_dbg!(
+ self.alloc.device(),
+ "Dropping {} @ {:?}\n",
+ core::any::type_name::<T>(),
+ self.gpu_pointer()
+ );
+ }
+}
+
+// SAFETY: GpuObjects are Send as long as the GpuStruct itself is Send
+unsafe impl<T: GpuStruct + Send, U: Allocation<T>> Send for GpuObject<T, U> {}
+ // SAFETY: GpuObjects are Sync as long as the GpuStruct itself is Sync
+ unsafe impl<T: GpuStruct + Sync, U: Allocation<T>> Sync for GpuObject<T, U> {}
+
+ /// Trait used to erase the type of a GpuObject, used when we need to keep a list of heterogeneous
+ /// objects around.
+ pub(crate) trait OpaqueGpuObject: Send + Sync {
+ fn gpu_va(&self) -> NonZeroU64;
+ }
+
+impl<T: GpuStruct + Sync + Send, U: Allocation<T>> OpaqueGpuObject for GpuObject<T, U> {
+ fn gpu_va(&self) -> NonZeroU64 {
+ Self::gpu_va(self)
+ }
+}
+
+/// An array of raw GPU objects that is only accessible to the GPU (no CPU-side mapping required).
+///
+/// This must necessarily be uninitialized as far as the GPU is concerned, so it cannot be used
+/// when initialization is required.
+///
+/// # Invariants
+///
+/// `alloc` is valid and at least as large as `len` times the size of one `T`.
+/// `gpu_ptr` is valid and points to the allocation start.
+pub(crate) struct GpuOnlyArray<T, U: Allocation<T>> {
+ len: usize,
+ alloc: U,
+ gpu_ptr: NonZeroU64,
+ _p: PhantomData<T>,
+}
+
+impl<T, U: Allocation<T>> GpuOnlyArray<T, U> {
+ /// Allocate a new GPU-only array of `count` items (`ENOMEM` if `alloc` cannot hold them).
+ pub(crate) fn new(alloc: U, count: usize) -> Result<GpuOnlyArray<T, U>> {
+ let bytes = count.checked_mul(mem::size_of::<T>()).ok_or(ENOMEM)?;
+ let gpu_ptr = NonZeroU64::new(alloc.gpu_ptr()).ok_or(EINVAL)?;
+ if alloc.size() < bytes {
+ return Err(ENOMEM);
+ }
+ Ok(Self {
+ len: count,
+ alloc,
+ gpu_ptr,
+ _p: PhantomData,
+ })
+ }
+
+ /// Returns the GPU VA of this array (as a raw [`NonZeroU64`])
+ pub(crate) fn gpu_va(&self) -> NonZeroU64 {
+ self.gpu_ptr
+ }
+
+ /// Returns a strong GPU pointer to this array, with a lifetime.
+ pub(crate) fn gpu_pointer(&self) -> GpuPointer<'_, &'_ [T]> {
+ GpuPointer(self.gpu_ptr, PhantomData)
+ }
+
+ /// Returns a weak GPU pointer to this array, with no lifetime.
+ pub(crate) fn weak_pointer(&self) -> GpuWeakPointer<[T]> {
+ GpuWeakPointer(self.gpu_ptr, PhantomData)
+ }
+
+ /// Returns a pointer to an offset within the array (as a subslice). Panics if `offset > len`.
+ pub(crate) fn gpu_offset_pointer(&self, offset: usize) -> GpuPointer<'_, &'_ [T]> {
+ if offset > self.len {
+ panic!("Index {} out of bounds (len: {})", offset, self.len);
+ }
+ GpuPointer(
+ NonZeroU64::new(self.gpu_ptr.get() + (offset * mem::size_of::<T>()) as u64).unwrap(),
+ PhantomData,
+ )
+ }
+
+ /* Not used yet
+ /// Returns a weak pointer to an offset within the array (as a subslice).
+ pub(crate) fn weak_offset_pointer(&self, offset: usize) -> GpuWeakPointer<[T]> {
+ if offset > self.len {
+ panic!("Index {} out of bounds (len: {})", offset, self.len);
+ }
+ GpuWeakPointer(
+ NonZeroU64::new(self.gpu_ptr.get() + (offset * mem::size_of::<T>()) as u64).unwrap(),
+ PhantomData,
+ )
+ }
+
+ /// Returns a pointer to an element within the array.
+ pub(crate) fn gpu_item_pointer(&self, index: usize) -> GpuPointer<'_, &'_ T> {
+ if index >= self.len {
+ panic!("Index {} out of bounds (len: {})", index, self.len);
+ }
+ GpuPointer(
+ NonZeroU64::new(self.gpu_ptr.get() + (index * mem::size_of::<T>()) as u64).unwrap(),
+ PhantomData,
+ )
+ }
+ */
+
+ /// Returns a weak pointer to an element within the array. Panics if `index >= len`.
+ pub(crate) fn weak_item_pointer(&self, index: usize) -> GpuWeakPointer<T> {
+ if index >= self.len {
+ panic!("Index {} out of bounds (len: {})", index, self.len);
+ }
+ GpuWeakPointer(
+ NonZeroU64::new(self.gpu_ptr.get() + (index * mem::size_of::<T>()) as u64).unwrap(),
+ PhantomData,
+ )
+ }
+
+ /// Returns the length of the array.
+ pub(crate) fn len(&self) -> usize {
+ self.len
+ }
+}
+
+impl<T: Debug, U: Allocation<T>> Debug for GpuOnlyArray<T, U> {
+ fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+ f.debug_struct(core::any::type_name::<T>())
+ .field("len", &format_args!("{:#X?}", self.len()))
+ .finish()
+ }
+}
+
+impl<T, U: Allocation<T>> Drop for GpuOnlyArray<T, U> {
+ fn drop(&mut self) {
+ mod_dev_dbg!(
+ self.alloc.device(),
+ "Dropping {} @ {:?}\n",
+ core::any::type_name::<T>(),
+ self.gpu_pointer()
+ );
+ }
+}
+
+/// An array of raw GPU objects that is also CPU-accessible.
+///
+/// # Invariants
+///
+/// `raw` is valid and points to the CPU-side view of the array (which must have one).
+pub(crate) struct GpuArray<T, U: Allocation<T>> {
+ raw: *mut T,
+ array: GpuOnlyArray<T, U>,
+}
+
+/* Not used yet
+impl<T: Copy, U: Allocation<T>> GpuArray<T, U> {
+ /// Allocate a new GPU array, copying the contents from a slice.
+ pub(crate) fn new(alloc: U, data: &[T]) -> Result<GpuArray<T, U>> {
+ let p = alloc.ptr().ok_or(EINVAL)?.as_ptr();
+ let inner = GpuOnlyArray::new(alloc, data.len())?;
+ // SAFETY: `p` is valid per the Allocation type invariant, and GpuOnlyArray guarantees
+ // that its size is at least as large as `data.len()`.
+ unsafe { ptr::copy(data.as_ptr(), p, data.len()) };
+ Ok(Self {
+ raw: p,
+ array: inner,
+ })
+ }
+}
+*/
+
+impl<T: Default, U: Allocation<T>> GpuArray<T, U> {
+ /// Allocate a new GPU array, initializing each element to its default.
+ pub(crate) fn empty(alloc: U, count: usize) -> Result<GpuArray<T, U>> {
+ let p = alloc.ptr().ok_or(EINVAL)?.as_ptr() as *mut T;
+ let inner = GpuOnlyArray::new(alloc, count)?;
+ let mut pi = p;
+ for _i in 0..count {
+ // SAFETY: `pi` is valid per the Allocation type invariant, and GpuOnlyArray guarantees
+ // that it can never iterate beyond the buffer length.
+ unsafe {
+ pi.write(Default::default());
+ pi = pi.add(1);
+ }
+ }
+ Ok(Self {
+ raw: p,
+ array: inner,
+ })
+ }
+}
+
+impl<T, U: Allocation<T>> GpuArray<T, U> {
+ /// Get a slice view of the array contents.
+ pub(crate) fn as_slice(&self) -> &[T] {
+ // SAFETY: self.raw / self.len are valid per the type invariant
+ unsafe { slice::from_raw_parts(self.raw, self.len) }
+ }
+
+ /// Get a mutable slice view of the array contents.
+ pub(crate) fn as_mut_slice(&mut self) -> &mut [T] {
+ // SAFETY: self.raw / self.len are valid per the type invariant
+ unsafe { slice::from_raw_parts_mut(self.raw, self.len) }
+ }
+}
+
+impl<T, U: Allocation<T>> Deref for GpuArray<T, U> {
+ type Target = GpuOnlyArray<T, U>;
+
+ fn deref(&self) -> &GpuOnlyArray<T, U> {
+ &self.array
+ }
+}
+
+impl<T, U: Allocation<T>> Index<usize> for GpuArray<T, U> {
+ type Output = T;
+
+ fn index(&self, index: usize) -> &T {
+ if index >= self.len {
+ panic!("Index {} out of bounds (len: {})", index, self.len);
+ }
+ // SAFETY: This is bounds checked above
+ unsafe { &*(self.raw.add(index)) }
+ }
+}
+
+impl<T, U: Allocation<T>> IndexMut<usize> for GpuArray<T, U> {
+ fn index_mut(&mut self, index: usize) -> &mut T {
+ if index >= self.len {
+ panic!("Index {} out of bounds (len: {})", index, self.len);
+ }
+ // SAFETY: This is bounds checked above
+ unsafe { &mut *(self.raw.add(index)) }
+ }
+}
+
+// SAFETY: GpuArray are Send as long as the contained type itself is Send
+unsafe impl<T: Send, U: Allocation<T>> Send for GpuArray<T, U> {}
+// SAFETY: GpuArray are Sync as long as the contained type itself is Sync
+unsafe impl<T: Sync, U: Allocation<T>> Sync for GpuArray<T, U> {}
+
+impl<T: Debug, U: Allocation<T>> Debug for GpuArray<T, U> {
+ fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
+ f.debug_struct(core::any::type_name::<T>())
+ .field("array", &format_args!("{:#X?}", self.as_slice()))
+ .finish()
+ }
+}
diff --git a/drivers/gpu/drm/asahi/place.rs b/drivers/gpu/drm/asahi/place.rs
new file mode 100644
index 000000000000..40c51f4fab8d
--- /dev/null
+++ b/drivers/gpu/drm/asahi/place.rs
@@ -0,0 +1,343 @@
+// SPDX-License-Identifier: Apache-2.0 OR MIT
+
+//! "Placement new" macro
+//!
+//! This cursed abomination of a declarative macro is used to emulate a "placement new" feature,
+//! which allows initializing objects directly in a user-provided memory region without first
+//! going through the stack.
+//!
+//! This driver needs to manage several large GPU objects of a fixed layout. Linux kernel stacks are
+//! very small, so it is impossible to create these objects on the stack. While the compiler can
+//! sometimes optimize away the stack copy and directly instantiate in target memory, this is not
+//! guaranteed and not reliable. Therefore, we need some mechanism to ergonomically initialize
+//! complex structures directly in a pre-allocated piece of memory.
+//!
+//! This issue also affects some driver-internal structs which are large/complex enough to overflow
+//! the stack. While this can be solved by breaking them up into pieces and using `Box` more
+//! liberally, this has performance implications and still isn't very nice. This macro can also be
+//! used to solve this issue.
+//!
+//! # Further reading
+//! https://github.com/rust-lang/rust/issues/27779#issuecomment-378416911
+//! https://internals.rust-lang.org/t/removal-of-all-unstable-placement-features/7223
+
+/// Initialize a `MaybeUninit` in-place, without constructing the value on the stack first.
+///
+/// This macro is analogous to `MaybeUninit::write()`. In other words,
+/// `place!(foo, bar)` is equivalent to `MaybeUninit::write(foo, bar)`, except that `bar` is not
+/// constructed first, but rather its fields (if it is a structure constructor) are copied one by
+/// one into the correct location in the `MaybeUninit`.
+///
+/// The macro supports most Rust initialization syntax including type paths, generic arguments,
+/// and nested structures. Nested structures are themselves initialized in-place field by field.
+/// `..Default::default()` is supported, but this macro converts it to `..Zeroed::zeroed()`, as it
+/// initializes those structs by zero-initializing the underlying memory. Usage of
+/// `..Default::default()` with a type not implementing `Zeroed` will result in a compile error.
+///
+/// Usage:
+/// ```
+/// let mut buf = MaybeUninit::uninit();
+/// let mut_ref = place!(&mut buf, MyStruct {
+/// b: true,
+/// s: String::from("works"),
+/// i: str::parse::<i32>("123").unwrap(),
+/// v: vec![String::from("works")],
+/// x: foo::MyOtherCoolStruct {
+/// a: false,
+/// b: String::from("Hello, world!"),
+/// },
+/// y: foo::MyOtherCoolStruct {
+/// a: false,
+/// b: String::from("Hello, world!"),
+/// },
+/// z: foo::MyCoolGenericStruct::<bool, String> {
+/// a: false,
+/// b: String::from("Hello, world!"),
+/// },
+/// });
+/// // `mut_ref` is now a mutable reference to the `buf`, which is now safely initialized.
+/// ```
+///
+/// Based on https://crates.io/crates/place by DianaNites, with contributions by Joshua Barretto.
+#[macro_export]
+macro_rules! place {
+ // Top-level struct: optionally zero the target, then write each field through `$ptr`
+ (@STRUCT $ptr:ident, _TOP, $typ:path, {$($typ_init:tt)*} { $($fields:tt)* }) => {{
+ place!(@STRUCT_ZERO $ptr, {$($typ_init)*} { $($fields)* });
+ place!(@STRUCT_CHECK $ptr, {$($typ_init)*} { $($fields)* } {
+ place!(@FIELDS $ptr, $($fields)*);
+ });
+ }};
+ // Nested structure: take a raw pointer to the field `$f_struct` and initialize through it
+ (@STRUCT $ptr:ident, $f_struct:ident, $typ:path, {$($typ_init:tt)*} { $($fields:tt)* }) => {{
+ use core::ptr::addr_of_mut;
+ let buf = unsafe { addr_of_mut!((*$ptr).$f_struct) };
+ place!(@STRUCT_ZERO buf, {$($typ_init)*} { $($fields)* });
+ place!(@STRUCT_CHECK $ptr, {$($typ_init)*} { $($fields)* } {
+ place!(@FIELDS buf, $($fields)*);
+ });
+ }};
+
+ // Zero-initialize structure if the initializer ends in ..Default::default() (no-op otherwise)
+ (@STRUCT_ZERO $ptr:ident, {$($typ_init:tt)*} { $($f:ident $(: $v:expr)?),* $(,)? }) => {};
+ (@STRUCT_ZERO $ptr:ident, {$($typ_init:tt)*} { $($($f:ident $(: $v:expr)?),*,)? ..Default::default() }) => {{
+ // Check that the structure actually implements Zeroed
+ const _: () = {
+ fn _check_default() {
+ let _ = $($typ_init)* {
+ ..Zeroed::zeroed()
+ };
+ }
+ };
+ use core::ptr;
+ unsafe { ptr::write_bytes($ptr, 0, 1) };
+
+ }};
+
+ // Check that all fields are specified, by spelling out the struct literal in an `if false` branch
+ (@STRUCT_CHECK $ptr:ident, {$($typ_init:tt)*} { $($($f:ident $(: $v:expr)?),*,)? ..Default::default() } {$($body:tt)*}) => {
+ if false {
+ #[allow(clippy::redundant_field_names)]
+ let _x = $($typ_init)* {
+ $($(
+ $f $(: $v)?
+ ),*
+ ,)?
+ ..Zeroed::zeroed()
+ };
+ } else {
+ {$($body)*}
+ }
+ };
+ (@STRUCT_CHECK $ptr:ident, {$($typ_init:tt)*} { $($f:ident $(: $v:expr)?),* $(,)? } {$($body:tt)*}) => {
+ if false {
+ #[allow(clippy::redundant_field_names)]
+ let _x = $($typ_init)* {
+ $(
+ $f $(: $v)?
+ ),*
+ };
+ } else {
+ {$($body)*}
+ }
+ };
+ // Top-level scalar: evaluate the expression and write it straight to `$ptr`
+ (@SCALAR $ptr:ident, _TOP, $val:expr) => {
+ let tmp = $val;
+ unsafe { $ptr.write(tmp); }
+ };
+ // Regular field: evaluate the expression and write it to the field's address
+ (@SCALAR $ptr:ident, $f:ident, $val:expr) => {{
+ use core::ptr::addr_of_mut;
+ let tmp = $val;
+ unsafe { addr_of_mut!((*$ptr).$f).write(tmp); }
+ }};
+ // Type-like name followed by braces is a nested structure
+ (@PARTIAL $ptr:ident, $f:ident, {$($head:tt)*}, {{ $($fields:tt)* } $($tail:tt)*}) => {
+ place!(@STRUCT $ptr, $f, $($head)*, {$($head)*} { $($fields)* });
+ place!(@FIELDS $ptr $($tail)*)
+ };
+ // Type-like name followed by ::ident, append to head
+ (@PARTIAL $ptr:ident, $f:ident, {$($head:tt)*}, {::$id:ident $($tail:tt)*}) => {
+ place!(@PARTIAL $ptr, $f, {$($head)* :: $id}, {$($tail)*});
+ };
+ // Type-like name followed by ::<args>, append to head
+ (@PARTIAL $ptr:ident, $f:ident, {$($head:tt)*}, {::<$($gen:ty),*> $($tail:tt)*}) => {
+ place!(@PARTIAL $ptr, $f, {$($head)* :: <$($gen),*>}, {$($tail)*});
+ };
+ // Type-like name followed by ::<'lifetime>, append to head
+ (@PARTIAL $ptr:ident, $f:ident, {$($head:tt)*}, {::<$li:lifetime> $($tail:tt)*}) => {
+ place!(@PARTIAL $ptr, $f, {$($head)* :: <$li>}, {$($tail)*});
+ };
+ // Anything else, parse it as an expression
+ (@PARTIAL $ptr:ident, $f:ident, {$($head:tt)*}, {$($tail:tt)*}) => {
+ place!(@EXPR $ptr, $f, $($head)* $($tail)*)
+ };
+ // Expression followed by more fields
+ (@EXPR $ptr:ident, $f:ident, $val:expr, $($tail:tt)*) => {
+ place!(@SCALAR $ptr, $f, $val);
+ place!(@FIELDS $ptr, $($tail)*)
+ };
+ // Last field expression, without a trailing comma
+ (@EXPR $ptr:ident, $f:ident, $val:expr) => {
+ place!(@SCALAR $ptr, $f, $val);
+ };
+ // Field with a value starting with an ident, start incremental type parsing
+ (@FIELDS $ptr:ident, $f:ident : $id:ident $($tail:tt)*) => {
+ place!(@PARTIAL $ptr, $f, {$id}, {$($tail)*});
+ };
+ // Same, but starting with ::ident
+ (@FIELDS $ptr:ident, $f:ident : ::$id:ident $($tail:tt)*) => {
+ place!(@PARTIAL $ptr, $f, {::$id}, {$($tail)*});
+ };
+ // Otherwise, parse it as an expression
+ (@FIELDS $ptr:ident, $f:ident : $($tail:tt)*) => {
+ place!(@EXPR $ptr, $f, $($tail)*)
+ };
+ // Default terminating case (remaining fields were already zeroed by @STRUCT_ZERO)
+ (@FIELDS $ptr:ident, ..Default::default() ) => {};
+ // Terminating case
+ (@FIELDS $ptr:ident $(,)? ) => {};
+ (
+ $buf:expr,
+ $($val:tt)*
+ ) => {{
+ use core::mem::MaybeUninit;
+ // Ensures types are correct
+ let obj: &mut MaybeUninit<_> = $buf;
+ let top_ptr = obj.as_mut_ptr();
+ place!(@FIELDS top_ptr, _TOP: $($val)*);
+ // SAFETY: All fields have been initialized above
+ // The compiler ensures that all fields were used, all types were correct,
+ // and that size and alignment are correct.
+ unsafe { obj.assume_init_mut() }
+ }};
+}
+
+/// Helper macro that extracts the struct type from a struct initialization expression,
+/// i.e. expands `Type { ... }` to just `Type`.
+#[macro_export]
+#[doc(hidden)]
+macro_rules! get_type {
+    ($ty:ty { $($fields:tt)* }) => {
+        $ty
+    };
+}
+
+/// Like `Box::try_new(...)`, but with in-place initialization: allocates an uninitialized `Box` and fills it with `place!`.
+#[macro_export]
+macro_rules! box_in_place {
+ ($($val:tt)*) => {{
+ use $crate::place;
+ let b = Box::<$crate::get_type!($($val)*)>::try_new_uninit(); // type is taken from the initializer expression
+ match b {
+ Ok(mut p) => {
+ place!((&mut *p), $($val)*);
+ Ok(unsafe { p.assume_init() }) // SAFETY: `place!` above has initialized the boxed value
+ }
+ Err(e) => Err(e) // allocation failure is handed back to the caller unchanged
+ }
+ }};
+}
+
+// TODO: figure out how to make this run (these tests use `String`, `Vec` and `vec!`)
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use core::mem::MaybeUninit;
+
+ #[derive(Debug, PartialEq)]
+ struct MyCoolStruct {
+ b: bool,
+ s: String,
+ i: i32,
+ v: Vec<String>,
+ x: MyOtherCoolStruct,
+ y: MyOtherCoolStruct,
+ z: foo::MyCoolGenericStruct<bool, String>,
+ }
+
+ #[derive(Debug, PartialEq)]
+ struct MyDefaultStruct {
+ b: bool,
+ i: i32,
+ j: i16,
+ }
+ default_zeroed!(MyDefaultStruct);
+
+ mod foo {
+ #[derive(Debug, PartialEq)]
+ pub struct MyOtherCoolStruct {
+ pub a: bool,
+ pub b: String,
+ }
+ #[derive(Debug, PartialEq)]
+ pub struct MyCoolGenericStruct<T, U> {
+ pub a: T,
+ pub b: U,
+ }
+ }
+
+ use foo::MyOtherCoolStruct;
+
+ #[test]
+ fn test_initialized() { // exercises scalar fields, nested structs by bare and qualified path, and a generic struct
+ let mut buf: MaybeUninit<MyCoolStruct> = MaybeUninit::uninit();
+
+ let x: &mut MyCoolStruct = place!(
+ &mut buf,
+ MyCoolStruct {
+ b: true,
+ s: String::from("works"),
+ i: str::parse::<i32>("123").unwrap(),
+ v: vec![String::from("works")],
+ x: MyOtherCoolStruct {
+ a: false,
+ b: String::from("Hello, world!"),
+ },
+ y: foo::MyOtherCoolStruct {
+ a: false,
+ b: String::from("Hello, world!"),
+ },
+ z: foo::MyCoolGenericStruct::<bool, String> {
+ a: false,
+ b: String::from("Hello, world!"),
+ }
+ }
+ );
+ //dbg!(x);
+
+ assert_eq!(
+ x,
+ &MyCoolStruct {
+ b: true,
+ s: String::from("works"),
+ i: str::parse::<i32>("123").unwrap(),
+ v: vec![String::from("works")],
+ x: foo::MyOtherCoolStruct {
+ a: false,
+ b: String::from("Hello, world!"),
+ },
+ y: foo::MyOtherCoolStruct {
+ a: false,
+ b: String::from("Hello, world!"),
+ },
+ z: foo::MyCoolGenericStruct::<bool, String> {
+ a: false,
+ b: String::from("Hello, world!"),
+ },
+ },
+ );
+ }
+
+ #[test]
+ fn test_default() { // `..Default::default()` must leave the unspecified field `j` zeroed
+ let mut buf: MaybeUninit<MyDefaultStruct> = MaybeUninit::uninit();
+
+ let x: &mut MyDefaultStruct = place!(
+ &mut buf,
+ MyDefaultStruct {
+ b: true,
+ i: 1,
+ ..Default::default()
+ }
+ );
+
+ assert_eq!(
+ x,
+ &MyDefaultStruct {
+ b: true,
+ i: 1,
+ j: 0,
+ },
+ );
+ }
+
+ #[test]
+ fn test_scalar() { // a non-struct value goes through the top-level scalar path
+ let mut buf: MaybeUninit<u32> = MaybeUninit::uninit();
+
+ let x: &mut u32 = place!(&mut buf, 1234);
+
+ assert_eq!(x, &mut 1234u32);
+ }
+}
diff --git a/drivers/gpu/drm/asahi/queue/common.rs b/drivers/gpu/drm/asahi/queue/common.rs
new file mode 100644
index 000000000000..127b4ccc6eca
--- /dev/null
+++ b/drivers/gpu/drm/asahi/queue/common.rs
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+//! Common queue functionality.
+//!
+//! Shared helpers used by the submission logic for multiple command types.
+
+use crate::fw::microseq;
+use crate::fw::types::*;
+
+use kernel::bindings;
+use kernel::io_buffer::IoBufferReader;
+use kernel::prelude::*;
+use kernel::user_ptr::UserSlicePtr;
+
+use core::mem::MaybeUninit;
+
+/// Build the firmware attachment list from a userspace array of `drm_asahi_attachment`.
+///
+/// `pointer` is the userspace address of the array and `count` is the number of entries in it.
+///
+/// Returns `EINVAL` if `count` exceeds `microseq::MAX_ATTACHMENTS`, or if an attachment size is
+/// so large that rounding it up to a multiple of 128 bytes would overflow. Errors from reading
+/// user memory are propagated unchanged.
+pub(super) fn build_attachments(pointer: u64, count: u32) -> Result<microseq::Attachments> {
+    if count as usize > microseq::MAX_ATTACHMENTS {
+        return Err(EINVAL);
+    }
+
+    const STRIDE: usize = core::mem::size_of::<bindings::drm_asahi_attachment>();
+    // Cannot overflow: `count` was bounded by MAX_ATTACHMENTS above.
+    let size = STRIDE * count as usize;
+
+    // SAFETY: We only read this once, so there are no TOCTOU issues.
+    let mut reader = unsafe { UserSlicePtr::new(pointer as usize as *mut _, size).reader() };
+
+    let mut attachments: microseq::Attachments = Default::default();
+
+    for i in 0..count {
+        let mut att: MaybeUninit<bindings::drm_asahi_attachment> = MaybeUninit::uninit();
+
+        // SAFETY: The size of `att` is STRIDE
+        unsafe { reader.read_raw(att.as_mut_ptr() as *mut u8, STRIDE)? };
+
+        // SAFETY: All bit patterns in the struct are valid
+        let att = unsafe { att.assume_init() };
+
+        // `att.size` comes straight from userspace. Reject values for which the round-up to
+        // 128-byte units would overflow, instead of panicking or silently wrapping.
+        let cache_lines = att.size.checked_add(127).ok_or(EINVAL)? >> 7;
+        let order = 1;
+        attachments.list[i as usize] = microseq::Attachment {
+            address: U64(att.pointer),
+            size: cache_lines,
+            unk_c: 0x17,
+            unk_e: order,
+        };
+
+        attachments.count += 1;
+    }
+
+    Ok(attachments)
+}
diff --git a/drivers/gpu/drm/asahi/queue/compute.rs b/drivers/gpu/drm/asahi/queue/compute.rs
new file mode 100644
index 000000000000..6590382c75af
--- /dev/null
+++ b/drivers/gpu/drm/asahi/queue/compute.rs
@@ -0,0 +1,371 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+#![allow(clippy::unusual_byte_groupings)]
+
+//! Compute work queue.
+//!
+//! A compute queue consists of one underlying WorkQueue.
+//! This module is in charge of creating all of the firmware structures required to submit compute
+//! work to the GPU, based on the userspace command buffer.
+
+use super::common;
+use crate::alloc::Allocator;
+use crate::debug::*;
+use crate::fw::types::*;
+use crate::gpu::GpuManager;
+use crate::{box_in_place, inner_ptr, inner_weak_ptr, place};
+use crate::{fw, gpu, microseq};
+use core::mem::MaybeUninit;
+use core::sync::atomic::Ordering;
+use kernel::bindings;
+use kernel::dma_fence::RawDmaFence;
+use kernel::drm::sched::Job;
+use kernel::io_buffer::IoBufferReader;
+use kernel::prelude::*;
+use kernel::sync::Arc;
+use kernel::user_ptr::UserSlicePtr;
+
+const DEBUG_CLASS: DebugFlags = DebugFlags::Compute;
+
+#[versions(AGX)]
+impl super::Queue::ver {
+ /// Submit work to a compute queue: reads the userspace `drm_asahi_cmd_compute`, builds the firmware `RunCompute` object and its microsequence, and registers a completion callback on the compute job.
+ pub(super) fn submit_compute(
+ &self,
+ job: &mut Job<super::QueueJob::ver>,
+ cmd: &bindings::drm_asahi_command,
+ result_writer: Option<super::ResultWriter>,
+ id: u64,
+ flush_stamps: bool,
+ ) -> Result {
+ if cmd.cmd_type != bindings::drm_asahi_cmd_type_DRM_ASAHI_CMD_COMPUTE {
+ return Err(EINVAL);
+ }
+
+ let dev = self.dev.data();
+ let gpu = match dev.gpu.as_any().downcast_ref::<gpu::GpuManager::ver>() {
+ Some(gpu) => gpu,
+ None => {
+ dev_crit!(self.dev, "GpuManager mismatched with Queue!\n");
+ return Err(EIO);
+ }
+ };
+
+ let mut alloc = gpu.alloc();
+ let kalloc = &mut *alloc;
+
+ mod_dev_dbg!(self.dev, "[Submission {}] Compute!\n", id);
+
+ let mut cmdbuf_reader = unsafe { // NOTE(review): no SAFETY comment here; presumably the same read-once argument as in common::build_attachments, confirm
+ UserSlicePtr::new(
+ cmd.cmd_buffer as usize as *mut _,
+ core::mem::size_of::<bindings::drm_asahi_cmd_compute>(),
+ )
+ .reader()
+ };
+
+ let mut cmdbuf: MaybeUninit<bindings::drm_asahi_cmd_compute> = MaybeUninit::uninit();
+ unsafe { // NOTE(review): no SAFETY comment; the length passed below is the size of `cmdbuf`
+ cmdbuf_reader.read_raw(
+ cmdbuf.as_mut_ptr() as *mut u8,
+ core::mem::size_of::<bindings::drm_asahi_cmd_compute>(),
+ )?;
+ }
+ let cmdbuf = unsafe { cmdbuf.assume_init() }; // NOTE(review): relies on read_raw having filled the whole struct and on every bit pattern being valid for it, confirm
+
+ if cmdbuf.flags != 0 {
+ return Err(EINVAL);
+ }
+
+ // This sequence number increases per new client/VM? assigned to some slot,
+ // but it's unclear *which* slot...
+ let slot_client_seq: u8 = (self.id & 0xff) as u8;
+
+ let vm_bind = job.vm_bind.clone();
+
+ mod_dev_dbg!(
+ self.dev,
+ "[Submission {}] VM slot = {}\n",
+ id,
+ vm_bind.slot()
+ );
+
+ let notifier = self.notifier.clone();
+
+ let fence = job.fence.clone();
+ let comp_job = job.get_comp()?;
+ let ev_comp = comp_job.event_info();
+
+ // TODO: Is this the same on all GPUs? Is this really for preemption?
+ let preempt_size = 0x7fa0;
+ let preempt2_off = 0x7f80;
+ let preempt3_off = 0x7f88;
+ let preempt4_off = 0x7f90;
+ let preempt5_off = 0x7f98;
+
+ let preempt_buf = self.ualloc.lock().array_empty(preempt_size)?;
+
+ let mut seq_buf = self.ualloc.lock().array_empty(0x800)?;
+ for i in 1..0x400 { // NOTE(review): index 0 and indices 0x400..0x800 are not written here, confirm that is intended
+ seq_buf[i] = (i + 1) as u64;
+ }
+
+ mod_dev_dbg!(
+ self.dev,
+ "[Submission {}] Event #{} {:#x?} -> {:#x?}\n",
+ id,
+ ev_comp.slot,
+ ev_comp.value,
+ ev_comp.value.next(),
+ );
+
+ let timestamps = Arc::try_new(kalloc.shared.new_default::<fw::job::JobTimestamps>()?)?;
+
+ let uuid = cmdbuf.cmd_id;
+
+ let unk3 = debug_enabled(debug::DebugFlags::Debug3);
+
+ mod_dev_dbg!(self.dev, "[Submission {}] UUID = {:#x?}\n", id, uuid);
+
+ // TODO: check
+ #[ver(V >= V13_0B4)]
+ let count = self.counter.fetch_add(1, Ordering::Relaxed);
+
+ let comp = GpuObject::new_prealloc(
+ kalloc.private.alloc_object()?,
+ |ptr: GpuWeakPointer<fw::compute::RunCompute::ver>| { // first closure: builds the microsequence and the driver-side RunCompute object
+ let mut builder = microseq::Builder::new();
+
+ let stats = gpu.initdata.runtime_pointers.stats.comp.weak_pointer();
+
+ let start_comp = builder.add(microseq::StartCompute::ver {
+ header: microseq::op::StartCompute::HEADER,
+ unk_pointer: inner_weak_ptr!(ptr, unk_pointee),
+ job_params1: inner_weak_ptr!(ptr, job_params1),
+ stats,
+ work_queue: ev_comp.info_ptr,
+ vm_slot: vm_bind.slot(),
+ unk_28: 0x1,
+ event_generation: self.id as u32,
+ cmd_seq: U64(ev_comp.cmd_seq),
+ unk_38: 0x0,
+ job_params2: inner_weak_ptr!(ptr, job_params2),
+ unk_44: 0x0,
+ uuid,
+ attachments: common::build_attachments(
+ cmdbuf.attachments,
+ cmdbuf.attachment_count,
+ )?,
+ padding: Default::default(),
+ #[ver(V >= V13_0B4)]
+ unk_flag: inner_weak_ptr!(ptr, unk_flag),
+ #[ver(V >= V13_0B4)]
+ counter: U64(count),
+ #[ver(V >= V13_0B4)]
+ notifier_buf: inner_weak_ptr!(notifier.weak_pointer(), state.unk_buf),
+ })?;
+
+ if result_writer.is_some() { // timestamp ops are only emitted when a result buffer was requested
+ builder.add(microseq::Timestamp::ver {
+ header: microseq::op::Timestamp::new(true),
+ cur_ts: inner_weak_ptr!(ptr, cur_ts),
+ start_ts: inner_weak_ptr!(ptr, start_ts),
+ update_ts: inner_weak_ptr!(ptr, start_ts),
+ work_queue: ev_comp.info_ptr,
+ unk_24: U64(0),
+ #[ver(V >= V13_0B4)]
+ unk_ts: inner_weak_ptr!(ptr, unk_ts),
+ uuid,
+ unk_30_padding: 0,
+ })?;
+ }
+
+ builder.add(microseq::WaitForIdle {
+ header: microseq::op::WaitForIdle::new(microseq::Pipe::Compute),
+ })?;
+
+ if result_writer.is_some() {
+ builder.add(microseq::Timestamp::ver {
+ header: microseq::op::Timestamp::new(false),
+ cur_ts: inner_weak_ptr!(ptr, cur_ts),
+ start_ts: inner_weak_ptr!(ptr, start_ts),
+ update_ts: inner_weak_ptr!(ptr, end_ts),
+ work_queue: ev_comp.info_ptr,
+ unk_24: U64(0),
+ #[ver(V >= V13_0B4)]
+ unk_ts: inner_weak_ptr!(ptr, unk_ts),
+ uuid,
+ unk_30_padding: 0,
+ })?;
+ }
+
+ let off = builder.offset_to(start_comp);
+ builder.add(microseq::FinalizeCompute::ver {
+ header: microseq::op::FinalizeCompute::HEADER,
+ stats,
+ work_queue: ev_comp.info_ptr,
+ vm_slot: vm_bind.slot(),
+ #[ver(V < V13_0B4)]
+ unk_18: 0,
+ job_params2: inner_weak_ptr!(ptr, job_params2),
+ unk_24: 0,
+ uuid,
+ fw_stamp: ev_comp.fw_stamp_pointer,
+ stamp_value: ev_comp.value.next(),
+ unk_38: 0,
+ unk_3c: 0,
+ unk_40: 0,
+ unk_44: 0,
+ unk_48: 0,
+ unk_4c: 0,
+ unk_50: 0,
+ unk_54: 0,
+ unk_58: 0,
+ #[ver(G == G14 && V < V13_0B4)]
+ unk_5c_g14: U64(0),
+ restart_branch_offset: off,
+ unk_60: unk3.into(),
+ #[ver(V >= V13_0B4)]
+ unk_64: Default::default(),
+ #[ver(V >= V13_0B4)]
+ unk_flag: inner_weak_ptr!(ptr, unk_flag),
+ #[ver(V >= V13_0B4)]
+ unk_79: Default::default(),
+ })?;
+
+ builder.add(microseq::RetireStamp {
+ header: microseq::op::RetireStamp::HEADER,
+ })?;
+
+ Ok(box_in_place!(fw::compute::RunCompute::ver {
+ notifier: notifier.clone(),
+ preempt_buf: preempt_buf,
+ seq_buf: seq_buf,
+ micro_seq: builder.build(&mut kalloc.private)?,
+ vm_bind: vm_bind.clone(),
+ timestamps: timestamps.clone(),
+ })?)
+ },
+ |inner, ptr| { // second closure: fills in the raw firmware structure in place
+ Ok(place!(
+ ptr,
+ fw::compute::raw::RunCompute::ver {
+ tag: fw::workqueue::CommandType::RunCompute,
+ #[ver(V >= V13_0B4)]
+ counter: U64(count),
+ unk_4: 0,
+ vm_slot: vm_bind.slot(),
+ notifier: inner.notifier.gpu_pointer(),
+ unk_pointee: Default::default(),
+ job_params1: fw::compute::raw::JobParameters1 {
+ preempt_buf1: inner.preempt_buf.gpu_pointer(),
+ encoder: U64(cmdbuf.encoder_ptr),
+ // buf2-5 Only if internal program is used
+ preempt_buf2: inner.preempt_buf.gpu_offset_pointer(preempt2_off),
+ preempt_buf3: inner.preempt_buf.gpu_offset_pointer(preempt3_off),
+ preempt_buf4: inner.preempt_buf.gpu_offset_pointer(preempt4_off),
+ preempt_buf5: inner.preempt_buf.gpu_offset_pointer(preempt5_off),
+ pipeline_base: U64(0x11_00000000),
+ unk_38: U64(0x8c60),
+ unk_40: cmdbuf.ctx_switch_prog, // Internal program addr | 1
+ unk_44: 0,
+ compute_layout_addr: U64(cmdbuf.buffer_descriptor), // Only if internal program used
+ unk_50: cmdbuf.buffer_descriptor_size, // 0x40 if internal program used
+ unk_54: 0,
+ unk_58: 1,
+ unk_5c: 0,
+ iogpu_unk_40: cmdbuf.iogpu_unk_40, // 0x1c if internal program used
+ },
+ unk_b8: Default::default(),
+ microsequence: inner.micro_seq.gpu_pointer(),
+ microsequence_size: inner.micro_seq.len() as u32,
+ job_params2: fw::compute::raw::JobParameters2::ver {
+ #[ver(V >= V13_0B4)]
+ unk_0_0: 0,
+ unk_0: Default::default(),
+ preempt_buf1: inner.preempt_buf.gpu_pointer(),
+ encoder_end: U64(cmdbuf.encoder_end),
+ unk_34: Default::default(),
+ #[ver(V < V13_0B4)]
+ unk_5c: 0,
+ },
+ encoder_params: fw::job::raw::EncoderParams {
+ unk_8: 0x0, // fixed
+ unk_c: 0x0, // fixed
+ unk_10: 0x0, // fixed
+ encoder_id: cmdbuf.encoder_id,
+ unk_18: 0x0, // fixed
+ iogpu_compute_unk44: cmdbuf.iogpu_unk_44,
+ seq_buffer: inner.seq_buf.gpu_pointer(),
+ unk_28: U64(0x0), // fixed
+ },
+ meta: fw::job::raw::JobMeta {
+ unk_4: 0,
+ stamp: ev_comp.stamp_pointer,
+ fw_stamp: ev_comp.fw_stamp_pointer,
+ stamp_value: ev_comp.value.next(),
+ stamp_slot: ev_comp.slot,
+ evctl_index: 0, // fixed
+ flush_stamps: flush_stamps as u32,
+ uuid: uuid,
+ cmd_seq: ev_comp.cmd_seq as u32,
+ },
+ cur_ts: U64(0),
+ start_ts: Some(inner_ptr!(inner.timestamps.gpu_pointer(), start)),
+ end_ts: Some(inner_ptr!(inner.timestamps.gpu_pointer(), end)),
+ unk_2c0: 0,
+ unk_2c4: 0,
+ unk_2c8: 0,
+ unk_2cc: 0,
+ client_sequence: slot_client_seq,
+ pad_2d1: Default::default(),
+ unk_2d4: 0,
+ unk_2d8: 0,
+ #[ver(V >= V13_0B4)]
+ unk_ts: U64(0),
+ #[ver(V >= V13_0B4)]
+ unk_2e1: Default::default(),
+ #[ver(V >= V13_0B4)]
+ unk_flag: U32(0),
+ #[ver(V >= V13_0B4)]
+ unk_pad: Default::default(),
+ }
+ ))
+ },
+ )?;
+
+ core::mem::drop(alloc);
+
+ fence.add_command();
+ comp_job.add_cb(comp, vm_bind.slot(), move |cmd, error| { // completion callback: records any error, writes the result, then completes the fence command
+ if let Some(err) = error {
+ fence.set_error(err.into())
+ }
+ if let Some(mut rw) = result_writer {
+ let mut result: bindings::drm_asahi_result_compute = Default::default();
+
+ cmd.timestamps.with(|raw, _inner| {
+ result.ts_start = raw.start.load(Ordering::Relaxed);
+ result.ts_end = raw.end.load(Ordering::Relaxed);
+ });
+
+ if let Some(err) = error {
+ result.info = err.into();
+ } else {
+ result.info.status = bindings::drm_asahi_status_DRM_ASAHI_STATUS_COMPLETE;
+ }
+
+ rw.write(result);
+ }
+
+ fence.command_complete();
+ })?;
+
+ notifier.threshold.with(|raw, _inner| {
+ raw.increment();
+ });
+
+ comp_job.next_seq();
+
+ Ok(())
+ }
+}
diff --git a/drivers/gpu/drm/asahi/queue/mod.rs b/drivers/gpu/drm/asahi/queue/mod.rs
new file mode 100644
index 000000000000..15988af33cf3
--- /dev/null
+++ b/drivers/gpu/drm/asahi/queue/mod.rs
@@ -0,0 +1,725 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+//! Submission queue management
+//!
+//! This module implements the userspace view of submission queues and the logic to map userspace
+//! submissions to firmware queues.
+
+use kernel::dma_fence::*;
+use kernel::prelude::*;
+use kernel::{
+ bindings, c_str, dma_fence,
+ drm::gem::shmem::VMap,
+ drm::sched,
+ macros::versions,
+ sync::{smutex::Mutex, Arc},
+};
+
+use crate::alloc::Allocator;
+use crate::debug::*;
+use crate::driver::AsahiDevice;
+use crate::fw::types::*;
+use crate::gpu::GpuManager;
+use crate::{alloc, buffer, channel, event, file, fw, gem, gpu, mmu, workqueue};
+use crate::{inner_weak_ptr, place};
+
+use core::mem::MaybeUninit;
+use core::sync::atomic::{AtomicU64, Ordering};
+
+const DEBUG_CLASS: DebugFlags = DebugFlags::Queue;
+
+const WQ_SIZE: u32 = 0x500;
+
+mod common;
+mod compute;
+mod render;
+
+/// Trait implemented by all versioned queues, so callers can submit work without naming a specific version.
+pub(crate) trait Queue: Send + Sync {
+ fn submit( // takes ownership of the sync item lists and the command list
+ &mut self,
+ id: u64,
+ in_syncs: Vec<file::SyncItem>,
+ out_syncs: Vec<file::SyncItem>,
+ result_buf: Option<gem::ObjectRef>,
+ commands: Vec<bindings::drm_asahi_command>,
+ ) -> Result;
+}
+
+#[versions(AGX)]
+struct SubQueue {
+    /// Work queue shared with every job handle created from this sub-queue.
+    wq: Arc<workqueue::WorkQueue::ver>,
+}
+
+#[versions(AGX)]
+impl SubQueue::ver {
+    /// Create a job handle sharing this sub-queue's work queue. The handle starts out without
+    /// an underlying workqueue job.
+    fn new_job(&mut self) -> SubQueueJob::ver {
+        let wq = self.wq.clone();
+        SubQueueJob::ver { wq, job: None }
+    }
+}
+
+#[versions(AGX)]
+struct SubQueueJob {
+    /// Work queue on which the job is created when first requested.
+    wq: Arc<workqueue::WorkQueue::ver>,
+    /// The lazily created workqueue job, if any.
+    job: Option<workqueue::Job::ver>,
+}
+
+#[versions(AGX)]
+impl SubQueueJob::ver {
+    /// Return the underlying workqueue job, creating it on first use.
+    ///
+    /// Fails if the work queue cannot create a new job.
+    fn get(&mut self) -> Result<&mut workqueue::Job::ver> {
+        if let None = self.job {
+            mod_pr_debug!("SubQueueJob: Creating {:?} job\n", self.wq.pipe_type());
+            let created = self.wq.new_job()?;
+            self.job = Some(created);
+        }
+        Ok(self.job.as_mut().expect("expected a Job"))
+    }
+
+    /// Commit the underlying job if one was created; otherwise do nothing.
+    fn commit(&mut self) -> Result {
+        self.job.as_mut().map_or(Ok(()), |job| job.commit())
+    }
+
+    /// Whether the job can be submitted; a handle without a job is always submittable.
+    fn can_submit(&self) -> bool {
+        self.job.as_ref().map_or(true, |job| job.can_submit())
+    }
+}
+
+#[versions(AGX)]
+pub(crate) struct Queue {
+ dev: AsahiDevice,
+ _sched: sched::Scheduler<QueueJob::ver>, // not read via this field (underscore prefix); presumably kept alive for `entity`, confirm
+ entity: sched::Entity<QueueJob::ver>,
+ vm: mmu::Vm,
+ ualloc: Arc<Mutex<alloc::DefaultAllocator>>, // compute submission allocates its preempt and sequence buffers from this
+ q_vtx: Option<SubQueue::ver>,
+ q_frag: Option<SubQueue::ver>,
+ q_comp: Option<SubQueue::ver>,
+ buffer: Option<Mutex<buffer::Buffer::ver>>,
+ gpu_context: Arc<workqueue::GpuContext>,
+ notifier_list: Arc<GpuObject<fw::event::NotifierList>>,
+ notifier: Arc<GpuObject<fw::event::Notifier::ver>>,
+ id: u64, // also used (truncated) as the event generation and client sequence in compute submissions
+ fence_ctx: FenceContexts,
+ #[ver(V >= V13_0B4)]
+ counter: AtomicU64, // incremented once per compute submission
+}
+
+#[versions(AGX)]
+#[derive(Default)]
+pub(crate) struct JobFence {
+    /// Identifier printed in this fence's log messages.
+    id: u64,
+    /// Number of commands added to this fence that have not completed yet.
+    pending: AtomicU64,
+}
+
+#[versions(AGX)]
+impl JobFence::ver {
+    /// Record that one more command must complete before the fence is signaled.
+    fn add_command(self: &FenceObject<Self>) {
+        let _previous = self.pending.fetch_add(1, Ordering::Relaxed);
+    }
+
+    /// Record that one command completed, and signal the fence when none remain.
+    fn command_complete(self: &FenceObject<Self>) {
+        let before = self.pending.fetch_sub(1, Ordering::Relaxed);
+        let remain = before - 1;
+        mod_pr_debug!(
+            "JobFence[{}]: Command complete (remain: {})\n",
+            self.id,
+            remain
+        );
+        if remain != 0 {
+            return;
+        }
+
+        mod_pr_debug!("JobFence[{}]: Signaling\n", self.id);
+        let signaled = self.signal();
+        if signaled.is_err() {
+            pr_err!("JobFence[{}]: Fence signal failed\n", self.id);
+        }
+    }
+}
+
+#[versions(AGX)]
+#[vtable]
+impl dma_fence::FenceOps for JobFence::ver {
+ const USE_64BIT_SEQNO: bool = true;
+
+ fn get_driver_name<'a>(self: &'a FenceObject<Self>) -> &'a CStr { // constant driver name reported for these fences
+ c_str!("asahi")
+ }
+ fn get_timeline_name<'a>(self: &'a FenceObject<Self>) -> &'a CStr { // constant timeline name, the same for every queue
+ c_str!("queue")
+ }
+}
+
+#[versions(AGX)]
+pub(crate) struct QueueJob {
+    dev: AsahiDevice,
+    vm_bind: mmu::VmBind,
+    op_guard: Option<gpu::OpGuard>,
+    /// Vertex sub-queue job handle, if the queue has a vertex sub-queue.
+    sj_vtx: Option<SubQueueJob::ver>,
+    /// Fragment sub-queue job handle, if the queue has a fragment sub-queue.
+    sj_frag: Option<SubQueueJob::ver>,
+    /// Compute sub-queue job handle, if the queue has a compute sub-queue.
+    sj_comp: Option<SubQueueJob::ver>,
+    fence: UserFence<JobFence::ver>,
+    did_run: bool,
+    id: u64,
+}
+
+#[versions(AGX)]
+impl QueueJob::ver {
+    /// Get (creating if needed) the vertex workqueue job; `EINVAL` if there is no vertex handle.
+    fn get_vtx(&mut self) -> Result<&mut workqueue::Job::ver> {
+        match self.sj_vtx.as_mut() {
+            Some(sj) => sj.get(),
+            None => Err(EINVAL),
+        }
+    }
+    /// Get (creating if needed) the fragment workqueue job; `EINVAL` if there is no fragment
+    /// handle.
+    fn get_frag(&mut self) -> Result<&mut workqueue::Job::ver> {
+        match self.sj_frag.as_mut() {
+            Some(sj) => sj.get(),
+            None => Err(EINVAL),
+        }
+    }
+    /// Get (creating if needed) the compute workqueue job; `EINVAL` if there is no compute
+    /// handle.
+    fn get_comp(&mut self) -> Result<&mut workqueue::Job::ver> {
+        match self.sj_comp.as_mut() {
+            Some(sj) => sj.get(),
+            None => Err(EINVAL),
+        }
+    }
+
+    /// Commit the vertex, fragment and compute sub-jobs in that order, stopping at the first
+    /// error. Missing sub-jobs are skipped.
+    fn commit(&mut self) -> Result {
+        mod_dev_dbg!(self.dev, "QueueJob: Committing\n");
+
+        if let Some(sj) = self.sj_vtx.as_mut() {
+            sj.commit()?;
+        }
+        if let Some(sj) = self.sj_frag.as_mut() {
+            sj.commit()?;
+        }
+        match self.sj_comp.as_mut() {
+            Some(sj) => sj.commit(),
+            None => Ok(()),
+        }
+    }
+}
+
+#[versions(AGX)]
+impl sched::JobImpl for QueueJob::ver {
+ fn can_run(job: &mut sched::Job<Self>) -> bool { // false as soon as any present sub-job reports it cannot be submitted
+ mod_dev_dbg!(job.dev, "QueueJob {}: Checking runnability\n", job.id);
+
+ if let Some(sj) = job.sj_vtx.as_ref() {
+ if !sj.can_submit() {
+ mod_dev_dbg!(
+ job.dev,
+ "QueueJob {}: Blocking due to vertex queue full\n",
+ job.id
+ );
+ return false;
+ }
+ }
+ if let Some(sj) = job.sj_frag.as_ref() {
+ if !sj.can_submit() {
+ mod_dev_dbg!(
+ job.dev,
+ "QueueJob {}: Blocking due to fragment queue full\n",
+ job.id
+ );
+ return false;
+ }
+ }
+ if let Some(sj) = job.sj_comp.as_ref() {
+ if !sj.can_submit() {
+ mod_dev_dbg!(
+ job.dev,
+ "QueueJob {}: Blocking due to compute queue full\n",
+ job.id
+ );
+ return false;
+ }
+ }
+ true
+ }
+
+ #[allow(unused_assignments)] // the `None` initializers below may be overwritten before they are read
+ fn run(job: &mut sched::Job<Self>) -> Result<Option<dma_fence::Fence>> {
+ mod_dev_dbg!(job.dev, "QueueJob {}: Running Job\n", job.id);
+
+ let dev = job.dev.data();
+ let gpu = match dev
+ .gpu
+ .clone()
+ .arc_as_any()
+ .downcast::<gpu::GpuManager::ver>()
+ {
+ Ok(gpu) => gpu,
+ Err(_) => {
+ dev_crit!(job.dev, "GpuManager mismatched with QueueJob!\n");
+ return Err(EIO);
+ }
+ };
+
+ if job.op_guard.is_none() {
+ job.op_guard = Some(gpu.start_op()?);
+ }
+
+ // First submit all the commands for each queue. This can fail, in which case we return before any run_job call.
+
+ let mut frag_job = None;
+ let mut frag_sub = None;
+ if let Some(sj) = job.sj_frag.as_mut() {
+ frag_job = sj.job.take();
+ if let Some(wqjob) = frag_job.as_mut() {
+ mod_dev_dbg!(job.dev, "QueueJob {}: Submit fragment\n", job.id);
+ frag_sub = Some(wqjob.submit()?);
+ }
+ }
+
+ let mut vtx_job = None;
+ let mut vtx_sub = None;
+ if let Some(sj) = job.sj_vtx.as_mut() {
+ vtx_job = sj.job.take();
+ if let Some(wqjob) = vtx_job.as_mut() {
+ mod_dev_dbg!(job.dev, "QueueJob {}: Submit vertex\n", job.id);
+ vtx_sub = Some(wqjob.submit()?);
+ }
+ }
+
+ let mut comp_job = None;
+ let mut comp_sub = None;
+ if let Some(sj) = job.sj_comp.as_mut() {
+ comp_job = sj.job.take();
+ if let Some(wqjob) = comp_job.as_mut() {
+ mod_dev_dbg!(job.dev, "QueueJob {}: Submit compute\n", job.id);
+ comp_sub = Some(wqjob.submit()?);
+ }
+ }
+
+ // Now we fully commit to running the job (fragment first, then vertex, then compute)
+ mod_dev_dbg!(job.dev, "QueueJob {}: Run fragment\n", job.id);
+ frag_sub.map(|a| gpu.run_job(a)).transpose()?;
+
+ mod_dev_dbg!(job.dev, "QueueJob {}: Run vertex\n", job.id);
+ vtx_sub.map(|a| gpu.run_job(a)).transpose()?;
+
+ mod_dev_dbg!(job.dev, "QueueJob {}: Run compute\n", job.id);
+ comp_sub.map(|a| gpu.run_job(a)).transpose()?;
+
+ mod_dev_dbg!(job.dev, "QueueJob {}: Drop compute job\n", job.id);
+ core::mem::drop(comp_job);
+ mod_dev_dbg!(job.dev, "QueueJob {}: Drop vertex job\n", job.id);
+ core::mem::drop(vtx_job);
+ mod_dev_dbg!(job.dev, "QueueJob {}: Drop fragment job\n", job.id);
+ core::mem::drop(frag_job);
+
+ job.did_run = true;
+
+ Ok(Some(Fence::from_fence(&job.fence)))
+ }
+
+ fn timed_out(job: &mut sched::Job<Self>) -> sched::Status {
+ // FIXME: Handle timeouts properly (for now this only logs and reports NoDevice)
+ dev_err!(
+ job.dev,
+ "QueueJob {}: Job timed out on the DRM scheduler, things will probably break (ran: {})\n",
+ job.id, job.did_run
+ );
+ sched::Status::NoDevice
+ }
+}
+
+#[versions(AGX)]
+impl Drop for QueueJob::ver {
+ fn drop(&mut self) { // only emits a debug trace; no explicit cleanup is done here
+ mod_dev_dbg!(self.dev, "QueueJob {}: Dropping\n", self.id);
+ }
+}
+
+struct ResultWriter {
+    /// Mapping of the object that receives the result.
+    vmap: VMap<gem::DriverObject>,
+    /// Byte offset into the mapping at which the result is written.
+    offset: usize,
+    /// Maximum number of bytes that may be written.
+    len: usize,
+}
+
+impl ResultWriter {
+    /// Copy the raw bytes of `value` into the mapping at `self.offset`, truncated to at most
+    /// `self.len` bytes.
+    fn write<T>(&mut self, mut value: T) {
+        let size = core::mem::size_of::<T>();
+        let raw: *mut u8 = &mut value as *mut T as *mut u8;
+        // SAFETY: We know `raw` points to a type T of that size, and UAPI types must have
+        // no padding and all bit patterns valid.
+        let bytes = unsafe { core::slice::from_raw_parts_mut(raw, size) };
+        let count = bytes.len().min(self.len);
+        let start = self.offset;
+        let dest = &mut self.vmap.as_mut_slice()[start..start + count];
+        dest.copy_from_slice(&bytes[..count]);
+    }
+}
+
+static QUEUE_NAME: &CStr = c_str!("asahi_fence");
+static QUEUE_CLASS_KEY: kernel::sync::LockClassKey = kernel::sync::LockClassKey::new();
+
+#[versions(AGX)]
+impl Queue::ver {
+ /// Create a new user queue.
+ ///
+ /// Allocates the notifier list, threshold and notifier firmware objects, a per-queue DRM
+ /// scheduler with one entity, a GPU context and a fence context, and then creates the
+ /// sub-queues selected by `caps`:
+ ///
+ /// * `DRM_ASAHI_QUEUE_CAP_RENDER`: a render buffer (grown to the `initial_tvb_size` module
+ /// parameter) and a vertex work queue.
+ /// * `DRM_ASAHI_QUEUE_CAP_RENDER` or `DRM_ASAHI_QUEUE_CAP_BLIT`: a fragment work queue.
+ /// * `DRM_ASAHI_QUEUE_CAP_COMPUTE`: a compute work queue.
+ ///
+ /// `id` identifies the queue in debug messages, is handed to every work queue, and its low
+ /// 32 bits seed the notifier generation. `priority` is forwarded to each work queue.
+ ///
+ /// Any allocation or construction failure is propagated to the caller.
+ #[allow(clippy::too_many_arguments)]
+ pub(crate) fn new(
+ dev: &AsahiDevice,
+ vm: mmu::Vm,
+ alloc: &mut gpu::KernelAllocators,
+ ualloc: Arc<Mutex<alloc::DefaultAllocator>>,
+ ualloc_priv: Arc<Mutex<alloc::DefaultAllocator>>,
+ event_manager: Arc<event::EventManager>,
+ mgr: &buffer::BufferManager,
+ id: u64,
+ priority: u32,
+ caps: u32,
+ ) -> Result<Queue::ver> {
+ mod_dev_dbg!(dev, "[Queue {}] Creating queue\n", id);
+
+ let data = dev.data();
+
+ let mut notifier_list = alloc.private.new_default::<fw::event::NotifierList>()?;
+
+ // Point the list head's `next` at the list's own `list_head` field.
+ let self_ptr = notifier_list.weak_pointer();
+ notifier_list.with_mut(|raw, _inner| {
+ raw.list_head.next = Some(inner_weak_ptr!(self_ptr, list_head));
+ });
+
+ let threshold = alloc.shared.new_default::<fw::event::Threshold>()?;
+
+ // The notifier owns the threshold object and exposes its GPU pointer to the firmware.
+ let notifier: Arc<GpuObject<fw::event::Notifier::ver>> =
+ Arc::try_new(alloc.private.new_inplace(
+ fw::event::Notifier::ver { threshold },
+ |inner, ptr: &mut MaybeUninit<fw::event::raw::Notifier::ver<'_>>| {
+ Ok(place!(
+ ptr,
+ fw::event::raw::Notifier::ver {
+ threshold: inner.threshold.gpu_pointer(),
+ generation: AtomicU32::new(id as u32),
+ cur_count: AtomicU32::new(0),
+ unk_10: AtomicU32::new(0x50),
+ state: Default::default()
+ }
+ ))
+ },
+ )?)?;
+
+ let sched = sched::Scheduler::new(dev, WQ_SIZE, 0, 100000, c_str!("asahi_sched"))?;
+ // Priorities are handled by the AGX scheduler, there is no meaning within a
+ // per-queue scheduler.
+ let entity = sched::Entity::new(&sched, sched::Priority::Normal)?;
+
+ // Start with no sub-queues; they are filled in below according to `caps`.
+ let mut ret = Queue::ver {
+ dev: dev.clone(),
+ _sched: sched,
+ entity,
+ vm,
+ ualloc,
+ q_vtx: None,
+ q_frag: None,
+ q_comp: None,
+ buffer: None,
+ gpu_context: Arc::try_new(workqueue::GpuContext::new(dev, alloc)?)?,
+ notifier_list: Arc::try_new(notifier_list)?,
+ notifier,
+ id,
+ fence_ctx: FenceContexts::new(1, QUEUE_NAME, &QUEUE_CLASS_KEY)?,
+ #[ver(V >= V13_0B4)]
+ counter: AtomicU64::new(0),
+ };
+
+ // Rendering structures
+ if caps & bindings::drm_asahi_queue_cap_DRM_ASAHI_QUEUE_CAP_RENDER != 0 {
+ let buffer =
+ buffer::Buffer::ver::new(&*data.gpu, alloc, ret.ualloc.clone(), ualloc_priv, mgr)?;
+ // Read the `initial_tvb_size` module parameter under the kernel parameter lock; the
+ // lock is released at the end of this block.
+ let tvb_blocks = {
+ let lock = crate::THIS_MODULE.kernel_param_lock();
+ *crate::initial_tvb_size.read(&lock)
+ };
+
+ buffer.ensure_blocks(tvb_blocks)?;
+
+ ret.buffer = Some(Mutex::new(buffer));
+ ret.q_vtx = Some(SubQueue::ver {
+ wq: workqueue::WorkQueue::ver::new(
+ alloc,
+ event_manager.clone(),
+ ret.gpu_context.clone(),
+ ret.notifier_list.clone(),
+ channel::PipeType::Vertex,
+ id,
+ priority,
+ WQ_SIZE,
+ )?,
+ });
+ }
+
+ // Rendering & blit structures
+ if caps
+ & (bindings::drm_asahi_queue_cap_DRM_ASAHI_QUEUE_CAP_RENDER
+ | bindings::drm_asahi_queue_cap_DRM_ASAHI_QUEUE_CAP_BLIT)
+ != 0
+ {
+ ret.q_frag = Some(SubQueue::ver {
+ wq: workqueue::WorkQueue::ver::new(
+ alloc,
+ event_manager.clone(),
+ ret.gpu_context.clone(),
+ ret.notifier_list.clone(),
+ channel::PipeType::Fragment,
+ id,
+ priority,
+ WQ_SIZE,
+ )?,
+ });
+ }
+
+ // Compute structures
+ if caps & bindings::drm_asahi_queue_cap_DRM_ASAHI_QUEUE_CAP_COMPUTE != 0 {
+ ret.q_comp = Some(SubQueue::ver {
+ wq: workqueue::WorkQueue::ver::new(
+ alloc,
+ // Last use of `event_manager`, so it is moved rather than cloned.
+ event_manager,
+ ret.gpu_context.clone(),
+ ret.notifier_list.clone(),
+ channel::PipeType::Compute,
+ id,
+ priority,
+ WQ_SIZE,
+ )?,
+ });
+ }
+
+ mod_dev_dbg!(dev, "[Queue {}] Queue created\n", id);
+ Ok(ret)
+ }
+}
+
+// UAPI sub-queue indices converted to `usize`, used in `submit()` to index the per-subqueue
+// `events` array (which has `SQ_COUNT` entries).
+const SQ_RENDER: usize = bindings::drm_asahi_subqueue_DRM_ASAHI_SUBQUEUE_RENDER as usize;
+const SQ_COMPUTE: usize = bindings::drm_asahi_subqueue_DRM_ASAHI_SUBQUEUE_COMPUTE as usize;
+const SQ_COUNT: usize = bindings::drm_asahi_subqueue_DRM_ASAHI_SUBQUEUE_COUNT as usize;
+
+#[versions(AGX)]
+impl Queue for Queue::ver {
+ /// Submit a batch of commands to this queue as a single scheduler job.
+ ///
+ /// * `id`: submission ID, used in debug messages and stored in the job and its fence.
+ /// * `in_syncs`: sync items whose fences become dependencies of the job.
+ /// * `out_syncs`: sync items that receive the job's "finished" fence after it is pushed.
+ /// * `result_buf`: optional buffer that per-command results are written into.
+ /// * `commands`: the render/compute commands to submit, in order.
+ ///
+ /// Returns `EIO` if the GPU manager is not of the matching versioned type, `ENODEV` if the
+ /// GPU is crashed, and `EINVAL` for an empty submission, an unknown command type, an
+ /// out-of-range barrier index or an invalid result window. Errors from the helpers called
+ /// along the way are propagated.
+ fn submit(
+ &mut self,
+ id: u64,
+ in_syncs: Vec<file::SyncItem>,
+ out_syncs: Vec<file::SyncItem>,
+ result_buf: Option<gem::ObjectRef>,
+ commands: Vec<bindings::drm_asahi_command>,
+ ) -> Result {
+ let dev = self.dev.data();
+ // Recover the concrete, versioned GPU manager from the type-erased one.
+ let gpu = match dev
+ .gpu
+ .clone()
+ .arc_as_any()
+ .downcast::<gpu::GpuManager::ver>()
+ {
+ Ok(gpu) => gpu,
+ Err(_) => {
+ dev_crit!(self.dev, "GpuManager mismatched with JobImpl!\n");
+ return Err(EIO);
+ }
+ };
+
+ mod_dev_dbg!(self.dev, "[Submission {}] Submit job\n", id);
+
+ if gpu.is_crashed() {
+ dev_err!(
+ self.dev,
+ "[Submission {}] GPU is crashed, cannot submit\n",
+ id
+ );
+ return Err(ENODEV);
+ }
+
+ // Empty submissions are not legal
+ if commands.is_empty() {
+ return Err(EINVAL);
+ }
+
+ // An op guard is only taken when the job has input dependencies; it is stored in the
+ // job below.
+ let op_guard = if !in_syncs.is_empty() {
+ Some(gpu.start_op()?)
+ } else {
+ None
+ };
+
+ // Per-subqueue list of event infos that barrier indices refer to. Entry 0 is the
+ // current event info of the fragment / compute work queue (which may be `None`); one
+ // entry is appended after each command submitted below.
+ let mut events: [Vec<Option<workqueue::QueueEventInfo::ver>>; SQ_COUNT] =
+ Default::default();
+
+ events[SQ_RENDER].try_push(self.q_frag.as_ref().and_then(|a| a.wq.event_info()))?;
+ events[SQ_COMPUTE].try_push(self.q_comp.as_ref().and_then(|a| a.wq.event_info()))?;
+
+ let vm_bind = gpu.bind_vm(&self.vm)?;
+ let vm_slot = vm_bind.slot();
+
+ mod_dev_dbg!(self.dev, "[Submission {}] Creating job\n", id);
+ let mut job = self.entity.new_job(QueueJob::ver {
+ dev: self.dev.clone(),
+ vm_bind,
+ op_guard,
+ sj_vtx: self.q_vtx.as_mut().map(|a| a.new_job()),
+ sj_frag: self.q_frag.as_mut().map(|a| a.new_job()),
+ sj_comp: self.q_comp.as_mut().map(|a| a.new_job()),
+ fence: self
+ .fence_ctx
+ .new_fence::<JobFence::ver>(
+ 0,
+ JobFence::ver {
+ id,
+ pending: Default::default(),
+ },
+ )?
+ .into(),
+ did_run: false,
+ id,
+ })?;
+
+ mod_dev_dbg!(
+ self.dev,
+ "[Submission {}] Adding {} in_syncs\n",
+ id,
+ in_syncs.len()
+ );
+ for sync in in_syncs {
+ job.add_dependency(sync.fence.expect("in_sync missing fence"))?;
+ }
+
+ // Index of the last command of each type; compared against the command index below to
+ // tell `submit_render()` / `submit_compute()` which command is the final one of its
+ // type.
+ let mut last_render = None;
+ let mut last_compute = None;
+
+ // First pass: reject unknown command types before anything is queued.
+ for (i, cmd) in commands.iter().enumerate() {
+ match cmd.cmd_type {
+ bindings::drm_asahi_cmd_type_DRM_ASAHI_CMD_RENDER => last_render = Some(i),
+ bindings::drm_asahi_cmd_type_DRM_ASAHI_CMD_COMPUTE => last_compute = Some(i),
+ _ => return Err(EINVAL),
+ }
+ }
+
+ mod_dev_dbg!(
+ self.dev,
+ "[Submission {}] Submitting {} commands\n",
+ id,
+ commands.len()
+ );
+ for (i, cmd) in commands.into_iter().enumerate() {
+ // Explicit barriers: one optional barrier index per sub-queue, referring to an
+ // entry of `events[queue_idx]`.
+ for (queue_idx, index) in cmd.barriers.iter().enumerate() {
+ if *index == bindings::DRM_ASAHI_BARRIER_NONE as u32 {
+ continue;
+ }
+ // An index past the end of the recorded events is a userspace error.
+ if let Some(event) = events[queue_idx].get(*index as usize).ok_or(EINVAL)? {
+ let mut alloc = gpu.alloc();
+ // The barrier is queued on the sub-job this command starts on: vertex for
+ // render commands, compute for compute commands.
+ let queue_job = match cmd.cmd_type {
+ bindings::drm_asahi_cmd_type_DRM_ASAHI_CMD_RENDER => job.get_vtx()?,
+ bindings::drm_asahi_cmd_type_DRM_ASAHI_CMD_COMPUTE => job.get_comp()?,
+ _ => return Err(EINVAL),
+ };
+ mod_dev_dbg!(self.dev, "[Submission {}] Create Explicit Barrier\n", id);
+ let barrier: GpuObject<fw::workqueue::Barrier> = alloc.private.new_inplace(
+ Default::default(),
+ |_inner, ptr: &mut MaybeUninit<fw::workqueue::raw::Barrier>| {
+ Ok(place!(
+ ptr,
+ fw::workqueue::raw::Barrier {
+ tag: fw::workqueue::CommandType::Barrier,
+ wait_stamp: event.fw_stamp_pointer,
+ wait_value: event.value,
+ wait_slot: event.slot,
+ stamp_self: queue_job.event_info().value.next(),
+ uuid: 0xffffbbbb,
+ unk: 0,
+ }
+ ))
+ },
+ )?;
+ mod_dev_dbg!(self.dev, "[Submission {}] Add Explicit Barrier\n", id);
+ queue_job.add(barrier, vm_slot)?;
+ } else {
+ // Only the initial entry (index 0) can be `None`; every entry pushed after
+ // a command is `Some`.
+ assert!(*index == 0);
+ }
+ }
+
+ // Validate the result window of this command and map the result buffer if one is
+ // requested.
+ let result_writer = match result_buf.as_ref() {
+ None => {
+ // Without a result buffer, the command must not ask for a result.
+ if cmd.result_offset != 0 || cmd.result_size != 0 {
+ return Err(EINVAL);
+ }
+ None
+ }
+ Some(buf) => {
+ if cmd.result_size != 0 {
+ // The window [offset, offset + size) must fit in the buffer, and the
+ // addition itself must not overflow.
+ if cmd
+ .result_offset
+ .checked_add(cmd.result_size)
+ .ok_or(EINVAL)?
+ > buf.size() as u64
+ {
+ return Err(EINVAL);
+ }
+ Some(ResultWriter {
+ vmap: buf.gem.vmap()?,
+ offset: cmd.result_offset.try_into()?,
+ len: cmd.result_size.try_into()?,
+ })
+ } else {
+ None
+ }
+ }
+ };
+
+ // Build the command itself, then record the event info of the sub-job it finishes
+ // on so later commands can reference it by barrier index.
+ match cmd.cmd_type {
+ bindings::drm_asahi_cmd_type_DRM_ASAHI_CMD_RENDER => {
+ self.submit_render(
+ &mut job,
+ &cmd,
+ result_writer,
+ id,
+ last_render.unwrap() == i,
+ )?;
+ events[SQ_RENDER].try_push(Some(
+ job.sj_frag
+ .as_ref()
+ .expect("No frag queue?")
+ .job
+ .as_ref()
+ .expect("No frag job?")
+ .event_info(),
+ ))?;
+ }
+ bindings::drm_asahi_cmd_type_DRM_ASAHI_CMD_COMPUTE => {
+ self.submit_compute(
+ &mut job,
+ &cmd,
+ result_writer,
+ id,
+ last_compute.unwrap() == i,
+ )?;
+ events[SQ_COMPUTE].try_push(Some(
+ job.sj_comp
+ .as_ref()
+ .expect("No comp queue?")
+ .job
+ .as_ref()
+ .expect("No comp job?")
+ .event_info(),
+ ))?;
+ }
+ _ => return Err(EINVAL),
+ }
+ }
+
+ mod_dev_dbg!(self.dev, "Queue: Committing job\n");
+ job.commit()?;
+
+ mod_dev_dbg!(self.dev, "Queue: Arming job\n");
+ let job = job.arm();
+ // Grab the "finished" fence before `push()` consumes the job.
+ let out_fence = job.fences().finished();
+ mod_dev_dbg!(self.dev, "Queue: Pushing job\n");
+ job.push();
+
+ mod_dev_dbg!(self.dev, "Queue: Adding {} out_syncs\n", out_syncs.len());
+ for mut sync in out_syncs {
+ // Sync items carrying a chain fence get a new point at `timeline_value`; the
+ // others have their fence replaced.
+ if let Some(chain) = sync.chain_fence.take() {
+ sync.syncobj
+ .add_point(chain, &out_fence, sync.timeline_value);
+ } else {
+ sync.syncobj.replace_fence(Some(&out_fence));
+ }
+ }
+
+ Ok(())
+ }
+}
+
+#[versions(AGX)]
+impl Drop for Queue::ver {
+ // Dropping a queue has no explicit teardown here; it only emits a debug message naming
+ // the queue ID.
+ fn drop(&mut self) {
+ mod_dev_dbg!(self.dev, "[Queue {}] Dropping queue\n", self.id);
+ }
+}
diff --git a/drivers/gpu/drm/asahi/queue/render.rs b/drivers/gpu/drm/asahi/queue/render.rs
new file mode 100644
index 000000000000..318c952df020
--- /dev/null
+++ b/drivers/gpu/drm/asahi/queue/render.rs
@@ -0,0 +1,1173 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+#![allow(clippy::unusual_byte_groupings)]
+
+//! Render work queue.
+//!
+//! A render queue consists of two underlying WorkQueues, one for vertex and one for fragment work.
+//! This module is in charge of creating all of the firmware structures required to submit 3D
+//! rendering work to the GPU, based on the userspace command buffer.
+
+use super::common;
+use crate::alloc::Allocator;
+use crate::debug::*;
+use crate::fw::types::*;
+use crate::gpu::GpuManager;
+use crate::util::*;
+use crate::workqueue::WorkError;
+use crate::{box_in_place, inner_ptr, inner_weak_ptr, place};
+use crate::{buffer, fw, gpu, microseq, workqueue};
+use core::mem::MaybeUninit;
+use core::sync::atomic::Ordering;
+use kernel::bindings;
+use kernel::dma_fence::RawDmaFence;
+use kernel::drm::sched::Job;
+use kernel::io_buffer::IoBufferReader;
+use kernel::prelude::*;
+use kernel::sync::{smutex::Mutex, Arc};
+use kernel::user_ptr::UserSlicePtr;
+
+// Debug class of this module (render queue).
+// NOTE(review): presumably picked up implicitly by the `mod_dev_dbg!` macro — confirm.
+const DEBUG_CLASS: DebugFlags = DebugFlags::Render;
+
+/// Tiling/Vertex control bit to disable using more than one GPU cluster. This results in decreased
+/// throughput but also less latency, which is probably desirable for light vertex loads where the
+/// overhead of clustering/merging would exceed the time it takes to just run the job on one
+/// cluster.
+///
+/// `submit_render` ORs this bit into the configured `tiling_control` value when clustering is
+/// disabled for a submission (that code is only compiled for `G < G14`).
+const TILECTL_DISABLE_CLUSTERING: u32 = 1u32 << 0;
+
+/// Result of one render command, assembled from its vertex and fragment halves and written
+/// to the result buffer once both halves have reported.
+struct RenderResult {
+ /// UAPI result structure that is eventually written out.
+ result: bindings::drm_asahi_result_render,
+ /// Set once the vertex half has reported.
+ vtx_complete: bool,
+ /// Set once the fragment half has reported.
+ frag_complete: bool,
+ /// Error reported by the vertex half, if any.
+ vtx_error: Option<workqueue::WorkError>,
+ /// Error reported by the fragment half, if any.
+ frag_error: Option<workqueue::WorkError>,
+ /// Destination for the finished result.
+ writer: super::ResultWriter,
+}
+
+impl RenderResult {
+ /// Write the result out if both halves are complete; otherwise do nothing.
+ ///
+ /// The vertex error takes precedence over the fragment error, except that a vertex error
+ /// of `WorkError::Killed` yields to any fragment error. With no error at all the status is
+ /// set to `DRM_ASAHI_STATUS_COMPLETE`. Both stored errors are consumed.
+ fn commit(&mut self) {
+ let both_done = self.vtx_complete && self.frag_complete;
+ if !both_done {
+ return;
+ }
+
+ let merged = match (self.vtx_error.take(), self.frag_error.take()) {
+ // No vertex error: whatever the fragment half reported (possibly nothing).
+ (None, frag) => frag,
+ // A killed vertex half is less informative than a real fragment error.
+ (Some(vtx), Some(frag)) if vtx == WorkError::Killed => Some(frag),
+ // Otherwise the vertex error wins.
+ (vtx, _) => vtx,
+ };
+
+ match merged {
+ Some(err) => self.result.info = err.into(),
+ None => {
+ self.result.info.status = bindings::drm_asahi_status_DRM_ASAHI_STATUS_COMPLETE;
+ }
+ }
+
+ self.writer.write(self.result);
+ }
+}
+
+#[versions(AGX)]
+impl super::Queue::ver {
+ /// Get the appropriate tiling parameters for a given userspace command buffer.
+ ///
+ /// `cmdbuf` supplies the framebuffer dimensions, the layer count, the micro-tile (utile)
+ /// dimensions and `ppp_ctrl`; `num_clusters` scales the TPC buffer size and selects the
+ /// multi-cluster variants of `meta1_blocks` and `min_tvb_blocks`.
+ ///
+ /// Returns `EINVAL` if a framebuffer dimension is zero or larger than 65536, if the layer
+ /// count is outside `1..=2048`, if the utile size is not 32x32, 32x16 or 16x16, or if a
+ /// computed buffer size does not fit in `usize`.
+ fn get_tiling_params(
+ cmdbuf: &bindings::drm_asahi_cmd_render,
+ num_clusters: u32,
+ ) -> Result<buffer::TileInfo> {
+ let width: u32 = cmdbuf.fb_width;
+ let height: u32 = cmdbuf.fb_height;
+ let layers: u32 = cmdbuf.layers;
+
+ // Zero dimensions are rejected here as well as in the caller: `width - 1`,
+ // `height - 1` and `tiles_* - 1` below would otherwise underflow.
+ if width == 0 || height == 0 || width > 65536 || height > 65536 {
+ return Err(EINVAL);
+ }
+
+ if layers == 0 || layers > 2048 {
+ return Err(EINVAL);
+ }
+
+ let tile_width = 32u32;
+ let tile_height = 32u32;
+
+ let utile_width = cmdbuf.utile_width;
+ let utile_height = cmdbuf.utile_height;
+
+ match (utile_width, utile_height) {
+ (32, 32) | (32, 16) | (16, 16) => (),
+ _ => return Err(EINVAL),
+ };
+
+ let utiles_per_tile_x = tile_width / utile_width;
+ let utiles_per_tile_y = tile_height / utile_height;
+
+ let utiles_per_tile = utiles_per_tile_x * utiles_per_tile_y;
+
+ let tiles_x = (width + tile_width - 1) / tile_width;
+ let tiles_y = (height + tile_height - 1) / tile_height;
+ let tiles = tiles_x * tiles_y;
+
+ let mtiles_x = 4u32;
+ let mtiles_y = 4u32;
+ let mtiles = mtiles_x * mtiles_y;
+
+ // TODO: *samples
+ let tiles_per_mtile_x = align(div_ceil(tiles_x, mtiles_x), 4);
+ let tiles_per_mtile_y = align(div_ceil(tiles_y, mtiles_y), 4);
+ let tiles_per_mtile = tiles_per_mtile_x * tiles_per_mtile_y;
+
+ let mtile_x1 = tiles_per_mtile_x;
+ let mtile_x2 = 2 * tiles_per_mtile_x;
+ let mtile_x3 = 3 * tiles_per_mtile_x;
+
+ let mtile_y1 = tiles_per_mtile_y;
+ let mtile_y2 = 2 * tiles_per_mtile_y;
+ let mtile_y3 = 3 * tiles_per_mtile_y;
+
+ // Buffer sizes are products of several userspace-influenced factors and can exceed
+ // `u32::MAX` (e.g. a 16384x16384 framebuffer with 16x16 utiles and 2048 layers), so
+ // they are multiplied in `usize` with overflow checking rather than in `u32`.
+ let checked_size = |factors: &[u32]| -> Result<usize> {
+ factors
+ .iter()
+ .try_fold(1usize, |acc, &factor| acc.checked_mul(factor as usize))
+ .ok_or(EINVAL)
+ };
+
+ let rgn_entry_size = 5;
+ // Macrotile stride in 32-bit words
+ let rgn_size = align(rgn_entry_size * tiles_per_mtile * utiles_per_tile, 4) / 4;
+ let tilemap_size = checked_size(&[4, rgn_size, mtiles, layers])?;
+
+ let tpc_entry_size = 8;
+ // TPC stride in 32-bit words
+ let tpc_mtile_stride = tpc_entry_size * utiles_per_tile * tiles_per_mtile / 4;
+ let tpc_size = checked_size(&[num_clusters, 4, tpc_mtile_stride, mtiles, layers])?;
+
+ // No idea where this comes from, but it fits what macOS does...
+ // TODO: layers?
+ let meta1_blocks = if num_clusters > 1 {
+ div_ceil(align(tiles_x, 2) * align(tiles_y, 4), 0x1980)
+ } else {
+ 0
+ };
+
+ let min_tvb_blocks =
+ div_ceil(tiles_x * tiles_y, 128).max(if num_clusters > 1 { 9 } else { 8 }) as usize;
+
+ // Sometimes clustering seems to use twice the cluster tilemap count
+ // and twice the meta4 size. TODO: Is this random or can we calculate
+ // it somehow??? Does it go higher???
+ let cluster_factor = 2;
+
+ Ok(buffer::TileInfo {
+ tiles_x,
+ tiles_y,
+ tiles,
+ utile_width,
+ utile_height,
+ //mtiles_x,
+ //mtiles_y,
+ tiles_per_mtile_x,
+ tiles_per_mtile_y,
+ //tiles_per_mtile,
+ utiles_per_mtile_x: tiles_per_mtile_x * utiles_per_tile_x,
+ utiles_per_mtile_y: tiles_per_mtile_y * utiles_per_tile_y,
+ //utiles_per_mtile: tiles_per_mtile * utiles_per_tile,
+ tilemap_size,
+ tpc_size,
+ meta1_blocks,
+ min_tvb_blocks,
+ cluster_factor,
+ params: fw::vertex::raw::TilingParameters {
+ rgn_size,
+ unk_4: 0x88,
+ ppp_ctrl: cmdbuf.ppp_ctrl,
+ // Dimensions are in 1..=65536 here, so the maxima fit in 16 bits.
+ x_max: (width - 1) as u16,
+ y_max: (height - 1) as u16,
+ te_screen: ((tiles_y - 1) << 12) | (tiles_x - 1),
+ te_mtile1: mtile_x3 | (mtile_x2 << 9) | (mtile_x1 << 18),
+ te_mtile2: mtile_y3 | (mtile_y2 << 9) | (mtile_y1 << 18),
+ tiles_per_mtile,
+ tpc_stride: tpc_mtile_stride,
+ unk_24: 0x100,
+ unk_28: if layers > 1 {
+ 0xe000 | (layers - 1)
+ } else {
+ 0x8000
+ },
+ },
+ })
+ }
+
+ /// Submit work to a render queue.
+ pub(super) fn submit_render(
+ &self,
+ job: &mut Job<super::QueueJob::ver>,
+ cmd: &bindings::drm_asahi_command,
+ result_writer: Option<super::ResultWriter>,
+ id: u64,
+ flush_stamps: bool,
+ ) -> Result {
+ if cmd.cmd_type != bindings::drm_asahi_cmd_type_DRM_ASAHI_CMD_RENDER {
+ return Err(EINVAL);
+ }
+
+ mod_dev_dbg!(self.dev, "[Submission {}] Render!\n", id);
+
+ let mut cmdbuf_reader = unsafe {
+ UserSlicePtr::new(
+ cmd.cmd_buffer as usize as *mut _,
+ core::mem::size_of::<bindings::drm_asahi_cmd_render>(),
+ )
+ .reader()
+ };
+
+ let mut cmdbuf: MaybeUninit<bindings::drm_asahi_cmd_render> = MaybeUninit::uninit();
+ unsafe {
+ cmdbuf_reader.read_raw(
+ cmdbuf.as_mut_ptr() as *mut u8,
+ core::mem::size_of::<bindings::drm_asahi_cmd_render>(),
+ )?;
+ }
+ let cmdbuf = unsafe { cmdbuf.assume_init() };
+
+ if cmdbuf.flags
+ & !(bindings::ASAHI_RENDER_NO_CLEAR_PIPELINE_TEXTURES
+ | bindings::ASAHI_RENDER_SET_WHEN_RELOADING_Z_OR_S
+ | bindings::ASAHI_RENDER_MEMORYLESS_RTS_USED
+ | bindings::ASAHI_RENDER_PROCESS_EMPTY_TILES
+ | bindings::ASAHI_RENDER_NO_VERTEX_CLUSTERING) as u64
+ != 0
+ {
+ return Err(EINVAL);
+ }
+
+ if cmdbuf.flags & bindings::ASAHI_RENDER_MEMORYLESS_RTS_USED as u64 != 0 {
+ // Not supported yet
+ return Err(EINVAL);
+ }
+
+ if cmdbuf.fb_width == 0
+ || cmdbuf.fb_height == 0
+ || cmdbuf.fb_width > 16384
+ || cmdbuf.fb_height > 16384
+ {
+ mod_dev_dbg!(
+ self.dev,
+ "[Submission {}] Invalid dimensions {}x{}\n",
+ id,
+ cmdbuf.fb_width,
+ cmdbuf.fb_height
+ );
+ return Err(EINVAL);
+ }
+
+ let dev = self.dev.data();
+ let gpu = match dev.gpu.as_any().downcast_ref::<gpu::GpuManager::ver>() {
+ Some(gpu) => gpu,
+ None => {
+ dev_crit!(self.dev, "GpuManager mismatched with Queue!\n");
+ return Err(EIO);
+ }
+ };
+
+ let nclusters = gpu.get_dyncfg().id.num_clusters;
+
+ // Can be set to false to disable clustering (for simpler jobs), but then the
+ // core masks below should be adjusted to cover a single rolling cluster.
+ let mut clustering = nclusters > 1;
+
+ if debug_enabled(debug::DebugFlags::DisableClustering)
+ || cmdbuf.flags & bindings::ASAHI_RENDER_NO_VERTEX_CLUSTERING as u64 != 0
+ {
+ clustering = false;
+ }
+
+ #[ver(G < G14)]
+ let tiling_control = {
+ let render_cfg = gpu.get_cfg().render;
+ let mut tiling_control = render_cfg.tiling_control;
+
+ if !clustering {
+ tiling_control |= TILECTL_DISABLE_CLUSTERING;
+ }
+ tiling_control
+ };
+
+ let mut alloc = gpu.alloc();
+ let kalloc = &mut *alloc;
+
+ // This sequence number increases per new client/VM? assigned to some slot,
+ // but it's unclear *which* slot...
+ let slot_client_seq: u8 = (self.id & 0xff) as u8;
+
+ let tile_info = Self::get_tiling_params(&cmdbuf, if clustering { nclusters } else { 1 })?;
+
+ let buffer = self.buffer.as_ref().ok_or(EINVAL)?.lock();
+
+ let scene = Arc::try_new(buffer.new_scene(kalloc, &tile_info)?)?;
+
+ let notifier = self.notifier.clone();
+
+ let tvb_autogrown = buffer.auto_grow()?;
+ if tvb_autogrown {
+ let new_size = buffer.block_count() as usize;
+ cls_dev_dbg!(
+ TVBStats,
+ &self.dev,
+ "[Submission {}] TVB grew to {} bytes ({} blocks) due to overflows\n",
+ id,
+ new_size * buffer::BLOCK_SIZE,
+ new_size,
+ );
+ }
+
+ let tvb_grown = buffer.ensure_blocks(tile_info.min_tvb_blocks)?;
+ if tvb_grown {
+ cls_dev_dbg!(
+ TVBStats,
+ &self.dev,
+ "[Submission {}] TVB grew to {} bytes ({} blocks) due to dimensions ({}x{})\n",
+ id,
+ tile_info.min_tvb_blocks * buffer::BLOCK_SIZE,
+ tile_info.min_tvb_blocks,
+ cmdbuf.fb_width,
+ cmdbuf.fb_height
+ );
+ }
+
+ let vm_bind = job.vm_bind.clone();
+
+ mod_dev_dbg!(
+ self.dev,
+ "[Submission {}] VM slot = {}\n",
+ id,
+ vm_bind.slot()
+ );
+
+ let ev_vtx = job.get_vtx()?.event_info();
+ let ev_frag = job.get_frag()?.event_info();
+
+ mod_dev_dbg!(
+ self.dev,
+ "[Submission {}] Vert event #{} -> {:#x?}\n",
+ id,
+ ev_vtx.slot,
+ ev_vtx.value.next(),
+ );
+ mod_dev_dbg!(
+ self.dev,
+ "[Submission {}] Frag event #{} -> {:#x?}\n",
+ id,
+ ev_frag.slot,
+ ev_frag.value.next(),
+ );
+
+ let uuid_3d = cmdbuf.cmd_3d_id;
+ let uuid_ta = cmdbuf.cmd_ta_id;
+
+ mod_dev_dbg!(
+ self.dev,
+ "[Submission {}] Vert UUID = {:#x?}\n",
+ id,
+ uuid_ta
+ );
+ mod_dev_dbg!(
+ self.dev,
+ "[Submission {}] Frag UUID = {:#x?}\n",
+ id,
+ uuid_3d
+ );
+
+ let fence = job.fence.clone();
+ let frag_job = job.get_frag()?;
+
+ mod_dev_dbg!(self.dev, "[Submission {}] Create Barrier\n", id);
+ let barrier: GpuObject<fw::workqueue::Barrier> = kalloc.private.new_inplace(
+ Default::default(),
+ |_inner, ptr: &mut MaybeUninit<fw::workqueue::raw::Barrier>| {
+ Ok(place!(
+ ptr,
+ fw::workqueue::raw::Barrier {
+ tag: fw::workqueue::CommandType::Barrier,
+ wait_stamp: ev_vtx.fw_stamp_pointer,
+ wait_value: ev_vtx.value.next(),
+ wait_slot: ev_vtx.slot,
+ stamp_self: ev_frag.value.next(),
+ uuid: uuid_3d,
+ unk: 0,
+ }
+ ))
+ },
+ )?;
+
+ mod_dev_dbg!(self.dev, "[Submission {}] Add Barrier\n", id);
+ frag_job.add(barrier, vm_bind.slot())?;
+
+ let timestamps = Arc::try_new(kalloc.shared.new_default::<fw::job::RenderTimestamps>()?)?;
+
+ let unk1 = debug_enabled(debug::DebugFlags::Debug1);
+ let unk2 = debug_enabled(debug::DebugFlags::Debug2);
+ let unk3 = debug_enabled(debug::DebugFlags::Debug3);
+
+ let mut tile_config: u64 = 0;
+ if !unk1 {
+ tile_config |= 0x280;
+ }
+ if cmdbuf.layers > 1 {
+ tile_config |= 1;
+ }
+ if cmdbuf.flags & bindings::ASAHI_RENDER_PROCESS_EMPTY_TILES as u64 != 0 {
+ tile_config |= 0x10000;
+ }
+
+ let mut utile_config =
+ ((tile_info.utile_width / 16) << 12) | ((tile_info.utile_height / 16) << 14);
+ utile_config |= match cmdbuf.samples {
+ 1 => 0,
+ 2 => 1,
+ 4 => 2,
+ _ => return Err(EINVAL),
+ };
+
+ let frag_result = result_writer
+ .map(|writer| {
+ let mut result = RenderResult {
+ result: Default::default(),
+ vtx_complete: false,
+ frag_complete: false,
+ vtx_error: None,
+ frag_error: None,
+ writer,
+ };
+
+ if tvb_autogrown {
+ result.result.flags |= bindings::DRM_ASAHI_RESULT_RENDER_TVB_GROW_OVF as u64;
+ }
+ if tvb_grown {
+ result.result.flags |= bindings::DRM_ASAHI_RESULT_RENDER_TVB_GROW_MIN as u64;
+ }
+ result.result.tvb_size_bytes = buffer.size() as u64;
+
+ Arc::try_new(Mutex::new(result))
+ })
+ .transpose()?;
+
+ let vtx_result = frag_result.clone();
+
+ // TODO: check
+ #[ver(V >= V13_0B4)]
+ let count_frag = self.counter.fetch_add(2, Ordering::Relaxed);
+ #[ver(V >= V13_0B4)]
+ let count_vtx = count_frag + 1;
+
+ mod_dev_dbg!(self.dev, "[Submission {}] Create Frag\n", id);
+ let frag = GpuObject::new_prealloc(
+ kalloc.private.alloc_object()?,
+ |ptr: GpuWeakPointer<fw::fragment::RunFragment::ver>| {
+ let mut builder = microseq::Builder::new();
+
+ let stats = inner_weak_ptr!(
+ gpu.initdata.runtime_pointers.stats.frag.weak_pointer(),
+ stats
+ );
+
+ let start_frag = builder.add(microseq::StartFragment::ver {
+ header: microseq::op::StartFragment::HEADER,
+ job_params2: inner_weak_ptr!(ptr, job_params2),
+ job_params1: inner_weak_ptr!(ptr, job_params1),
+ scene: scene.gpu_pointer(),
+ stats,
+ busy_flag: inner_weak_ptr!(ptr, busy_flag),
+ tvb_overflow_count: inner_weak_ptr!(ptr, tvb_overflow_count),
+ unk_pointer: inner_weak_ptr!(ptr, unk_pointee),
+ work_queue: ev_frag.info_ptr,
+ work_item: ptr,
+ vm_slot: vm_bind.slot(),
+ unk_50: 0x1, // fixed
+ event_generation: self.id as u32,
+ buffer_slot: scene.slot(),
+ unk_5c: 0,
+ cmd_seq: U64(ev_frag.cmd_seq),
+ unk_68: 0,
+ unk_758_flag: inner_weak_ptr!(ptr, unk_758_flag),
+ unk_job_buf: inner_weak_ptr!(ptr, unk_buf_0),
+ unk_7c: 0,
+ unk_80: 0,
+ unk_84: 0,
+ uuid: uuid_3d,
+ attachments: common::build_attachments(
+ cmdbuf.attachments,
+ cmdbuf.attachment_count,
+ )?,
+ unk_190: 0,
+ #[ver(V >= V13_0B4)]
+ counter: U64(count_frag),
+ #[ver(V >= V13_0B4)]
+ notifier_buf: inner_weak_ptr!(notifier.weak_pointer(), state.unk_buf),
+ })?;
+
+ if frag_result.is_some() {
+ builder.add(microseq::Timestamp::ver {
+ header: microseq::op::Timestamp::new(true),
+ cur_ts: inner_weak_ptr!(ptr, cur_ts),
+ start_ts: inner_weak_ptr!(ptr, start_ts),
+ update_ts: inner_weak_ptr!(ptr, start_ts),
+ work_queue: ev_frag.info_ptr,
+ unk_24: U64(0),
+ #[ver(V >= V13_0B4)]
+ unk_ts: inner_weak_ptr!(ptr, unk_ts),
+ uuid: uuid_3d,
+ unk_30_padding: 0,
+ })?;
+ }
+
+ builder.add(microseq::WaitForIdle {
+ header: microseq::op::WaitForIdle::new(microseq::Pipe::Fragment),
+ })?;
+
+ if frag_result.is_some() {
+ builder.add(microseq::Timestamp::ver {
+ header: microseq::op::Timestamp::new(false),
+ cur_ts: inner_weak_ptr!(ptr, cur_ts),
+ start_ts: inner_weak_ptr!(ptr, start_ts),
+ update_ts: inner_weak_ptr!(ptr, end_ts),
+ work_queue: ev_frag.info_ptr,
+ unk_24: U64(0),
+ #[ver(V >= V13_0B4)]
+ unk_ts: inner_weak_ptr!(ptr, unk_ts),
+ uuid: uuid_3d,
+ unk_30_padding: 0,
+ })?;
+ }
+
+ let off = builder.offset_to(start_frag);
+ builder.add(microseq::FinalizeFragment::ver {
+ header: microseq::op::FinalizeFragment::HEADER,
+ uuid: uuid_3d,
+ unk_8: 0,
+ fw_stamp: ev_frag.fw_stamp_pointer,
+ stamp_value: ev_frag.value.next(),
+ unk_18: 0,
+ scene: scene.weak_pointer(),
+ buffer: scene.weak_buffer_pointer(),
+ unk_2c: U64(1),
+ stats,
+ unk_pointer: inner_weak_ptr!(ptr, unk_pointee),
+ busy_flag: inner_weak_ptr!(ptr, busy_flag),
+ work_queue: ev_frag.info_ptr,
+ work_item: ptr,
+ vm_slot: vm_bind.slot(),
+ unk_60: 0,
+ unk_758_flag: inner_weak_ptr!(ptr, unk_758_flag),
+ unk_6c: U64(0),
+ unk_74: U64(0),
+ unk_7c: U64(0),
+ unk_84: U64(0),
+ unk_8c: U64(0),
+ #[ver(G == G14 && V < V13_0B4)]
+ unk_8c_g14: U64(0),
+ restart_branch_offset: off,
+ unk_98: unk3.into(),
+ #[ver(V >= V13_0B4)]
+ unk_9c: Default::default(),
+ })?;
+
+ builder.add(microseq::RetireStamp {
+ header: microseq::op::RetireStamp::HEADER,
+ })?;
+
+ Ok(box_in_place!(fw::fragment::RunFragment::ver {
+ notifier: notifier.clone(),
+ scene: scene.clone(),
+ micro_seq: builder.build(&mut kalloc.private)?,
+ vm_bind: vm_bind.clone(),
+ aux_fb: self.ualloc.lock().array_empty(0x8000)?,
+ timestamps: timestamps.clone(),
+ })?)
+ },
+ |inner, ptr| {
+ let aux_fb_info = fw::fragment::raw::AuxFBInfo::ver {
+ iogpu_unk_214: cmdbuf.iogpu_unk_214,
+ unk2: 0,
+ width: cmdbuf.fb_width,
+ height: cmdbuf.fb_height,
+ #[ver(V >= V13_0B4)]
+ unk3: U64(0x100000),
+ };
+
+ Ok(place!(
+ ptr,
+ fw::fragment::raw::RunFragment::ver {
+ tag: fw::workqueue::CommandType::RunFragment,
+ #[ver(V >= V13_0B4)]
+ counter: U64(count_frag),
+ vm_slot: vm_bind.slot(),
+ unk_8: 0,
+ microsequence: inner.micro_seq.gpu_pointer(),
+ microsequence_size: inner.micro_seq.len() as u32,
+ notifier: inner.notifier.gpu_pointer(),
+ buffer: inner.scene.buffer_pointer(),
+ scene: inner.scene.gpu_pointer(),
+ unk_buffer_buf: inner.scene.kernel_buffer_pointer(),
+ tvb_tilemap: inner.scene.tvb_tilemap_pointer(),
+ ppp_multisamplectl: U64(cmdbuf.ppp_multisamplectl),
+ samples: cmdbuf.samples,
+ tiles_per_mtile_y: tile_info.tiles_per_mtile_y as u16,
+ tiles_per_mtile_x: tile_info.tiles_per_mtile_x as u16,
+ unk_50: U64(0),
+ unk_58: U64(0),
+ merge_upper_x: F32::from_bits(cmdbuf.merge_upper_x),
+ merge_upper_y: F32::from_bits(cmdbuf.merge_upper_y),
+ unk_68: U64(0),
+ tile_count: U64(tile_info.tiles as u64),
+ job_params1: fw::fragment::raw::JobParameters1::ver {
+ utile_config: utile_config,
+ unk_4: 0,
+ clear_pipeline: fw::fragment::raw::ClearPipelineBinding {
+ pipeline_bind: U64(cmdbuf.load_pipeline_bind as u64),
+ address: U64(cmdbuf.load_pipeline as u64),
+ },
+ ppp_multisamplectl: U64(cmdbuf.ppp_multisamplectl),
+ scissor_array: U64(cmdbuf.scissor_array),
+ depth_bias_array: U64(cmdbuf.depth_bias_array),
+ aux_fb_info: aux_fb_info,
+ depth_dimensions: U64(cmdbuf.depth_dimensions as u64),
+ visibility_result_buffer: U64(cmdbuf.visibility_result_buffer),
+ zls_ctrl: U64(cmdbuf.zls_ctrl),
+ #[ver(G >= G14)]
+ unk_58_g14_0: U64(0x4040404),
+ #[ver(G >= G14)]
+ unk_58_g14_8: U64(0),
+ depth_buffer_ptr1: U64(cmdbuf.depth_buffer_1),
+ depth_buffer_ptr2: U64(cmdbuf.depth_buffer_2),
+ stencil_buffer_ptr1: U64(cmdbuf.stencil_buffer_1),
+ stencil_buffer_ptr2: U64(cmdbuf.stencil_buffer_2),
+ #[ver(G >= G14)]
+ unk_68_g14_0: Default::default(),
+ unk_78: Default::default(),
+ depth_meta_buffer_ptr1: U64(cmdbuf.depth_meta_buffer_1),
+ unk_a0: Default::default(),
+ depth_meta_buffer_ptr2: U64(cmdbuf.depth_meta_buffer_2),
+ unk_b0: Default::default(),
+ stencil_meta_buffer_ptr1: U64(cmdbuf.stencil_meta_buffer_1),
+ unk_c0: Default::default(),
+ stencil_meta_buffer_ptr2: U64(cmdbuf.stencil_meta_buffer_2),
+ unk_d0: Default::default(),
+ tvb_tilemap: inner.scene.tvb_tilemap_pointer(),
+ tvb_heapmeta: inner.scene.tvb_heapmeta_pointer(),
+ mtile_stride_dwords: U64((4 * tile_info.params.rgn_size as u64) << 24),
+ tvb_heapmeta_2: inner.scene.tvb_heapmeta_pointer(),
+ tile_config: U64(tile_config),
+ aux_fb: inner.aux_fb.gpu_pointer(),
+ unk_108: Default::default(),
+ pipeline_base: U64(0x11_00000000),
+ unk_140: U64(0x8c60),
+ unk_148: U64(0x0),
+ unk_150: U64(0x0),
+ unk_158: U64(0x1c),
+ unk_160: U64(0),
+ unk_168_padding: Default::default(),
+ #[ver(V < V13_0B4)]
+ __pad0: Default::default(),
+ },
+ job_params2: fw::fragment::raw::JobParameters2 {
+ store_pipeline_bind: cmdbuf.store_pipeline_bind,
+ store_pipeline_addr: cmdbuf.store_pipeline,
+ unk_8: 0x0,
+ unk_c: 0x0,
+ merge_upper_x: F32::from_bits(cmdbuf.merge_upper_x),
+ merge_upper_y: F32::from_bits(cmdbuf.merge_upper_y),
+ unk_18: U64(0x0),
+ utiles_per_mtile_y: tile_info.utiles_per_mtile_y as u16,
+ utiles_per_mtile_x: tile_info.utiles_per_mtile_x as u16,
+ unk_24: 0x0,
+ tile_counts: ((tile_info.tiles_y - 1) << 12) | (tile_info.tiles_x - 1),
+ iogpu_unk_212: cmdbuf.iogpu_unk_212,
+ isp_bgobjdepth: cmdbuf.isp_bgobjdepth,
+ // TODO: does this flag need to be exposed to userspace?
+ isp_bgobjvals: cmdbuf.isp_bgobjvals | 0x400,
+ unk_38: 0x0,
+ unk_3c: 0x1,
+ unk_40: 0,
+ },
+ job_params3: fw::fragment::raw::JobParameters3::ver {
+ unk_44_padding: Default::default(),
+ depth_bias_array: fw::fragment::raw::ArrayAddr {
+ ptr: U64(cmdbuf.depth_bias_array),
+ unk_padding: U64(0),
+ },
+ scissor_array: fw::fragment::raw::ArrayAddr {
+ ptr: U64(cmdbuf.scissor_array),
+ unk_padding: U64(0),
+ },
+ visibility_result_buffer: U64(cmdbuf.visibility_result_buffer),
+ unk_118: U64(0x0),
+ unk_120: Default::default(),
+ unk_reload_pipeline: fw::fragment::raw::ClearPipelineBinding {
+ pipeline_bind: U64(cmdbuf.partial_reload_pipeline_bind as u64),
+ address: U64(cmdbuf.partial_reload_pipeline as u64),
+ },
+ unk_258: U64(0),
+ unk_260: U64(0),
+ unk_268: U64(0),
+ unk_270: U64(0),
+ reload_pipeline: fw::fragment::raw::ClearPipelineBinding {
+ pipeline_bind: U64(cmdbuf.partial_reload_pipeline_bind as u64),
+ address: U64(cmdbuf.partial_reload_pipeline as u64),
+ },
+ zls_ctrl: U64(cmdbuf.zls_ctrl),
+ unk_290: U64(0x0),
+ depth_buffer_ptr1: U64(cmdbuf.depth_buffer_1),
+ unk_2a0: U64(0x0),
+ unk_2a8: U64(0x0),
+ depth_buffer_ptr2: U64(cmdbuf.depth_buffer_2),
+ depth_buffer_ptr3: U64(cmdbuf.depth_buffer_3),
+ depth_meta_buffer_ptr3: U64(cmdbuf.depth_meta_buffer_3),
+ stencil_buffer_ptr1: U64(cmdbuf.stencil_buffer_1),
+ unk_2d0: U64(0x0),
+ unk_2d8: U64(0x0),
+ stencil_buffer_ptr2: U64(cmdbuf.stencil_buffer_2),
+ stencil_buffer_ptr3: U64(cmdbuf.stencil_buffer_3),
+ stencil_meta_buffer_ptr3: U64(cmdbuf.stencil_meta_buffer_3),
+ unk_2f8: Default::default(),
+ iogpu_unk_212: cmdbuf.iogpu_unk_212,
+ unk_30c: 0x0,
+ aux_fb_info: aux_fb_info,
+ unk_320_padding: Default::default(),
+ unk_partial_store_pipeline:
+ fw::fragment::raw::StorePipelineBinding::new(
+ cmdbuf.partial_store_pipeline_bind,
+ cmdbuf.partial_store_pipeline
+ ),
+ partial_store_pipeline: fw::fragment::raw::StorePipelineBinding::new(
+ cmdbuf.partial_store_pipeline_bind,
+ cmdbuf.partial_store_pipeline
+ ),
+ isp_bgobjdepth: cmdbuf.isp_bgobjdepth,
+ isp_bgobjvals: cmdbuf.isp_bgobjvals,
+ iogpu_unk_49: cmdbuf.iogpu_unk_49,
+ unk_37c: 0x0,
+ unk_380: U64(0x0),
+ unk_388: U64(0x0),
+ #[ver(V >= V13_0B4)]
+ unk_390_0: U64(0x0),
+ depth_dimensions: U64(cmdbuf.depth_dimensions as u64),
+ },
+ unk_758_flag: 0,
+ unk_75c_flag: 0,
+ unk_buf: Default::default(),
+ busy_flag: 0,
+ tvb_overflow_count: 0,
+ unk_878: 0,
+ encoder_params: fw::job::raw::EncoderParams {
+ unk_8: (cmdbuf.flags
+ & bindings::ASAHI_RENDER_SET_WHEN_RELOADING_Z_OR_S as u64
+ != 0) as u32,
+ unk_c: 0x0, // fixed
+ unk_10: 0x0, // fixed
+ encoder_id: cmdbuf.encoder_id,
+ unk_18: 0x0, // fixed
+ iogpu_compute_unk44: 0xffffffff,
+ seq_buffer: inner.scene.seq_buf_pointer(),
+ unk_28: U64(0x0), // fixed
+ },
+ process_empty_tiles: (cmdbuf.flags
+ & bindings::ASAHI_RENDER_PROCESS_EMPTY_TILES as u64
+ != 0) as u32,
+ no_clear_pipeline_textures: (cmdbuf.flags
+ & bindings::ASAHI_RENDER_NO_CLEAR_PIPELINE_TEXTURES as u64
+ != 0) as u32,
+ unk_param: unk2.into(), // 1 for boot stuff?
+ unk_pointee: 0,
+ meta: fw::job::raw::JobMeta {
+ unk_4: 0,
+ stamp: ev_frag.stamp_pointer,
+ fw_stamp: ev_frag.fw_stamp_pointer,
+ stamp_value: ev_frag.value.next(),
+ stamp_slot: ev_frag.slot,
+ evctl_index: 0, // fixed
+ flush_stamps: flush_stamps as u32,
+ uuid: uuid_3d,
+ cmd_seq: ev_frag.cmd_seq as u32,
+ },
+ unk_after_meta: unk1.into(),
+ unk_buf_0: U64(0),
+ unk_buf_8: U64(0),
+ unk_buf_10: U64(1),
+ cur_ts: U64(0),
+ start_ts: Some(inner_ptr!(inner.timestamps.gpu_pointer(), frag.start)),
+ end_ts: Some(inner_ptr!(inner.timestamps.gpu_pointer(), frag.end)),
+ unk_914: 0,
+ unk_918: U64(0),
+ unk_920: 0,
+ client_sequence: slot_client_seq,
+ pad_925: Default::default(),
+ unk_928: 0,
+ unk_92c: 0,
+ #[ver(V >= V13_0B4)]
+ unk_ts: U64(0),
+ #[ver(V >= V13_0B4)]
+ unk_92d_8: Default::default(),
+ }
+ ))
+ },
+ )?;
+
+ mod_dev_dbg!(self.dev, "[Submission {}] Add Frag\n", id);
+ fence.add_command();
+
+ frag_job.add_cb(frag, vm_bind.slot(), move |cmd, error| {
+ if let Some(err) = error {
+ fence.set_error(err.into());
+ }
+ if let Some(mut res) = frag_result.as_ref().map(|a| a.lock()) {
+ cmd.timestamps.with(|raw, _inner| {
+ res.result.fragment_ts_start = raw.frag.start.load(Ordering::Relaxed);
+ res.result.fragment_ts_end = raw.frag.end.load(Ordering::Relaxed);
+ });
+ cmd.with(|raw, _inner| {
+ res.result.num_tvb_overflows = raw.tvb_overflow_count;
+ });
+ res.frag_error = error;
+ res.frag_complete = true;
+ res.commit();
+ }
+ fence.command_complete();
+ })?;
+
+ let fence = job.fence.clone();
+ let vtx_job = job.get_vtx()?;
+
+ if scene.rebind() || tvb_grown || tvb_autogrown {
+ mod_dev_dbg!(self.dev, "[Submission {}] Create Bind Buffer\n", id);
+ let bind_buffer = kalloc.private.new_inplace(
+ fw::buffer::InitBuffer::ver {
+ scene: scene.clone(),
+ },
+ |inner, ptr: &mut MaybeUninit<fw::buffer::raw::InitBuffer::ver<'_>>| {
+ Ok(place!(
+ ptr,
+ fw::buffer::raw::InitBuffer::ver {
+ tag: fw::workqueue::CommandType::InitBuffer,
+ vm_slot: vm_bind.slot(),
+ buffer_slot: inner.scene.slot(),
+ unk_c: 0,
+ block_count: buffer.block_count(),
+ buffer: inner.scene.buffer_pointer(),
+ stamp_value: ev_vtx.value.next(),
+ }
+ ))
+ },
+ )?;
+
+ mod_dev_dbg!(self.dev, "[Submission {}] Add Bind Buffer\n", id);
+ vtx_job.add(bind_buffer, vm_bind.slot())?;
+ }
+
+ mod_dev_dbg!(self.dev, "[Submission {}] Create Vertex\n", id);
+ let vtx = GpuObject::new_prealloc(
+ kalloc.private.alloc_object()?,
+ |ptr: GpuWeakPointer<fw::vertex::RunVertex::ver>| {
+ let mut builder = microseq::Builder::new();
+
+ let stats = inner_weak_ptr!(
+ gpu.initdata.runtime_pointers.stats.vtx.weak_pointer(),
+ stats
+ );
+
+ let start_vtx = builder.add(microseq::StartVertex::ver {
+ header: microseq::op::StartVertex::HEADER,
+ tiling_params: inner_weak_ptr!(ptr, tiling_params),
+ job_params1: inner_weak_ptr!(ptr, job_params1),
+ buffer: scene.weak_buffer_pointer(),
+ scene: scene.weak_pointer(),
+ stats,
+ work_queue: ev_vtx.info_ptr,
+ vm_slot: vm_bind.slot(),
+ unk_38: 1, // fixed
+ event_generation: self.id as u32,
+ buffer_slot: scene.slot(),
+ unk_44: 0,
+ cmd_seq: U64(ev_vtx.cmd_seq),
+ unk_50: 0,
+ unk_pointer: inner_weak_ptr!(ptr, unk_pointee),
+ unk_job_buf: inner_weak_ptr!(ptr, unk_buf_0),
+ unk_64: 0x0, // fixed
+ unk_68: unk1.into(),
+ uuid: uuid_ta,
+ unk_70: 0x0, // fixed
+ unk_74: Default::default(), // fixed
+ unk_15c: 0x0, // fixed
+ unk_160: U64(0x0), // fixed
+ unk_168: 0x0, // fixed
+ unk_16c: 0x0, // fixed
+ unk_170: U64(0x0), // fixed
+ #[ver(V >= V13_0B4)]
+ counter: U64(count_vtx),
+ #[ver(V >= V13_0B4)]
+ notifier_buf: inner_weak_ptr!(notifier.weak_pointer(), state.unk_buf),
+ unk_178: 0x0, // padding?
+ })?;
+
+ if vtx_result.is_some() {
+ builder.add(microseq::Timestamp::ver {
+ header: microseq::op::Timestamp::new(true),
+ cur_ts: inner_weak_ptr!(ptr, cur_ts),
+ start_ts: inner_weak_ptr!(ptr, start_ts),
+ update_ts: inner_weak_ptr!(ptr, start_ts),
+ work_queue: ev_vtx.info_ptr,
+ unk_24: U64(0),
+ #[ver(V >= V13_0B4)]
+ unk_ts: inner_weak_ptr!(ptr, unk_ts),
+ uuid: uuid_ta,
+ unk_30_padding: 0,
+ })?;
+ }
+
+ builder.add(microseq::WaitForIdle {
+ header: microseq::op::WaitForIdle::new(microseq::Pipe::Vertex),
+ })?;
+
+ if vtx_result.is_some() {
+ builder.add(microseq::Timestamp::ver {
+ header: microseq::op::Timestamp::new(false),
+ cur_ts: inner_weak_ptr!(ptr, cur_ts),
+ start_ts: inner_weak_ptr!(ptr, start_ts),
+ update_ts: inner_weak_ptr!(ptr, end_ts),
+ work_queue: ev_vtx.info_ptr,
+ unk_24: U64(0),
+ #[ver(V >= V13_0B4)]
+ unk_ts: inner_weak_ptr!(ptr, unk_ts),
+ uuid: uuid_ta,
+ unk_30_padding: 0,
+ })?;
+ }
+
+ let off = builder.offset_to(start_vtx);
+ builder.add(microseq::FinalizeVertex::ver {
+ header: microseq::op::FinalizeVertex::HEADER,
+ scene: scene.weak_pointer(),
+ buffer: scene.weak_buffer_pointer(),
+ stats,
+ work_queue: ev_vtx.info_ptr,
+ vm_slot: vm_bind.slot(),
+ unk_28: 0x0, // fixed
+ unk_pointer: inner_weak_ptr!(ptr, unk_pointee),
+ unk_34: 0x0, // fixed
+ uuid: uuid_ta,
+ fw_stamp: ev_vtx.fw_stamp_pointer,
+ stamp_value: ev_vtx.value.next(),
+ unk_48: U64(0x0), // fixed
+ unk_50: 0x0, // fixed
+ unk_54: 0x0, // fixed
+ unk_58: U64(0x0), // fixed
+ unk_60: 0x0, // fixed
+ unk_64: 0x0, // fixed
+ unk_68: 0x0, // fixed
+ #[ver(G >= G14 && V < V13_0B4)]
+ unk_68_g14: U64(0),
+ restart_branch_offset: off,
+ unk_70: 0x0, // fixed
+ #[ver(V >= V13_0B4)]
+ unk_74: Default::default(), // Ventura
+ })?;
+
+ builder.add(microseq::RetireStamp {
+ header: microseq::op::RetireStamp::HEADER,
+ })?;
+
+ Ok(box_in_place!(fw::vertex::RunVertex::ver {
+ notifier: notifier,
+ scene: scene.clone(),
+ micro_seq: builder.build(&mut kalloc.private)?,
+ vm_bind: vm_bind.clone(),
+ timestamps: timestamps,
+ })?)
+ },
+ |inner, ptr| {
+ #[ver(G < G14)]
+ let core_masks = gpu.core_masks_packed();
+ Ok(place!(
+ ptr,
+ fw::vertex::raw::RunVertex::ver {
+ tag: fw::workqueue::CommandType::RunVertex,
+ #[ver(V >= V13_0B4)]
+ counter: U64(count_vtx),
+ vm_slot: vm_bind.slot(),
+ unk_8: 0,
+ notifier: inner.notifier.gpu_pointer(),
+ buffer_slot: inner.scene.slot(),
+ unk_1c: 0,
+ buffer: inner.scene.buffer_pointer(),
+ scene: inner.scene.gpu_pointer(),
+ unk_buffer_buf: inner.scene.kernel_buffer_pointer(),
+ unk_34: 0,
+ job_params1: fw::vertex::raw::JobParameters1::ver {
+ unk_0: U64(if unk1 { 0 } else { 0x200 }), // sometimes 0
+ unk_8: f32!(1e-20), // fixed
+ unk_c: f32!(1e-20), // fixed
+ tvb_tilemap: inner.scene.tvb_tilemap_pointer(),
+ #[ver(G < G14)]
+ tvb_cluster_tilemaps: inner.scene.cluster_tilemaps_pointer(),
+ tpc: inner.scene.tpc_pointer(),
+ tvb_heapmeta: inner
+ .scene
+ .tvb_heapmeta_pointer()
+ .or(0x8000_0000_0000_0000),
+ iogpu_unk_54: 0x6b0003, // fixed
+ iogpu_unk_55: 0x3a0012, // fixed
+ iogpu_unk_56: U64(0x1), // fixed
+ #[ver(G < G14)]
+ tvb_cluster_meta1: inner
+ .scene
+ .meta_1_pointer()
+ .map(|x| x.or((tile_info.meta1_blocks as u64) << 50)),
+ utile_config: utile_config,
+ unk_4c: 0,
+ ppp_multisamplectl: U64(cmdbuf.ppp_multisamplectl), // fixed
+ tvb_heapmeta_2: inner.scene.tvb_heapmeta_pointer(),
+ #[ver(G < G14)]
+ unk_60: U64(0x0), // fixed
+ #[ver(G < G14)]
+ core_mask: Array::new([
+ *core_masks.first().unwrap_or(&0),
+ *core_masks.get(1).unwrap_or(&0),
+ ]),
+ preempt_buf1: inner.scene.preempt_buf_1_pointer(),
+ preempt_buf2: inner.scene.preempt_buf_2_pointer(),
+ unk_80: U64(0x1), // fixed
+ preempt_buf3: inner
+ .scene
+ .preempt_buf_3_pointer()
+ .or(0x4_0000_0000_0000), // check
+ encoder_addr: U64(cmdbuf.encoder_ptr),
+ #[ver(G < G14)]
+ tvb_cluster_meta2: inner.scene.meta_2_pointer(),
+ #[ver(G < G14)]
+ tvb_cluster_meta3: inner.scene.meta_3_pointer(),
+ #[ver(G < G14)]
+ tiling_control: tiling_control,
+ #[ver(G < G14)]
+ unk_ac: Default::default(), // fixed
+ unk_b0: Default::default(), // fixed
+ pipeline_base: U64(0x11_00000000),
+ #[ver(G < G14)]
+ tvb_cluster_meta4: inner
+ .scene
+ .meta_4_pointer()
+ .map(|x| x.or(0x3000_0000_0000_0000)),
+ #[ver(G < G14)]
+ unk_f0: U64(0x1c + align(tile_info.meta1_blocks, 4) as u64),
+ unk_f8: U64(0x8c60), // fixed
+ unk_100: Default::default(), // fixed
+ unk_118: 0x1c, // fixed
+ #[ver(G >= G14)]
+ __pad: Default::default(),
+ },
+ unk_154: Default::default(),
+ tiling_params: tile_info.params,
+ unk_3e8: Default::default(),
+ tpc: inner.scene.tpc_pointer(),
+ tpc_size: U64(tile_info.tpc_size as u64),
+ microsequence: inner.micro_seq.gpu_pointer(),
+ microsequence_size: inner.micro_seq.len() as u32,
+ fragment_stamp_slot: ev_frag.slot,
+ fragment_stamp_value: ev_frag.value.next(),
+ unk_pointee: 0,
+ unk_pad: 0,
+ job_params2: fw::vertex::raw::JobParameters2 {
+ unk_480: Default::default(), // fixed
+ unk_498: U64(0x0), // fixed
+ unk_4a0: 0x0, // fixed
+ preempt_buf1: inner.scene.preempt_buf_1_pointer(),
+ unk_4ac: 0x0, // fixed
+ unk_4b0: U64(0x0), // fixed
+ unk_4b8: 0x0, // fixed
+ unk_4bc: U64(0x0), // fixed
+ unk_4c4_padding: Default::default(),
+ unk_50c: 0x0, // fixed
+ unk_510: U64(0x0), // fixed
+ unk_518: U64(0x0), // fixed
+ unk_520: U64(0x0), // fixed
+ },
+ encoder_params: fw::job::raw::EncoderParams {
+ unk_8: 0x0, // fixed
+ unk_c: 0x0, // fixed
+ unk_10: 0x0, // fixed
+ encoder_id: cmdbuf.encoder_id,
+ unk_18: 0x0, // fixed
+ iogpu_compute_unk44: 0xffffffff,
+ seq_buffer: inner.scene.seq_buf_pointer(),
+ unk_28: U64(0x0), // fixed
+ },
+ unk_55c: 0,
+ unk_560: 0,
+ memoryless_rts_used: (cmdbuf.flags
+ & bindings::ASAHI_RENDER_MEMORYLESS_RTS_USED as u64
+ != 0) as u32,
+ unk_568: 0,
+ unk_56c: 0,
+ meta: fw::job::raw::JobMeta {
+ unk_4: 0,
+ stamp: ev_vtx.stamp_pointer,
+ fw_stamp: ev_vtx.fw_stamp_pointer,
+ stamp_value: ev_vtx.value.next(),
+ stamp_slot: ev_vtx.slot,
+ evctl_index: 0, // fixed
+ flush_stamps: flush_stamps as u32,
+ uuid: uuid_ta,
+ cmd_seq: ev_vtx.cmd_seq as u32,
+ },
+ unk_after_meta: unk1.into(),
+ unk_buf_0: U64(0),
+ unk_buf_8: U64(0),
+ unk_buf_10: U64(0),
+ cur_ts: U64(0),
+ start_ts: Some(inner_ptr!(inner.timestamps.gpu_pointer(), vtx.start)),
+ end_ts: Some(inner_ptr!(inner.timestamps.gpu_pointer(), vtx.end)),
+ unk_5c4: 0,
+ unk_5c8: 0,
+ unk_5cc: 0,
+ unk_5d0: 0,
+ client_sequence: slot_client_seq,
+ pad_5d5: Default::default(),
+ unk_5d8: 0,
+ unk_5dc: 0,
+ #[ver(V >= V13_0B4)]
+ unk_ts: U64(0),
+ #[ver(V >= V13_0B4)]
+ unk_5dd_8: Default::default(),
+ }
+ ))
+ },
+ )?;
+
+ core::mem::drop(alloc);
+
+ mod_dev_dbg!(self.dev, "[Submission {}] Add Vertex\n", id);
+ fence.add_command();
+ vtx_job.add_cb(vtx, vm_bind.slot(), move |cmd, error| {
+ if let Some(err) = error {
+ fence.set_error(err.into())
+ }
+ if let Some(mut res) = vtx_result.as_ref().map(|a| a.lock()) {
+ cmd.timestamps.with(|raw, _inner| {
+ res.result.vertex_ts_start = raw.vtx.start.load(Ordering::Relaxed);
+ res.result.vertex_ts_end = raw.vtx.end.load(Ordering::Relaxed);
+ });
+ res.result.tvb_usage_bytes = cmd.scene.used_bytes() as u64;
+ if cmd.scene.overflowed() {
+ res.result.flags |= bindings::DRM_ASAHI_RESULT_RENDER_TVB_OVERFLOWED as u64;
+ }
+ res.vtx_error = error;
+ res.vtx_complete = true;
+ res.commit();
+ }
+ fence.command_complete();
+ })?;
+
+ mod_dev_dbg!(self.dev, "[Submission {}] Increment counters\n", id);
+ self.notifier.threshold.with(|raw, _inner| {
+ raw.increment();
+ raw.increment();
+ });
+
+ // TODO: handle rollbacks, move to job submit?
+ buffer.increment();
+
+ job.get_vtx()?.next_seq();
+ job.get_frag()?.next_seq();
+
+ Ok(())
+ }
+}
diff --git a/drivers/gpu/drm/asahi/regs.rs b/drivers/gpu/drm/asahi/regs.rs
new file mode 100644
index 000000000000..019d7214793d
--- /dev/null
+++ b/drivers/gpu/drm/asahi/regs.rs
@@ -0,0 +1,387 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+//! GPU MMIO register abstraction
+//!
+//! Since the vast majority of the interactions with the GPU are brokered through the firmware,
+ //! there is very little need to interact directly with GPU MMIO registers. This module abstracts
+//! the few operations that require that, mainly reading the MMU fault status, reading GPU ID
+//! information, and starting the GPU firmware coprocessor.
+
+use crate::hw;
+use kernel::{device, io_mem::IoMem, platform, prelude::*};
+
+ /// Size of the ASC control MMIO region.
+ pub(crate) const ASC_CTL_SIZE: usize = 0x4000;
+
+ /// Size of the SGX MMIO region.
+ pub(crate) const SGX_SIZE: usize = 0x1000000;
+
+ /// Offset of the CPU control register within the ASC control region.
+ const CPU_CONTROL: usize = 0x44;
+ /// Bit in `CPU_CONTROL` that is set to start the coprocessor CPU.
+ const CPU_RUN: u32 = 0x1 << 4; // BIT(4)
+
+ /// Offset of the 64-bit MMU fault status register within the SGX region.
+ const FAULT_INFO: usize = 0x17030;
+
+ // Offsets of the 32-bit GPU identification registers within the SGX region.
+ const ID_VERSION: usize = 0xd04000;
+ const ID_UNK08: usize = 0xd04008;
+ const ID_COUNTS_1: usize = 0xd04010;
+ const ID_COUNTS_2: usize = 0xd04014;
+ const ID_UNK18: usize = 0xd04018;
+ const ID_CLUSTERS: usize = 0xd0401c;
+
+ // Offsets of the two 32-bit halves of the core mask within the SGX region
+ // (`CORE_MASK_0` supplies the low 32 bits, `CORE_MASK_1` the high 32 bits).
+ const CORE_MASK_0: usize = 0xd01500;
+ const CORE_MASK_1: usize = 0xd01514;
+
+ /// Enum representing the unit that caused an MMU fault.
+ ///
+ /// Values are produced by decoding the 8-bit unit code found in the MMU fault status register.
+ /// Where a variant carries a `u8`, the payload is a number extracted from that unit code (for
+ /// `Unknown`, it is the raw unit code itself).
+ #[allow(non_camel_case_types)]
+ #[allow(clippy::upper_case_acronyms)]
+ #[derive(Copy, Clone, Debug, Eq, PartialEq)]
+ pub(crate) enum FaultUnit {
+ /// Decompress / pixel fetch
+ DCMP(u8),
+ /// USC L1 Cache (device loads/stores)
+ UL1C(u8),
+ /// Compress / pixel store
+ CMP(u8),
+ GSL1(u8),
+ IAP(u8),
+ VCE(u8),
+ /// Tiling Engine
+ TE(u8),
+ RAS(u8),
+ /// Vertex Data Master
+ VDM(u8),
+ PPP(u8),
+ /// ISP Parameter Fetch
+ IPF(u8),
+ IPF_CPF(u8),
+ VF(u8),
+ VF_CPF(u8),
+ /// Depth/Stencil load/store
+ ZLS(u8),
+
+ /// Parameter Management
+ dPM,
+ /// Compute Data Master
+ dCDM_KS(u8),
+ dIPP,
+ dIPP_CS,
+ /// Vertex Data Master
+ dVDM_CSD,
+ dVDM_SSD,
+ dVDM_ILF,
+ dVDM_ILD,
+ dRDE(u8),
+ FC,
+ GSL2,
+
+ /// Graphics L2 Cache Control?
+ GL2CC_META(u8),
+ GL2CC_MB,
+
+ /// Parameter Management
+ gPM_SP(u8),
+ /// Vertex Data Master - CSD
+ gVDM_CSD_SP(u8),
+ gVDM_SSD_SP(u8),
+ gVDM_ILF_SP(u8),
+ gVDM_TFP_SP(u8),
+ gVDM_MMB_SP(u8),
+ /// Compute Data Master
+ gCDM_CS_KS0_SP(u8),
+ gCDM_CS_KS1_SP(u8),
+ gCDM_CS_KS2_SP(u8),
+ gCDM_KS0_SP(u8),
+ gCDM_KS1_SP(u8),
+ gCDM_KS2_SP(u8),
+ gIPP_SP(u8),
+ gIPP_CS_SP(u8),
+ gRDE0_SP(u8),
+ gRDE1_SP(u8),
+
+ /// Unit code that is not recognized; the raw unit code is carried as the payload.
+ Unknown(u8),
+ }
+
+ /// Reason for an MMU fault.
+ ///
+ /// Decoded from the 3-bit reason field of the MMU fault status register. The named variants
+ /// correspond to field values 0 through 4, in declaration order.
+ #[derive(Copy, Clone, Debug, Eq, PartialEq)]
+ pub(crate) enum FaultReason {
+ Unmapped,
+ // NOTE(review): presumably an "access flag" fault -- confirm against hardware documentation.
+ AfFault,
+ WriteOnly,
+ ReadOnly,
+ NoAccess,
+ /// Any other value of the reason field (5 to 7), carried as the payload.
+ Unknown(u8),
+ }
+
+ /// Collection of information about an MMU fault.
+ ///
+ /// All fields are decoded from the single 64-bit fault status register value; bit 0 of that
+ /// value indicates whether a fault is recorded at all.
+ #[derive(Copy, Clone, Debug, Eq, PartialEq)]
+ pub(crate) struct FaultInfo {
+ /// Faulting address: register bits 63:30, shifted left by 6.
+ pub(crate) address: u64,
+ /// Register bits 29:23.
+ pub(crate) sideband: u8,
+ /// Register bits 22:17.
+ pub(crate) vm_slot: u32,
+ /// Raw unit code: register bits 16:9.
+ pub(crate) unit_code: u8,
+ /// Decoded form of `unit_code`.
+ pub(crate) unit: FaultUnit,
+ /// Register bits 8:7.
+ pub(crate) level: u8,
+ /// Register bits 6:5 (meaning unknown).
+ pub(crate) unk_5: u8,
+ /// `true` if register bit 4 is set.
+ pub(crate) read: bool,
+ /// Decoded form of register bits 3:1.
+ pub(crate) reason: FaultReason,
+ }
+
+ /// Device resources for this GPU instance.
+ pub(crate) struct Resources {
+ /// Device handle, used as the target of log messages.
+ dev: device::Device,
+ /// Mapping of the ASC control MMIO region (`ASC_CTL_SIZE` bytes).
+ asc: IoMem<ASC_CTL_SIZE>,
+ /// Mapping of the SGX MMIO region (`SGX_SIZE` bytes).
+ sgx: IoMem<SGX_SIZE>,
+ }
+
+impl Resources {
+ /// Map the required resources given our platform device.
+ ///
+ /// Platform resource 0 is mapped as the ASC control region and resource 1 as the SGX region.
+ /// Any error returned by `ioremap_resource` is propagated to the caller.
+ pub(crate) fn new(pdev: &mut platform::Device) -> Result<Resources> {
+ // TODO: add device abstraction to ioremap by name
+ // NOTE(review): these two `unsafe` blocks carry no SAFETY comment of their own; the SAFETY
+ // comment further down sits on a safe call and presumably belongs here -- confirm against
+ // the safety contract of `ioremap_resource`.
+ let asc_res = unsafe { pdev.ioremap_resource(0)? };
+ let sgx_res = unsafe { pdev.ioremap_resource(1)? };
+
+ Ok(Resources {
+ // SAFETY: This device does DMA via the UAT IOMMU.
+ dev: device::Device::from_dev(pdev),
+ asc: asc_res,
+ sgx: sgx_res,
+ })
+ }
+
+ /// Reads the 32-bit register at byte offset `off` of the SGX region using `readl_relaxed`.
+ fn sgx_read32(&self, off: usize) -> u32 {
+ self.sgx.readl_relaxed(off)
+ }
+
+ /* Not yet used
+ fn sgx_write32(&self, off: usize, val: u32) {
+ self.sgx.writel_relaxed(val, off)
+ }
+ */
+
+ /// Reads the 64-bit register at byte offset `off` of the SGX region using `readq_relaxed`.
+ fn sgx_read64(&self, off: usize) -> u64 {
+ self.sgx.readq_relaxed(off)
+ }
+
+ /* Not yet used
+ fn sgx_write64(&self, off: usize, val: u64) {
+ self.sgx.writeq_relaxed(val, off)
+ }
+ */
+
+ /// Initialize the MMIO registers for the GPU.
+ ///
+ /// Currently a placeholder: no register is touched and `Ok(())` is always returned.
+ pub(crate) fn init_mmio(&self) -> Result {
+ // Nothing to do for now...
+
+ Ok(())
+ }
+
+ /// Start the ASC coprocessor CPU.
+ ///
+ /// Performs a read-modify-write of `CPU_CONTROL` in the ASC control region that sets the
+ /// `CPU_RUN` bit and writes every other bit back as it was read. Always returns `Ok(())`.
+ pub(crate) fn start_cpu(&self) -> Result {
+     let control = self.asc.readl_relaxed(CPU_CONTROL) | CPU_RUN;
+     self.asc.writel_relaxed(control, CPU_CONTROL);
+     Ok(())
+ }
+
+ /// Get the GPU identification info from registers.
+ ///
+ /// See [`hw::GpuIdConfig`] for the result.
+ ///
+ /// The 64-bit core mask (assembled from `CORE_MASK_0` and `CORE_MASK_1`) is split into one
+ /// `num_cores`-bit mask per cluster, lowest cluster first.
+ ///
+ /// # Errors
+ ///
+ /// - `ENODEV` if the reported cluster and core counts multiply to more than 64, or if the
+ ///   revision, generation or variant field holds an unrecognized value.
+ /// - `EIO` if core mask bits remain set after all reported clusters were consumed.
+ /// - Any error returned by `try_push` while building the core mask vectors.
+ pub(crate) fn get_gpu_id(&self) -> Result<hw::GpuIdConfig> {
+     let id_version = self.sgx_read32(ID_VERSION);
+     let id_unk08 = self.sgx_read32(ID_UNK08);
+     let id_counts_1 = self.sgx_read32(ID_COUNTS_1);
+     let id_counts_2 = self.sgx_read32(ID_COUNTS_2);
+     let id_unk18 = self.sgx_read32(ID_UNK18);
+     let id_clusters = self.sgx_read32(ID_CLUSTERS);
+
+     dev_info!(
+         self.dev,
+         "GPU ID registers: {:#x} {:#x} {:#x} {:#x} {:#x} {:#x}\n",
+         id_version,
+         id_unk08,
+         id_counts_1,
+         id_counts_2,
+         id_unk18,
+         id_clusters
+     );
+
+     let core_mask_0 = self.sgx_read32(CORE_MASK_0);
+     let core_mask_1 = self.sgx_read32(CORE_MASK_1);
+     let mut core_mask = (core_mask_0 as u64) | ((core_mask_1 as u64) << 32);
+
+     dev_info!(self.dev, "Core mask: {:#x}\n", core_mask);
+
+     let num_clusters = (id_clusters >> 12) & 0xff;
+     let num_cores = id_counts_1 & 0xff;
+
+     if num_cores * num_clusters > 64 {
+         dev_err!(
+             self.dev,
+             "Too many total cores ({} x {} > 64)\n",
+             num_clusters,
+             num_cores
+         );
+         return Err(ENODEV);
+     }
+
+     let mut core_masks = Vec::new();
+     let mut total_active_cores: u32 = 0;
+
+     // `num_cores` is an unvalidated 8-bit register field. The product check above does not
+     // bound it when `num_clusters` is 0, and it allows exactly 64 with a single cluster, so a
+     // plain `<<` / `>>` by `num_cores` could exceed the 64-bit shift width. Checked shifts
+     // give the natural result instead: a shift by 64 or more selects the whole mask and
+     // leaves nothing behind.
+     let max_core_mask = 1u64
+         .checked_shl(num_cores)
+         .map_or(u64::MAX, |bit| bit - 1);
+     for _ in 0..num_clusters {
+         let mask = core_mask & max_core_mask;
+         core_masks.try_push(mask as u32)?;
+         core_mask = core_mask.checked_shr(num_cores).unwrap_or(0);
+         total_active_cores += mask.count_ones();
+     }
+
+     // The packed form is the raw register contents; the high word is only kept if non-zero.
+     let mut core_masks_packed = Vec::new();
+     core_masks_packed.try_push(core_mask_0)?;
+     if core_mask_1 != 0 {
+         core_masks_packed.try_push(core_mask_1)?;
+     }
+
+     // Every set bit must have been consumed by one of the reported clusters.
+     if core_mask != 0 {
+         dev_err!(self.dev, "Leftover core mask: {:#x}\n", core_mask);
+         return Err(EIO);
+     }
+
+     let (gpu_rev, gpu_rev_id) = match (id_version >> 8) & 0xff {
+         0x00 => (hw::GpuRevision::A0, hw::GpuRevisionID::A0),
+         0x01 => (hw::GpuRevision::A1, hw::GpuRevisionID::A1),
+         0x10 => (hw::GpuRevision::B0, hw::GpuRevisionID::B0),
+         0x11 => (hw::GpuRevision::B1, hw::GpuRevisionID::B1),
+         0x20 => (hw::GpuRevision::C0, hw::GpuRevisionID::C0),
+         0x21 => (hw::GpuRevision::C1, hw::GpuRevisionID::C1),
+         a => {
+             dev_err!(self.dev, "Unknown GPU revision {}\n", a);
+             return Err(ENODEV);
+         }
+     };
+
+     Ok(hw::GpuIdConfig {
+         gpu_gen: match (id_version >> 24) & 0xff {
+             4 => hw::GpuGen::G13,
+             5 => hw::GpuGen::G14,
+             a => {
+                 dev_err!(self.dev, "Unknown GPU generation {}\n", a);
+                 return Err(ENODEV);
+             }
+         },
+         gpu_variant: match (id_version >> 16) & 0xff {
+             1 => hw::GpuVariant::P, // Guess
+             2 => hw::GpuVariant::G,
+             3 => hw::GpuVariant::S,
+             4 => {
+                 if num_clusters > 4 {
+                     hw::GpuVariant::D
+                 } else {
+                     hw::GpuVariant::C
+                 }
+             }
+             a => {
+                 dev_err!(self.dev, "Unknown GPU variant {}\n", a);
+                 return Err(ENODEV);
+             }
+         },
+         gpu_rev,
+         gpu_rev_id,
+         max_dies: (id_clusters >> 20) & 0xf,
+         num_clusters,
+         num_cores,
+         num_frags: (id_counts_1 >> 8) & 0xff,
+         num_gps: (id_counts_2 >> 16) & 0xff,
+         total_active_cores,
+         core_masks,
+         core_masks_packed,
+     })
+ }
+
+ /// Get the fault information from the MMU status register, if one occurred.
+ ///
+ /// Reads the 64-bit `FAULT_INFO` register once. Returns `None` when bit 0 of the value is
+ /// clear; otherwise every field of [`FaultInfo`] is decoded from that same value.
+ pub(crate) fn get_fault_info(&self) -> Option<FaultInfo> {
+     /// Constructors for unit codes 0x00..=0x9f, indexed by the low nibble of the unit code.
+     /// Low nibble 0xf has no entry and decodes as `Unknown`.
+     const PER_INSTANCE_UNITS: [fn(u8) -> FaultUnit; 15] = [
+         FaultUnit::DCMP,
+         FaultUnit::UL1C,
+         FaultUnit::CMP,
+         FaultUnit::GSL1,
+         FaultUnit::IAP,
+         FaultUnit::VCE,
+         FaultUnit::TE,
+         FaultUnit::RAS,
+         FaultUnit::VDM,
+         FaultUnit::PPP,
+         FaultUnit::IPF,
+         FaultUnit::IPF_CPF,
+         FaultUnit::VF,
+         FaultUnit::VF_CPF,
+         FaultUnit::ZLS,
+     ];
+     /// Constructors for unit codes 0xe0..=0xff, indexed by the low nibble of the unit code.
+     const SP_UNITS: [fn(u8) -> FaultUnit; 16] = [
+         FaultUnit::gPM_SP,
+         FaultUnit::gVDM_CSD_SP,
+         FaultUnit::gVDM_SSD_SP,
+         FaultUnit::gVDM_ILF_SP,
+         FaultUnit::gVDM_TFP_SP,
+         FaultUnit::gVDM_MMB_SP,
+         FaultUnit::gCDM_CS_KS0_SP,
+         FaultUnit::gCDM_CS_KS1_SP,
+         FaultUnit::gCDM_CS_KS2_SP,
+         FaultUnit::gCDM_KS0_SP,
+         FaultUnit::gCDM_KS1_SP,
+         FaultUnit::gCDM_KS2_SP,
+         FaultUnit::gIPP_SP,
+         FaultUnit::gIPP_CS_SP,
+         FaultUnit::gRDE0_SP,
+         FaultUnit::gRDE1_SP,
+     ];
+
+     let raw = self.sgx_read64(FAULT_INFO);
+
+     // Bit 0 clear: no fault is recorded.
+     if raw & 1 == 0 {
+         return None;
+     }
+
+     let unit_code = ((raw >> 9) & 0xff) as u8;
+     let low = unit_code & 0xf;
+     let high = unit_code >> 4;
+
+     let unit = match unit_code {
+         0x00..=0x9f => PER_INSTANCE_UNITS
+             .get(usize::from(low))
+             .map_or(FaultUnit::Unknown(unit_code), |ctor| ctor(high)),
+         0xa1 => FaultUnit::dPM,
+         0xa2..=0xa4 => FaultUnit::dCDM_KS(unit_code - 0xa2),
+         0xa5 => FaultUnit::dIPP,
+         0xa6 => FaultUnit::dIPP_CS,
+         0xa7 => FaultUnit::dVDM_CSD,
+         0xa8 => FaultUnit::dVDM_SSD,
+         0xa9 => FaultUnit::dVDM_ILF,
+         0xaa => FaultUnit::dVDM_ILD,
+         0xab | 0xac => FaultUnit::dRDE(unit_code - 0xab),
+         0xad => FaultUnit::FC,
+         0xae => FaultUnit::GSL2,
+         0xb0..=0xb7 => FaultUnit::GL2CC_META(low),
+         0xb8 => FaultUnit::GL2CC_MB,
+         // Only the lowest bit of the high nibble is passed on as the payload here.
+         0xe0..=0xff => SP_UNITS
+             .get(usize::from(low))
+             .map_or(FaultUnit::Unknown(unit_code), |ctor| ctor(high & 1)),
+         _ => FaultUnit::Unknown(unit_code),
+     };
+
+     let reason = match (raw >> 1) & 0x7 {
+         0 => FaultReason::Unmapped,
+         1 => FaultReason::AfFault,
+         2 => FaultReason::WriteOnly,
+         3 => FaultReason::ReadOnly,
+         4 => FaultReason::NoAccess,
+         other => FaultReason::Unknown(other as u8),
+     };
+
+     Some(FaultInfo {
+         address: (raw >> 30) << 6,
+         sideband: ((raw >> 23) & 0x7f) as u8,
+         vm_slot: ((raw >> 17) & 0x3f) as u32,
+         unit_code,
+         unit,
+         level: ((raw >> 7) & 3) as u8,
+         unk_5: ((raw >> 5) & 3) as u8,
+         read: (raw & (1 << 4)) != 0,
+         reason,
+     })
+ }
+}
diff --git a/drivers/gpu/drm/asahi/slotalloc.rs b/drivers/gpu/drm/asahi/slotalloc.rs
new file mode 100644
index 000000000000..6493111643fe
--- /dev/null
+++ b/drivers/gpu/drm/asahi/slotalloc.rs
@@ -0,0 +1,292 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+//! Generic slot allocator
+//!
+//! This is a simple allocator to manage fixed-size pools of GPU resources that are transiently
+//! required during command execution. Each item resides in a "slot" at a given index. Users borrow
+//! and return free items from the available pool.
+//!
+//! Allocations are "sticky", and return a token that callers can use to request the same slot
+//! again later. This allows slots to be lazily invalidated, so that multiple uses by the same user
+//! avoid any actual cleanup work.
+//!
+//! The allocation policy is currently a simple LRU mechanism, doing a full linear scan over the
+//! slots when no token was previously provided. This is probably good enough, since in the absence
+//! of serious system contention most allocation requests will be immediately fulfilled from the
+//! previous slot without doing an LRU scan.
+
+use core::ops::{Deref, DerefMut};
+use kernel::{
+ error::{code::*, Result},
+ prelude::*,
+ sync::{Arc, CondVar, Mutex, UniqueArc},
+};
+
+ /// Trait representing a single item within a slot.
+ pub(crate) trait SlotItem {
+ /// Arbitrary user data associated with the SlotAllocator.
+ type Data;
+
+ /// Called eagerly when this item is released back into the available pool.
+ ///
+ /// `_data` is the allocator's user data and `_slot` is the index of the slot the item is
+ /// returned to. The call is made while a `Guard` is being dropped, with the allocator lock
+ /// held. The default implementation does nothing.
+ fn release(&mut self, _data: &mut Self::Data, _slot: u32) {}
+ }
+
+ /// Trivial implementation for users which do not require any slot data nor any allocator data.
+ ///
+ /// Relies on the default (no-op) `release`.
+ impl SlotItem for () {
+ type Data = ();
+ }
+
+ /// Represents a current or previous allocation of an item from a slot. Users keep `SlotToken`s
+ /// around across allocations to request that, if possible, the same slot be reused.
+ #[derive(Copy, Clone, Debug)]
+ pub(crate) struct SlotToken {
+ /// Value of the allocator's allocation counter when the slot was freshly assigned. A later
+ /// request may reuse the slot only while the slot still records this same value.
+ time: u64,
+ /// Index of the slot.
+ slot: u32,
+ }
+
+ impl SlotToken {
+ /// Returns the slot index that this token represents a past assignment to.
+ ///
+ /// This is only the recorded index; it does not check whether the slot is still reusable.
+ pub(crate) fn last_slot(&self) -> u32 {
+ self.slot
+ }
+ }
+
+ /// A guard representing active ownership of a slot.
+ ///
+ /// Dropping the guard returns the item to its slot in the allocator.
+ pub(crate) struct Guard<T: SlotItem> {
+ /// The owned item. Wrapped in an `Option` so that `Drop` can move it back into the allocator.
+ item: Option<T>,
+ /// Whether this is a fresh assignment (`true`) or a token-based reuse (`false`).
+ changed: bool,
+ /// Token identifying the slot and the allocation it belongs to.
+ token: SlotToken,
+ /// The allocator that the item is returned to on drop.
+ alloc: Arc<SlotAllocatorOuter<T>>,
+ }
+
+ impl<T: SlotItem> Guard<T> {
+ /// Returns the active slot owned by this `Guard`.
+ pub(crate) fn slot(&self) -> u32 {
+ self.token.slot
+ }
+
+ /// Returns `true` if the slot changed since the last allocation (or no `SlotToken` was
+ /// provided), or `false` if the previously allocated slot was successfully re-acquired with
+ /// no other users in the interim.
+ pub(crate) fn changed(&self) -> bool {
+ self.changed
+ }
+
+ /// Returns a `SlotToken` that can be used to re-request the same slot at a later time, after
+ /// this `Guard` is dropped.
+ ///
+ /// `SlotToken` is `Copy`, so the returned value is an independent copy of the guard's token.
+ pub(crate) fn token(&self) -> SlotToken {
+ self.token
+ }
+ }
+
+ /// Gives shared access to the item held by the guard.
+ impl<T: SlotItem> Deref for Guard<T> {
+     type Target = T;
+
+     /// Panics if the guard no longer holds its item.
+     fn deref(&self) -> &Self::Target {
+         match self.item.as_ref() {
+             Some(item) => item,
+             None => panic!("SlotItem Guard lost our item!"),
+         }
+     }
+ }
+
+ /// Gives exclusive access to the item held by the guard.
+ impl<T: SlotItem> DerefMut for Guard<T> {
+     /// Panics if the guard no longer holds its item.
+     fn deref_mut(&mut self) -> &mut Self::Target {
+         match self.item.as_mut() {
+             Some(item) => item,
+             None => panic!("SlotItem Guard lost our item!"),
+         }
+     }
+ }
+
+ /// A slot item that is currently free.
+ struct Entry<T: SlotItem> {
+ /// The item itself.
+ item: T,
+ /// `time` of the token under which the item was last handed out (0 if never handed out).
+ get_time: u64,
+ /// Value of the allocator's drop counter when the item was last returned (0 if never
+ /// handed out). The free entry with the smallest value is picked first for new assignments.
+ drop_time: u64,
+ }
+
+ /// Inner data for the `SlotAllocator`, protected by a `Mutex`.
+ struct SlotAllocatorInner<T: SlotItem> {
+ /// User data associated with the allocator.
+ data: T::Data,
+ /// One element per slot: `Some` while the item is free, `None` while it is handed out.
+ slots: Vec<Option<Entry<T>>>,
+ /// Counter incremented on every fresh (non-reuse) assignment; the source of token times.
+ get_count: u64,
+ /// Counter incremented every time an item is returned; the source of `Entry::drop_time`.
+ drop_count: u64,
+ }
+
+ /// A single slot allocator instance.
+ struct SlotAllocatorOuter<T: SlotItem> {
+ /// The allocator state, behind its lock.
+ inner: Mutex<SlotAllocatorInner<T>>,
+ /// Waited on when no slot is free; notified each time an item is returned.
+ cond: CondVar,
+ }
+
+ /// A shared reference to a slot allocator instance.
+ ///
+ /// Cloning yields another handle to the same underlying allocator.
+ pub(crate) struct SlotAllocator<T: SlotItem>(Arc<SlotAllocatorOuter<T>>);
+
+impl<T: SlotItem> SlotAllocator<T> {
+ /// Creates a new `SlotAllocator`, with a fixed number of slots and arbitrary associated data.
+ ///
+ /// The caller provides a constructor callback which takes a reference to the `T::Data` and
+ /// creates a single slot. This is called during construction to create all the initial
+ /// items, which then live the lifetime of the `SlotAllocator`.
+ ///
+ /// The constructor is invoked once per slot, in index order, and also receives the slot
+ /// index. Returns an error if allocating the slot vector or the allocator itself fails.
+ pub(crate) fn new(
+ num_slots: u32,
+ mut data: T::Data,
+ mut constructor: impl FnMut(&mut T::Data, u32) -> T,
+ ) -> Result<SlotAllocator<T>> {
+ // Reserve room for every slot up front so the pushes below cannot need to reallocate.
+ let mut slots = Vec::try_with_capacity(num_slots as usize)?;
+
+ for i in 0..num_slots {
+ slots
+ .try_push(Some(Entry {
+ item: constructor(&mut data, i),
+ get_time: 0,
+ drop_time: 0,
+ }))
+ .expect("try_push() failed after reservation");
+ }
+
+ let inner = SlotAllocatorInner {
+ data,
+ slots,
+ get_count: 0,
+ drop_count: 0,
+ };
+
+ // The allocator is created uniquely owned and pinned so that the condition variable and
+ // mutex can be initialized in place before the handle is shared.
+ let mut alloc = Pin::from(UniqueArc::try_new(SlotAllocatorOuter {
+ // SAFETY: `condvar_init!` is called below.
+ cond: unsafe { CondVar::new() },
+ // SAFETY: `mutex_init!` is called below.
+ inner: unsafe { Mutex::new(inner) },
+ })?);
+
+ // SAFETY: `cond` is pinned when `alloc` is.
+ let pinned = unsafe { alloc.as_mut().map_unchecked_mut(|s| &mut s.cond) };
+ kernel::condvar_init!(pinned, "SlotAllocator::cond");
+
+ // SAFETY: `inner` is pinned when `alloc` is.
+ let pinned = unsafe { alloc.as_mut().map_unchecked_mut(|s| &mut s.inner) };
+ kernel::mutex_init!(pinned, "SlotAllocator::inner");
+
+ Ok(SlotAllocator(alloc.into()))
+ }
+
+ /// Runs `cb` on the user data associated with this allocator and returns its result.
+ ///
+ /// The allocator lock is taken before the callback is invoked and stays held while it runs.
+ pub(crate) fn with_inner<RetVal>(&self, cb: impl FnOnce(&mut T::Data) -> RetVal) -> RetVal {
+     let mut locked = self.0.inner.lock();
+     let data = &mut locked.data;
+     cb(data)
+ }
+
+ /// Gets a fresh slot, optionally reusing a previous allocation if a `SlotToken` is provided.
+ ///
+ /// Blocks if no slots are free.
+ ///
+ /// This is `get_inner` with a callback that does nothing; see `get_inner` for the error
+ /// conditions.
+ pub(crate) fn get(&self, token: Option<SlotToken>) -> Result<Guard<T>> {
+ self.get_inner(token, |_a, _b| Ok(()))
+ }
+
+ /// Gets a fresh slot, optionally reusing a previous allocation if a `SlotToken` is provided.
+ ///
+ /// Blocks if no slots are free.
+ ///
+ /// This version allows the caller to pass in a callback that gets a mutable reference to the
+ /// user data for the allocator and the freshly acquired slot, which is called before the
+ /// allocator lock is released. This can be used to perform bookkeeping associated with
+ /// specific slots (such as tracking their current owner).
+ ///
+ /// # Errors
+ ///
+ /// Returns `ERESTARTSYS` if waiting for a free slot is interrupted (the condition variable
+ /// wait returns `true`), or the error returned by `cb`. When `cb` fails, the slot that was
+ /// just acquired is handed back to the pool before this function returns.
+ ///
+ /// # Panics
+ ///
+ /// Panics if `token` names a slot index outside of this allocator.
+ pub(crate) fn get_inner(
+     &self,
+     token: Option<SlotToken>,
+     cb: impl FnOnce(&mut T::Data, &mut Guard<T>) -> Result<()>,
+ ) -> Result<Guard<T>> {
+     let mut inner = self.0.inner.lock();
+
+     // Fast path: the token's slot is free and was not handed to anyone else in the interim,
+     // i.e. the entry still records the token's allocation time.
+     if let Some(token) = token {
+         let slot = &mut inner.slots[token.slot as usize];
+         if matches!(slot.as_ref(), Some(entry) if entry.get_time == token.time) {
+             if let Some(entry) = slot.take() {
+                 let mut guard = Guard {
+                     item: Some(entry.item),
+                     token,
+                     changed: false,
+                     alloc: self.0.clone(),
+                 };
+                 if let Err(err) = cb(&mut inner.data, &mut guard) {
+                     // Dropping `guard` re-takes the allocator lock to return the item, so
+                     // the lock must be released first or the drop would deadlock.
+                     core::mem::drop(inner);
+                     return Err(err);
+                 }
+                 return Ok(guard);
+             }
+         }
+     }
+
+     // Slow path: pick the free slot that was returned the longest ago, waiting for one to
+     // be returned if none is free.
+     let mut first = true;
+     let slot = loop {
+         let mut oldest_time = u64::MAX;
+         let mut oldest_slot = 0u32;
+
+         for (i, slot) in inner.slots.iter().enumerate() {
+             if let Some(entry) = slot.as_ref() {
+                 if entry.drop_time < oldest_time {
+                     oldest_slot = i as u32;
+                     oldest_time = entry.drop_time;
+                 }
+             }
+         }
+
+         if oldest_time != u64::MAX {
+             break oldest_slot;
+         }
+
+         // Only warn once per call, however many times the wait is repeated.
+         if first {
+             pr_warn!(
+                 "{}: out of slots, blocking\n",
+                 core::any::type_name::<Self>()
+             );
+         }
+         first = false;
+         if self.0.cond.wait(&mut inner) {
+             return Err(ERESTARTSYS);
+         }
+     };
+
+     inner.get_count += 1;
+
+     let item = inner.slots[slot as usize]
+         .take()
+         .expect("Someone stole our slot?")
+         .item;
+
+     let mut guard = Guard {
+         item: Some(item),
+         changed: true,
+         token: SlotToken {
+             time: inner.get_count,
+             slot,
+         },
+         alloc: self.0.clone(),
+     };
+
+     if let Err(err) = cb(&mut inner.data, &mut guard) {
+         // As above: release the lock before `guard` is dropped and returns the item.
+         core::mem::drop(inner);
+         return Err(err);
+     }
+     Ok(guard)
+ }
+}
+
+ /// Cloning produces another handle to the same allocator instance.
+ impl<T: SlotItem> Clone for SlotAllocator<T> {
+     fn clone(&self) -> Self {
+         Self(Arc::clone(&self.0))
+     }
+ }
+
+ /// Returns the item to its slot when the guard goes away.
+ impl<T: SlotItem> Drop for Guard<T> {
+     /// Takes the allocator lock, calls `release` on the item, stores it back into its slot
+     /// stamped with the token time and the new drop count, and wakes one waiter. If the slot
+     /// is unexpectedly occupied, only a critical message is logged.
+     fn drop(&mut self) {
+         let index = self.token.slot as usize;
+         let mut inner = self.alloc.inner.lock();
+
+         if inner.slots[index].is_some() {
+             pr_crit!(
+                 "{}: tried to return an item into a full slot ({})\n",
+                 core::any::type_name::<Self>(),
+                 self.token.slot
+             );
+             return;
+         }
+
+         inner.drop_count += 1;
+         let drop_time = inner.drop_count;
+
+         let mut item = self.item.take().expect("Guard lost its item");
+         item.release(&mut inner.data, self.token.slot);
+
+         inner.slots[index] = Some(Entry {
+             item,
+             get_time: self.token.time,
+             drop_time,
+         });
+         self.alloc.cond.notify_one();
+     }
+ }
diff --git a/drivers/gpu/drm/asahi/util.rs b/drivers/gpu/drm/asahi/util.rs
new file mode 100644
index 000000000000..8d1a37f17cd8
--- /dev/null
+++ b/drivers/gpu/drm/asahi/util.rs
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+//! Miscellaneous utility functions
+
+use core::ops::{Add, BitAnd, Div, Not, Sub};
+
+ /// Rounds `a` up to the next multiple of `b`, where `b` must be a power of two.
+ ///
+ /// Panics if `b` has more than one bit set.
+ pub(crate) fn align<T>(a: T, b: T) -> T
+ where
+     T: Copy
+         + Default
+         + BitAnd<Output = T>
+         + Not<Output = T>
+         + Add<Output = T>
+         + Sub<Output = T>
+         + Div<Output = T>
+         + core::cmp::PartialEq,
+ {
+     let zero = T::default();
+     // Derive the constant one from the default value: all-ones divided by itself.
+     #[allow(clippy::eq_op)]
+     let one = !zero / !zero;
+     let low_bits = b - one;
+
+     assert!((b & low_bits) == zero);
+
+     let bumped = a + b - one;
+     bumped & !low_bits
+ }
+
+ /// Divides `a` by `b`, rounding the quotient up.
+ pub(crate) fn div_ceil<T>(a: T, b: T) -> T
+ where
+     T: Copy
+         + Default
+         + BitAnd<Output = T>
+         + Not<Output = T>
+         + Add<Output = T>
+         + Sub<Output = T>
+         + Div<Output = T>,
+ {
+     let zero = T::default();
+     // Derive the constant one from the default value: all-ones divided by itself.
+     #[allow(clippy::eq_op)]
+     let one = !zero / !zero;
+
+     let bumped = a + b - one;
+     bumped / b
+ }
diff --git a/drivers/gpu/drm/asahi/workqueue.rs b/drivers/gpu/drm/asahi/workqueue.rs
new file mode 100644
index 000000000000..ce1d1f89e48e
--- /dev/null
+++ b/drivers/gpu/drm/asahi/workqueue.rs
@@ -0,0 +1,880 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+//! GPU command execution queues
+//!
+//! The AGX GPU firmware schedules GPU work commands out of work queues, which are ring buffers of
+//! pointers to work commands. There can be an arbitrary number of work queues. Work queues have an
+//! associated type (vertex, fragment, or compute) and may only contain generic commands or commands
+//! specific to that type.
+//!
+//! This module manages queueing work commands into a work queue and submitting them for execution
+//! by the firmware. An active work queue needs an event to signal completion of its work, which is
+//! owned by what we call a batch. This event then notifies the work queue when work is completed,
+//! and that triggers freeing of all resources associated with that work. An idle work queue gives
+//! up its associated event.
+
+use crate::debug::*;
+use crate::fw::channels::PipeType;
+use crate::fw::types::*;
+use crate::fw::workqueue::*;
+use crate::object::OpaqueGpuObject;
+use crate::regs::FaultReason;
+use crate::{box_in_place, no_debug, place};
+use crate::{channel, driver, event, fw, gpu, object, regs};
+use core::num::NonZeroU64;
+use core::sync::atomic::Ordering;
+use kernel::{
+ bindings,
+ error::code::*,
+ prelude::*,
+ sync::{Arc, Guard, Mutex, UniqueArc},
+};
+
+// Debug flag class for this module; presumably consumed by the `mod_pr_debug!` and
+// `mod_dev_dbg!` macros to gate their output (confirm against debug.rs).
+const DEBUG_CLASS: DebugFlags = DebugFlags::WorkQueue;
+
+// Upper bound used by `free_slots()`: the number of free job slots is this value minus the
+// distance between the last submitted and the last completed event values.
+const MAX_JOB_SLOTS: u32 = 127;
+
+/// An enum of possible errors that might cause a piece of work to fail execution.
+///
+/// Each variant converts both into a `drm_asahi_result_info` status and into a kernel
+/// error code through the `From` implementations below.
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+pub(crate) enum WorkError {
+ /// GPU timeout (command execution took too long). Converts to `ETIMEDOUT`.
+ Timeout,
+ /// GPU MMU fault (invalid access). Carries the decoded fault information and converts
+ /// to `EIO`.
+ Fault(regs::FaultInfo),
+ /// Work failed due to an error caused by other concurrent GPU work. Converts to
+ /// `ECANCELED`.
+ Killed,
+ /// The GPU crashed. Converts to `ENODEV`.
+ NoDevice,
+ /// Unknown reason. Converts to `ENODATA`.
+ Unknown,
+}
+
+impl From<WorkError> for bindings::drm_asahi_result_info {
+ fn from(err: WorkError) -> Self {
+ match err {
+ WorkError::Fault(info) => Self {
+ status: bindings::drm_asahi_status_DRM_ASAHI_STATUS_FAULT,
+ fault_type: match info.reason {
+ FaultReason::Unmapped => bindings::drm_asahi_fault_DRM_ASAHI_FAULT_UNMAPPED,
+ FaultReason::AfFault => bindings::drm_asahi_fault_DRM_ASAHI_FAULT_AF_FAULT,
+ FaultReason::WriteOnly => bindings::drm_asahi_fault_DRM_ASAHI_FAULT_WRITE_ONLY,
+ FaultReason::ReadOnly => bindings::drm_asahi_fault_DRM_ASAHI_FAULT_READ_ONLY,
+ FaultReason::NoAccess => bindings::drm_asahi_fault_DRM_ASAHI_FAULT_NO_ACCESS,
+ FaultReason::Unknown(_) => bindings::drm_asahi_fault_DRM_ASAHI_FAULT_UNKNOWN,
+ },
+ unit: info.unit_code.into(),
+ sideband: info.sideband.into(),
+ level: info.level,
+ extra: info.unk_5.into(),
+ is_read: info.read as u8,
+ pad: 0,
+ address: info.address,
+ },
+ a => Self {
+ status: match a {
+ WorkError::Timeout => bindings::drm_asahi_status_DRM_ASAHI_STATUS_TIMEOUT,
+ WorkError::Killed => bindings::drm_asahi_status_DRM_ASAHI_STATUS_KILLED,
+ WorkError::NoDevice => bindings::drm_asahi_status_DRM_ASAHI_STATUS_NO_DEVICE,
+ _ => bindings::drm_asahi_status_DRM_ASAHI_STATUS_UNKNOWN_ERROR,
+ },
+ ..Default::default()
+ },
+ }
+ }
+}
+
+impl From<WorkError> for kernel::error::Error {
+ fn from(err: WorkError) -> Self {
+ match err {
+ WorkError::Timeout => ETIMEDOUT,
+ // Not EFAULT because that's for userspace faults
+ WorkError::Fault(_) => EIO,
+ WorkError::Unknown => ENODATA,
+ WorkError::Killed => ECANCELED,
+ WorkError::NoDevice => ENODEV,
+ }
+ }
+}
+
+/// A GPU context tracking structure, which must be explicitly invalidated when dropped.
+pub(crate) struct GpuContext {
+ // Device handle; used on drop to reach the GPU and invalidate this context.
+ dev: driver::AsahiDevice,
+ // GPU-side context data object, allocated from the shared allocator in `new()`.
+ data: GpuObject<fw::workqueue::GpuContextData>,
+}
+no_debug!(GpuContext);
+
+impl GpuContext {
+    /// Creates a GPU context whose backing data object comes from the shared allocator.
+    ///
+    /// Fails if the allocation of the context data object fails.
+    pub(crate) fn new(
+        dev: &driver::AsahiDevice,
+        alloc: &mut gpu::KernelAllocators,
+    ) -> Result<GpuContext> {
+        let dev = dev.clone();
+        let data: GpuObject<fw::workqueue::GpuContextData> = alloc
+            .shared
+            .new_object(Default::default(), |_inner| Default::default())?;
+
+        Ok(GpuContext { dev, data })
+    }
+
+    /// Yields the GPU address of the context data structure owned by this context.
+    pub(crate) fn gpu_pointer(&self) -> GpuPointer<'_, fw::workqueue::GpuContextData> {
+        let context_data = &self.data;
+        context_data.gpu_pointer()
+    }
+}
+
+/// Asks the GPU to invalidate the context before its backing data is released.
+impl Drop for GpuContext {
+    fn drop(&mut self) {
+        mod_dev_dbg!(self.dev, "GpuContext: Invalidating GPU context\n");
+
+        let dev_data = self.dev.data();
+        let outcome = dev_data.gpu.invalidate_context(&self.data);
+
+        // Nothing more can be done from a destructor, so a failure is only reported.
+        if outcome.is_err() {
+            dev_err!(self.dev, "GpuContext: Failed to invalidate GPU context!\n");
+        }
+    }
+}
+
+// A single command queued on a work queue, together with the state needed to complete it.
+struct SubmittedWork<O, C>
+where
+ O: OpaqueGpuObject,
+ C: FnOnce(O, Option<WorkError>) + Send + Sync + 'static,
+{
+ // The GPU command object; its GPU address is what gets written into the ring.
+ object: O,
+ // Event value at which this command counts as completed.
+ value: EventValue,
+ // Error recorded by `mark_error()`, if any; handed to the callback on completion.
+ error: Option<WorkError>,
+ // Ring write pointer assigned to this command at submission time.
+ wptr: u32,
+ // VM slot of the submitter; used to tell our own faults apart from other contexts'.
+ vm_slot: u32,
+ // Completion callback, invoked exactly once with the command object and the error.
+ callback: C,
+}
+
+// Type-erased view of a `SubmittedWork`, so commands with different object and callback
+// types can be stored in the same `Vec<Box<dyn GenSubmittedWork>>`.
+trait GenSubmittedWork: Send + Sync {
+ // GPU address of the command object.
+ fn gpu_va(&self) -> NonZeroU64;
+ // Event value at which the command counts as completed.
+ fn value(&self) -> event::EventValue;
+ // Ring write pointer recorded for the command.
+ fn wptr(&self) -> u32;
+ // Records the ring write pointer for the command.
+ fn set_wptr(&mut self, wptr: u32);
+ // Records that the command failed with `error`.
+ fn mark_error(&mut self, error: WorkError);
+ // Consumes the command and invokes its completion callback.
+ fn complete(self: Box<Self>);
+}
+
+impl<O: OpaqueGpuObject, C: FnOnce(O, Option<WorkError>) + Send + Sync> GenSubmittedWork
+    for SubmittedWork<O, C>
+{
+    /// GPU address of the wrapped command object.
+    fn gpu_va(&self) -> NonZeroU64 {
+        self.object.gpu_va()
+    }
+
+    /// Event value at which this command counts as completed.
+    fn value(&self) -> event::EventValue {
+        self.value
+    }
+
+    /// Ring write pointer recorded for this command.
+    fn wptr(&self) -> u32 {
+        self.wptr
+    }
+
+    /// Records the ring write pointer for this command.
+    fn set_wptr(&mut self, wptr: u32) {
+        self.wptr = wptr;
+    }
+
+    /// Records a failure for this command.
+    ///
+    /// A fault raised from a VM slot other than this command's own is recorded as
+    /// `WorkError::Killed`; every other error is stored unchanged.
+    fn mark_error(&mut self, error: WorkError) {
+        mod_pr_debug!("WorkQueue: Command at value {:#x?} failed\n", self.value);
+
+        let foreign_fault =
+            matches!(error, WorkError::Fault(info) if info.vm_slot != self.vm_slot);
+
+        self.error = Some(if foreign_fault {
+            WorkError::Killed
+        } else {
+            error
+        });
+    }
+
+    /// Consumes the command, passing the command object and any recorded error to the
+    /// completion callback.
+    fn complete(self: Box<Self>) {
+        // Move the work item out of its box so its fields can be consumed individually.
+        let work = *self;
+
+        (work.callback)(work.object, work.error);
+    }
+}
+
+/// Inner data for managing a single work queue.
+#[versions(AGX)]
+struct WorkQueueInner {
+ // Source of the completion event this queue grabs while it has jobs.
+ event_manager: Arc<event::EventManager>,
+ // GPU-side queue info object (ring buffer, ring state, etc.).
+ info: GpuObject<QueueInfo::ver>,
+ // True until the first run message has been sent for this queue.
+ new: bool,
+ pipe_type: PipeType,
+ // Number of entries in the ring buffer.
+ size: u32,
+ // CPU write pointer: the next ring index to be filled.
+ wptr: u32,
+ // Commands that have been written to the ring and not yet completed.
+ pending: Vec<Box<dyn GenSubmittedWork>>,
+ // Token of the most recently grabbed event, passed back to the event manager.
+ last_token: Option<event::Token>,
+ // Number of live `Job` objects created from this queue.
+ pending_jobs: usize,
+ // Event value as of the most recent submission / completion signal.
+ last_submitted: Option<event::EventValue>,
+ last_completed: Option<event::EventValue>,
+ // The event currently held, together with the last committed event value.
+ event: Option<(event::Event, event::EventValue)>,
+ priority: u32,
+ // Running count of commands committed / actually sent to the firmware.
+ commit_seq: u64,
+ submit_seq: u64,
+}
+
+/// An instance of a work queue.
+#[versions(AGX)]
+pub(crate) struct WorkQueue {
+ // Weak GPU pointer to the queue info object, readable without taking the lock.
+ info_pointer: GpuWeakPointer<QueueInfo::ver>,
+ // All mutable queue state, protected by a mutex.
+ inner: Mutex<WorkQueueInner::ver>,
+}
+
+#[versions(AGX)]
+impl WorkQueueInner::ver {
+    /// Reads the GPU done pointer from the ring state, i.e. how far the GPU has progressed
+    /// through the work items in the ring.
+    fn doneptr(&self) -> u32 {
+        let ring_state = &self.info.state;
+
+        ring_state.with(|raw, _inner| raw.gpu_doneptr.load(Ordering::Acquire))
+    }
+}
+
+// Snapshot of a queue's event state, handed to code that builds commands for the queue.
+#[versions(AGX)]
+#[derive(Copy, Clone)]
+pub(crate) struct QueueEventInfo {
+ // GPU pointers to the stamps of the event held by the queue.
+ pub(crate) stamp_pointer: GpuWeakPointer<Stamp>,
+ pub(crate) fw_stamp_pointer: GpuWeakPointer<FwStamp>,
+ // Slot index of the event.
+ pub(crate) slot: u32,
+ // Event value at the time of the snapshot.
+ pub(crate) value: event::EventValue,
+ // Command sequence number at the time of the snapshot.
+ pub(crate) cmd_seq: u64,
+ // Weak GPU pointer to the queue info object.
+ pub(crate) info_ptr: GpuWeakPointer<QueueInfo::ver>,
+}
+
+// A batch of commands being built for a work queue; see `add()`, `commit()` and `submit()`.
+#[versions(AGX)]
+pub(crate) struct Job {
+ // The queue this job belongs to.
+ wq: Arc<WorkQueue::ver>,
+ // Event info captured at creation; its value advances with `next_seq()`.
+ event_info: QueueEventInfo::ver,
+ // Event value at creation, used by `commit()` to detect out-of-order commits.
+ start_value: EventValue,
+ // Commands added so far and not yet handed to the queue.
+ pending: Vec<Box<dyn GenSubmittedWork>>,
+ committed: bool,
+ submitted: bool,
+ // Number of event increments consumed by this job.
+ event_count: usize,
+}
+
+// A job whose commands have been written to the ring but not yet announced to the
+// firmware. Holds the queue lock; dropping it without calling `run()` rolls back.
+#[versions(AGX)]
+pub(crate) struct JobSubmission<'a> {
+ // The locked queue state; taken out by `run()`.
+ inner: Option<Guard<'a, Mutex<WorkQueueInner::ver>>>,
+ // Ring write pointer after the job's commands.
+ wptr: u32,
+ // Number of event increments and commands belonging to the job (for rollback).
+ event_count: usize,
+ command_count: usize,
+}
+
+#[versions(AGX)]
+impl Job::ver {
+    /// Returns the event info for the next command of this job.
+    ///
+    /// This is the info captured when the job was created, with the command sequence number
+    /// advanced by the number of events consumed so far.
+    pub(crate) fn event_info(&self) -> QueueEventInfo::ver {
+        let mut info = self.event_info;
+        info.cmd_seq += self.event_count as u64;
+
+        info
+    }
+
+    /// Consumes one event increment: bumps the job's event count and its event value.
+    pub(crate) fn next_seq(&mut self) {
+        self.event_count += 1;
+        self.event_info.value.increment();
+    }
+
+    /// Adds a command to the job without a completion callback.
+    ///
+    /// See [`Self::add_cb`] for the error conditions.
+    pub(crate) fn add<O: object::OpaqueGpuObject + 'static>(
+        &mut self,
+        command: O,
+        vm_slot: u32,
+    ) -> Result {
+        self.add_cb(command, vm_slot, |_, _| {})
+    }
+
+    /// Adds a command to the job, with a callback invoked once the command completes.
+    ///
+    /// The callback receives the command object back, together with the error recorded for
+    /// it (if any). The command is tagged with the event value following the job's current
+    /// one.
+    ///
+    /// Returns `EINVAL` if the job has already been committed, or an allocation error if the
+    /// command cannot be stored.
+    pub(crate) fn add_cb<O: object::OpaqueGpuObject + 'static>(
+        &mut self,
+        command: O,
+        vm_slot: u32,
+        callback: impl FnOnce(O, Option<WorkError>) + Sync + Send + 'static,
+    ) -> Result {
+        if self.committed {
+            pr_err!("WorkQueue: Tried to mutate committed Job\n");
+            return Err(EINVAL);
+        }
+
+        self.pending.try_push(Box::try_new(SubmittedWork::<_, _> {
+            object: command,
+            value: self.event_info.value.next(),
+            error: None,
+            callback,
+            // The real write pointer is assigned in `submit()`.
+            wptr: 0,
+            vm_slot,
+        })?)?;
+
+        Ok(())
+    }
+
+    /// Commits the job: publishes its final event value and command count to the queue.
+    ///
+    /// Jobs must be committed in the order they were created; this is checked by comparing
+    /// the queue's current event value with the value seen when the job was created.
+    ///
+    /// Returns `EINVAL` if the job was already committed, has no commands, or is committed
+    /// out of order.
+    pub(crate) fn commit(&mut self) -> Result {
+        if self.committed {
+            pr_err!("WorkQueue: Tried to commit committed Job\n");
+            return Err(EINVAL);
+        }
+
+        if self.pending.is_empty() {
+            pr_err!("WorkQueue: Job::commit() with no commands\n");
+            return Err(EINVAL);
+        }
+
+        let mut inner = self.wq.inner.lock();
+
+        let ev = inner.event.as_mut().expect("WorkQueue: Job lost its event");
+
+        if ev.1 != self.start_value {
+            pr_err!(
+                "WorkQueue: Job::commit() out of order (event slot {} {:?} != {:?})\n",
+                ev.0.slot(),
+                ev.1,
+                self.start_value
+            );
+            return Err(EINVAL);
+        }
+
+        ev.1 = self.event_info.value;
+        inner.commit_seq += self.pending.len() as u64;
+        self.committed = true;
+
+        Ok(())
+    }
+
+    /// Returns whether the queue currently has room for this job.
+    ///
+    /// Both the number of free job slots and the free ring space are checked under a single
+    /// lock acquisition, so the two checks observe the same queue state.
+    pub(crate) fn can_submit(&self) -> bool {
+        let inner = self.wq.inner.lock();
+
+        inner.free_slots() > self.event_count && inner.free_space() > self.pending.len()
+    }
+
+    /// Writes the job's commands into the ring buffer.
+    ///
+    /// The returned [`JobSubmission::ver`] keeps the queue locked; call its `run()` to
+    /// announce the work to the firmware. Dropping it instead rolls the submission back.
+    ///
+    /// Returns `EINVAL` if the job is not committed, was already submitted, has no commands,
+    /// or is submitted out of order; `EBUSY` if the commands do not fit in the ring; or an
+    /// allocation error if the queue's pending list cannot grow.
+    ///
+    /// # Panics
+    ///
+    /// Panics if writing a command would make the write pointer catch up with the GPU done
+    /// pointer.
+    pub(crate) fn submit(&mut self) -> Result<JobSubmission::ver<'_>> {
+        if !self.committed {
+            pr_err!("WorkQueue: Tried to submit uncommitted Job\n");
+            return Err(EINVAL);
+        }
+
+        if self.submitted {
+            pr_err!("WorkQueue: Tried to submit Job twice\n");
+            return Err(EINVAL);
+        }
+
+        if self.pending.is_empty() {
+            pr_err!("WorkQueue: Job::submit() with no commands\n");
+            return Err(EINVAL);
+        }
+
+        let mut inner = self.wq.inner.lock();
+
+        if inner.submit_seq != self.event_info.cmd_seq {
+            pr_err!(
+                "WorkQueue: Job::submit() out of order (submit_seq {} != {})\n",
+                inner.submit_seq,
+                self.event_info.cmd_seq
+            );
+            return Err(EINVAL);
+        }
+
+        let command_count = self.pending.len();
+        // Sequence number the queue must have committed up to for this job to be valid.
+        let end_seq = self.event_info.cmd_seq + command_count as u64;
+
+        if inner.commit_seq < end_seq {
+            pr_err!(
+                "WorkQueue: Job::submit() out of order (commit_seq {} < {})\n",
+                inner.commit_seq,
+                end_seq
+            );
+            return Err(EINVAL);
+        }
+
+        let mut wptr = inner.wptr;
+
+        if inner.free_space() <= command_count {
+            pr_err!("WorkQueue: Job does not fit in ring buffer\n");
+            return Err(EBUSY);
+        }
+
+        inner.pending.try_reserve(command_count)?;
+
+        inner.last_submitted = inner.event.as_ref().map(|e| e.1);
+
+        for mut command in self.pending.drain(..) {
+            command.set_wptr(wptr);
+
+            let next_wptr = (wptr + 1) % inner.size;
+            assert!(inner.doneptr() != next_wptr);
+            inner.info.ring[wptr as usize] = command.gpu_va().get();
+            wptr = next_wptr;
+
+            // Cannot fail, since we did a try_reserve(command_count) above
+            inner
+                .pending
+                .try_push(command)
+                .expect("try_push() failed after try_reserve()");
+        }
+
+        self.submitted = true;
+
+        Ok(JobSubmission::ver {
+            inner: Some(inner),
+            wptr,
+            command_count,
+            event_count: self.event_count,
+        })
+    }
+}
+
+#[versions(AGX)]
+impl<'a> JobSubmission::ver<'a> {
+ /// Announces the submitted commands to the firmware.
+ ///
+ /// Publishes the new write pointer in the ring state, sends a run message for the queue
+ /// on `channel`, and advances the queue's submit sequence number.
+ pub(crate) fn run(mut self, channel: &mut channel::PipeChannel::ver) {
+ let command_count = self.command_count;
+ let mut inner = self.inner.take().expect("No inner?");
+ let wptr = self.wptr;
+ // Skip `Drop for JobSubmission`: it rolls the submission back, and it expects
+ // `inner` to still be present.
+ core::mem::forget(self);
+
+ inner
+ .info
+ .state
+ .with(|raw, _inner| raw.cpu_wptr.store(wptr, Ordering::Release));
+
+ inner.wptr = wptr;
+
+ let event = inner.event.as_mut().expect("JobSubmission lost its event");
+
+ let event_slot = event.0.slot();
+
+ let msg = fw::channels::RunWorkQueueMsg::ver {
+ pipe_type: inner.pipe_type,
+ work_queue: Some(inner.info.weak_pointer()),
+ wptr: inner.wptr,
+ event_slot,
+ is_new: inner.new,
+ __pad: Default::default(),
+ };
+ channel.send(&msg);
+ // Only the first run message for a queue carries `is_new`.
+ inner.new = false;
+
+ inner.submit_seq += command_count as u64;
+ }
+
+ /// Returns the pipe type of the queue being submitted to.
+ pub(crate) fn pipe_type(&self) -> PipeType {
+ self.inner.as_ref().expect("No inner?").pipe_type
+ }
+
+ /// Returns the priority of the queue being submitted to.
+ pub(crate) fn priority(&self) -> u32 {
+ self.inner.as_ref().expect("No inner?").priority
+ }
+}
+
+/// Releases the job's claim on the queue, undoing a commit that was never submitted.
+#[versions(AGX)]
+impl Drop for Job::ver {
+    fn drop(&mut self) {
+        mod_pr_debug!("WorkQueue: Dropping Job\n");
+
+        let needs_rollback = self.committed && !self.submitted;
+        let mut inner = self.wq.inner.lock();
+
+        if needs_rollback {
+            // The job was committed but never reached the ring: give back the event values
+            // and the command sequence numbers it claimed.
+            let events = self.event_count;
+            let commands = self.pending.len();
+            let pipe_type = inner.pipe_type;
+            let event = inner.event.as_mut().expect("Job lost its event");
+            mod_pr_debug!(
+                "WorkQueue({:?}): Roll back {} events (slot {} val {:#x?}) and {} commands\n",
+                pipe_type,
+                events,
+                event.0.slot(),
+                event.1,
+                commands
+            );
+            event.1.sub(events as u32);
+            inner.commit_seq -= commands as u64;
+        }
+
+        inner.pending_jobs -= 1;
+
+        // Once no job is alive and nothing is in flight, the queue gives up its event.
+        let idle = inner.pending.is_empty() && inner.pending_jobs == 0;
+        if idle {
+            mod_pr_debug!("WorkQueue({:?}): Dropping event\n", inner.pipe_type);
+            inner.event = None;
+            inner.last_submitted = None;
+            inner.last_completed = None;
+        }
+
+        mod_pr_debug!("WorkQueue({:?}): Dropped Job\n", inner.pipe_type);
+    }
+}
+
+/// Rolls back a submission that was dropped without `run()` being called.
+///
+/// `run()` forgets the value instead of dropping it, so this only executes on the
+/// abandoned path.
+#[versions(AGX)]
+impl<'a> Drop for JobSubmission::ver<'a> {
+    fn drop(&mut self) {
+        let commands = self.command_count;
+        let events = self.event_count;
+
+        let queue = self.inner.as_mut().expect("No inner?");
+        mod_pr_debug!("WorkQueue({:?}): Dropping JobSubmission\n", queue.pipe_type);
+
+        // Discard the commands this submission appended to the pending list.
+        let keep = queue.pending.len() - commands;
+        queue.pending.truncate(keep);
+
+        let pipe_type = queue.pipe_type;
+        let event = queue.event.as_mut().expect("JobSubmission lost its event");
+        mod_pr_debug!(
+            "WorkQueue({:?}): Roll back {} events (slot {} val {:#x?}) and {} commands\n",
+            pipe_type,
+            events,
+            event.0.slot(),
+            event.1,
+            commands
+        );
+
+        // Give back the event values and command sequence numbers claimed at commit time.
+        event.1.sub(events as u32);
+        queue.commit_seq -= commands as u64;
+
+        mod_pr_debug!("WorkQueue({:?}): Dropped JobSubmission\n", queue.pipe_type);
+    }
+}
+
+#[versions(AGX)]
+impl WorkQueueInner::ver {
+    /// Number of ring entries still available: the ring size, minus the commands in
+    /// flight, minus one entry that is always kept unused.
+    pub(crate) fn free_space(&self) -> usize {
+        let capacity = self.size as usize;
+        let in_flight = self.pending.len();
+
+        capacity - in_flight - 1
+    }
+
+    /// Number of job slots still available, out of `MAX_JOB_SLOTS`.
+    ///
+    /// The busy count is the distance between the last submitted and the last completed
+    /// event values; the result is clamped at zero.
+    ///
+    /// # Panics
+    ///
+    /// Panics if a submitted value is recorded without a completed value.
+    pub(crate) fn free_slots(&self) -> usize {
+        let busy_slots = match self.last_submitted {
+            Some(ls) => {
+                let lc = self
+                    .last_completed
+                    .expect("last_submitted but not completed?");
+                ls.delta(&lc)
+            }
+            None => 0,
+        };
+
+        let free = (MAX_JOB_SLOTS as i32) - busy_slots;
+        free.max(0) as usize
+    }
+}
+
+#[versions(AGX)]
+impl WorkQueue::ver {
+ /// Create a new WorkQueue of a given type and priority.
+ ///
+ /// Allocates the ring state, the ring itself (`size` entries) and the firmware-visible
+ /// queue info structure, then wraps everything in a pinned, mutex-protected queue.
+ ///
+ /// Returns `EINVAL` if `priority` is not a valid index into `raw::PRIORITY`, or an
+ /// allocation error if any of the objects cannot be allocated.
+ #[allow(clippy::too_many_arguments)]
+ pub(crate) fn new(
+ alloc: &mut gpu::KernelAllocators,
+ event_manager: Arc<event::EventManager>,
+ gpu_context: Arc<GpuContext>,
+ notifier_list: Arc<GpuObject<fw::event::NotifierList>>,
+ pipe_type: PipeType,
+ id: u64,
+ priority: u32,
+ size: u32,
+ ) -> Result<Arc<WorkQueue::ver>> {
+ let mut info = box_in_place!(QueueInfo::ver {
+ state: alloc.shared.new_default::<RingState>()?,
+ ring: alloc.shared.array_empty(size as usize)?,
+ gpu_buf: alloc.private.array_empty(0x2c18)?,
+ notifier_list: notifier_list,
+ gpu_context: gpu_context,
+ })?;
+
+ // Tell the firmware how many entries the ring has.
+ info.state.with_mut(|raw, _inner| {
+ raw.rb_size = size;
+ });
+
+ let inner = WorkQueueInner::ver {
+ event_manager,
+ info: alloc.private.new_boxed(info, |inner, ptr| {
+ Ok(place!(
+ ptr,
+ raw::QueueInfo::ver {
+ state: inner.state.gpu_pointer(),
+ ring: inner.ring.gpu_pointer(),
+ notifier_list: inner.notifier_list.gpu_pointer(),
+ gpu_buf: inner.gpu_buf.gpu_pointer(),
+ gpu_rptr1: Default::default(),
+ gpu_rptr2: Default::default(),
+ gpu_rptr3: Default::default(),
+ event_id: AtomicI32::new(-1),
+ priority: *raw::PRIORITY.get(priority as usize).ok_or(EINVAL)?,
+ unk_4c: -1,
+ // NOTE(review): `id` is truncated to 32 bits here.
+ uuid: id as u32,
+ unk_54: -1,
+ unk_58: Default::default(),
+ busy: Default::default(),
+ __pad: Default::default(),
+ unk_84_state: Default::default(),
+ unk_88: 0,
+ unk_8c: 0,
+ unk_90: 0,
+ unk_94: 0,
+ pending: Default::default(),
+ unk_9c: 0,
+ #[ver(V >= V13_2)]
+ unk_a0_0: 0,
+ gpu_context: inner.gpu_context.gpu_pointer(),
+ unk_a8: Default::default(),
+ #[ver(V >= V13_2)]
+ unk_b0: 0,
+ }
+ ))
+ })?,
+ new: true,
+ pipe_type,
+ size,
+ wptr: 0,
+ pending: Vec::new(),
+ last_token: None,
+ event: None,
+ priority,
+ pending_jobs: 0,
+ commit_seq: 0,
+ submit_seq: 0,
+ last_completed: None,
+ last_submitted: None,
+ };
+
+ let mut queue = Pin::from(UniqueArc::try_new(Self {
+ info_pointer: inner.info.weak_pointer(),
+ // SAFETY: `mutex_init!` is called below.
+ inner: unsafe { Mutex::new(inner) },
+ })?);
+
+ // SAFETY: `inner` is pinned when `queue` is.
+ let pinned = unsafe { queue.as_mut().map_unchecked_mut(|s| &mut s.inner) };
+ // The mutex gets a distinct name per pipe type.
+ match pipe_type {
+ PipeType::Vertex => kernel::mutex_init!(pinned, "WorkQueue::inner (Vertex)"),
+ PipeType::Fragment => kernel::mutex_init!(pinned, "WorkQueue::inner (Fragment)"),
+ PipeType::Compute => kernel::mutex_init!(pinned, "WorkQueue::inner (Compute)"),
+ }
+
+ Ok(queue.into())
+ }
+
+ /// Returns a snapshot of the queue's event state, or `None` if the queue currently holds
+ /// no event.
+ pub(crate) fn event_info(&self) -> Option<QueueEventInfo::ver> {
+ let inner = self.inner.lock();
+
+ inner.event.as_ref().map(|ev| QueueEventInfo::ver {
+ stamp_pointer: ev.0.stamp_pointer(),
+ fw_stamp_pointer: ev.0.fw_stamp_pointer(),
+ slot: ev.0.slot(),
+ value: ev.1,
+ cmd_seq: inner.commit_seq,
+ info_ptr: self.info_pointer,
+ })
+ }
+
+ /// Creates a new, empty job for this queue.
+ ///
+ /// If the queue holds no event yet, one is grabbed from the event manager first; this
+ /// is the only step that can fail.
+ pub(crate) fn new_job(self: &Arc<Self>) -> Result<Job::ver> {
+ let mut inner = self.inner.lock();
+
+ if inner.event.is_none() {
+ mod_pr_debug!("WorkQueue({:?}): Grabbing event\n", inner.pipe_type);
+ let event = inner.event_manager.get(inner.last_token, self.clone())?;
+ let cur = event.current();
+ inner.last_token = Some(event.token());
+ mod_pr_debug!(
+ "WorkQueue({:?}): Grabbed event slot {}: {:#x?}\n",
+ inner.pipe_type,
+ event.slot(),
+ cur
+ );
+ // A freshly grabbed event starts with nothing in flight.
+ inner.event = Some((event, cur));
+ inner.last_submitted = Some(cur);
+ inner.last_completed = Some(cur);
+ }
+
+ // Balanced by the decrement in `Drop for Job`.
+ inner.pending_jobs += 1;
+
+ let ev = &inner.event.as_ref().unwrap();
+
+ mod_pr_debug!("WorkQueue({:?}): New job\n", inner.pipe_type);
+ Ok(Job::ver {
+ wq: self.clone(),
+ event_info: QueueEventInfo::ver {
+ stamp_pointer: ev.0.stamp_pointer(),
+ fw_stamp_pointer: ev.0.fw_stamp_pointer(),
+ slot: ev.0.slot(),
+ value: ev.1,
+ cmd_seq: inner.commit_seq,
+ info_ptr: self.info_pointer,
+ },
+ start_value: ev.1,
+ pending: Vec::new(),
+ event_count: 0,
+ committed: false,
+ submitted: false,
+ })
+ }
+
+ /// Return the number of free entries in the workqueue
+ pub(crate) fn free_space(&self) -> usize {
+ self.inner.lock().free_space()
+ }
+
+ /// Return the number of free job slots in the workqueue
+ pub(crate) fn free_slots(&self) -> usize {
+ self.inner.lock().free_slots()
+ }
+
+ /// Return the pipe type (vertex, fragment or compute) of the workqueue
+ pub(crate) fn pipe_type(&self) -> PipeType {
+ self.inner.lock().pipe_type
+ }
+}
+
+/// Trait used to erase the version-specific type of WorkQueues, to avoid leaking
+/// version-specificity into the event module.
+pub(crate) trait WorkQueue {
+ /// Processes completed work; returns `true` if no commands remain pending.
+ fn signal(&self) -> bool;
+ /// Marks pending work up to and including `value` as failed with `error`.
+ fn mark_error(&self, value: event::EventValue, error: WorkError);
+ /// Marks all pending work as failed with `error` and completes it.
+ fn fail_all(&self, error: WorkError);
+}
+
+#[versions(AGX)]
+impl WorkQueue for WorkQueue::ver {
+    /// Signal a workqueue that some work was completed.
+    ///
+    /// This will check the event stamp value to find out exactly how many commands were processed.
+    ///
+    /// Completed commands are removed from the pending list, the CPU free pointer in the
+    /// ring state is advanced, and the completion callbacks are invoked after the queue lock
+    /// has been released. If nothing is left pending and no job is alive, the queue gives up
+    /// its event.
+    ///
+    /// Returns `true` if no commands remain pending (or if the queue holds no event).
+    fn signal(&self) -> bool {
+        let mut inner = self.inner.lock();
+        let event = inner.event.as_ref();
+        let value = match event {
+            None => {
+                pr_err!("WorkQueue: signal() called but no event?\n");
+                return true;
+            }
+            Some(event) => event.0.current(),
+        };
+
+        inner.last_completed = Some(value);
+
+        mod_pr_debug!(
+            "WorkQueue({:?}): Signaling event {:?} value {:#x?}\n",
+            inner.pipe_type,
+            inner.last_token,
+            value
+        );
+
+        // Pending commands are ordered by event value, so the completed ones form a prefix.
+        let mut completed_commands: usize = 0;
+
+        for cmd in inner.pending.iter() {
+            if cmd.value() <= value {
+                mod_pr_debug!(
+                    "WorkQueue({:?}): Command at value {:#x?} complete\n",
+                    inner.pipe_type,
+                    cmd.value()
+                );
+                completed_commands += 1;
+            } else {
+                break;
+            }
+        }
+
+        if completed_commands == 0 {
+            return inner.pending.is_empty();
+        }
+
+        let mut completed = Vec::new();
+
+        if completed.try_reserve(completed_commands).is_err() {
+            pr_crit!(
+                "WorkQueue({:?}): Failed to allocate space for {} completed commands\n",
+                inner.pipe_type,
+                completed_commands
+            );
+        }
+
+        let pipe_type = inner.pipe_type;
+
+        // NOTE(review): a command whose push fails here is dropped without its callback
+        // being invoked; this only happens when the reservation above failed.
+        for cmd in inner.pending.drain(..completed_commands) {
+            if completed.try_push(cmd).is_err() {
+                pr_crit!(
+                    "WorkQueue({:?}): Failed to signal a completed command\n",
+                    pipe_type,
+                );
+            }
+        }
+
+        mod_pr_debug!(
+            "WorkQueue({:?}): Completed {} commands\n",
+            inner.pipe_type,
+            completed_commands
+        );
+
+        if let Some(i) = completed.last() {
+            inner
+                .info
+                .state
+                .with(|raw, _inner| raw.cpu_freeptr.store(i.wptr(), Ordering::Release));
+        }
+
+        let empty = inner.pending.is_empty();
+        if empty && inner.pending_jobs == 0 {
+            inner.event = None;
+            inner.last_submitted = None;
+            inner.last_completed = None;
+        }
+
+        // Run the callbacks without holding the queue lock.
+        core::mem::drop(inner);
+
+        for cmd in completed {
+            cmd.complete();
+        }
+
+        empty
+    }
+
+    /// Mark this queue's work up to a certain stamp value as having failed.
+    ///
+    /// The commands are only tagged with the error; they stay pending and are completed
+    /// later by `signal()` or `fail_all()`.
+    fn mark_error(&self, value: event::EventValue, error: WorkError) {
+        // If anything is marked completed, we can consider it successful
+        // at this point, even if we didn't get the signal event yet.
+        self.signal();
+
+        let mut inner = self.inner.lock();
+
+        if inner.event.is_none() {
+            pr_err!("WorkQueue: mark_error() called but no event?\n");
+            return;
+        }
+
+        mod_pr_debug!(
+            "WorkQueue({:?}): Signaling fault for event {:?} at value {:#x?}\n",
+            inner.pipe_type,
+            inner.last_token,
+            value
+        );
+
+        for cmd in inner.pending.iter_mut() {
+            if cmd.value() <= value {
+                cmd.mark_error(error);
+            } else {
+                break;
+            }
+        }
+    }
+
+    /// Mark all of this queue's work as having failed, and complete it.
+    ///
+    /// The completion callbacks are invoked after the queue lock has been released.
+    fn fail_all(&self, error: WorkError) {
+        // If anything is marked completed, we can consider it successful
+        // at this point, even if we didn't get the signal event yet.
+        self.signal();
+
+        let mut inner = self.inner.lock();
+
+        if inner.event.is_none() {
+            pr_err!("WorkQueue: fail_all() called but no event?\n");
+            return;
+        }
+
+        mod_pr_debug!(
+            "WorkQueue({:?}): Failing all jobs {:?}\n",
+            inner.pipe_type,
+            error
+        );
+
+        let mut cmds = Vec::new();
+
+        core::mem::swap(&mut inner.pending, &mut cmds);
+
+        if inner.pending_jobs == 0 {
+            // Give up the event together with the values derived from it, matching what
+            // `signal()` and `Drop for Job` do when the queue becomes idle.
+            inner.event = None;
+            inner.last_submitted = None;
+            inner.last_completed = None;
+        }
+
+        // Run the callbacks without holding the queue lock.
+        core::mem::drop(inner);
+
+        for mut cmd in cmds {
+            cmd.mark_error(error);
+            cmd.complete();
+        }
+    }
+}
+
+// Only logs that the queue is going away; the fields are released by their own destructors.
+#[versions(AGX)]
+impl Drop for WorkQueue::ver {
+ fn drop(&mut self) {
+ // The lock is taken inside the macro argument, only to read the pipe type for the log.
+ mod_pr_debug!("WorkQueue({:?}): Dropping\n", self.inner.lock().pipe_type);
+ }
+}
diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c
index b8db675e7fb5..3f27ebe4f40e 100644
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@@ -166,6 +166,7 @@ void drm_gem_private_object_init(struct drm_device *dev,
drm_vma_node_reset(&obj->vma_node);
INIT_LIST_HEAD(&obj->lru_node);
+ obj->exportable = true;
}
EXPORT_SYMBOL(drm_gem_private_object_init);
diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
index b602cd72a120..427d28287da8 100644
--- a/drivers/gpu/drm/drm_gem_shmem_helper.c
+++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
@@ -537,7 +537,7 @@ int drm_gem_shmem_dumb_create(struct drm_file *file, struct drm_device *dev,
}
EXPORT_SYMBOL_GPL(drm_gem_shmem_dumb_create);
-static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf)
+vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
struct drm_gem_object *obj = vma->vm_private_data;
@@ -566,8 +566,9 @@ static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf)
return ret;
}
+EXPORT_SYMBOL_GPL(drm_gem_shmem_fault);
-static void drm_gem_shmem_vm_open(struct vm_area_struct *vma)
+void drm_gem_shmem_vm_open(struct vm_area_struct *vma)
{
struct drm_gem_object *obj = vma->vm_private_data;
struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
@@ -588,8 +589,9 @@ static void drm_gem_shmem_vm_open(struct vm_area_struct *vma)
drm_gem_vm_open(vma);
}
+EXPORT_SYMBOL_GPL(drm_gem_shmem_vm_open);
-static void drm_gem_shmem_vm_close(struct vm_area_struct *vma)
+void drm_gem_shmem_vm_close(struct vm_area_struct *vma)
{
struct drm_gem_object *obj = vma->vm_private_data;
struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
@@ -597,6 +599,7 @@ static void drm_gem_shmem_vm_close(struct vm_area_struct *vma)
drm_gem_shmem_put_pages(shmem);
drm_gem_vm_close(vma);
}
+EXPORT_SYMBOL_GPL(drm_gem_shmem_vm_close);
const struct vm_operations_struct drm_gem_shmem_vm_ops = {
.fault = drm_gem_shmem_fault,
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index f924b8b4ab6b..9d2dd982580e 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -391,6 +391,11 @@ static struct dma_buf *export_and_register_object(struct drm_device *dev,
return dmabuf;
}
+ if (!obj->exportable) {
+ dmabuf = ERR_PTR(-EINVAL);
+ return dmabuf;
+ }
+
if (obj->funcs && obj->funcs->export)
dmabuf = obj->funcs->export(obj, flags);
else
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index fd22d753b4ed..ae2f7c7343f2 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -984,6 +984,16 @@ static int drm_sched_main(void *param)
if (!entity)
continue;
+ if (sched->ops->can_run_job) {
+ sched_job = to_drm_sched_job(spsc_queue_peek(&entity->job_queue));
+ if (!sched_job) {
+ complete_all(&entity->entity_idle);
+ continue;
+ }
+ if (!sched->ops->can_run_job(sched_job))
+ continue;
+ }
+
sched_job = drm_sched_entity_pop_job(entity);
if (!sched_job) {
@@ -1092,10 +1102,33 @@ EXPORT_SYMBOL(drm_sched_init);
void drm_sched_fini(struct drm_gpu_scheduler *sched)
{
struct drm_sched_entity *s_entity;
+ struct drm_sched_job *s_job, *tmp;
int i;
- if (sched->thread)
- kthread_stop(sched->thread);
+ if (!sched->thread)
+ return;
+
+ /*
+ * Stop the scheduler, detaching all jobs from their hardware callbacks
+ * and cleaning up complete jobs.
+ */
+ drm_sched_stop(sched, NULL);
+
+ /*
+ * Iterate through the pending job list and free all jobs.
+ * This assumes the driver has either guaranteed jobs are already stopped, or that
+ * otherwise it is responsible for keeping any necessary data structures for
+ * in-progress jobs alive even when the free_job() callback is called early (e.g. by
+ * putting them in its own queue or doing its own refcounting).
+ */
+ list_for_each_entry_safe(s_job, tmp, &sched->pending_list, list) {
+ spin_lock(&sched->job_list_lock);
+ list_del_init(&s_job->list);
+ spin_unlock(&sched->job_list_lock);
+ sched->ops->free_job(s_job);
+ }
+
+ kthread_stop(sched->thread);
for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
struct drm_sched_rq *rq = &sched->sched_rq[i];
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 72dcdd468cf3..85f35035ae95 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -130,6 +130,15 @@
#define ARM_MALI_LPAE_MEMATTR_IMP_DEF 0x88ULL
#define ARM_MALI_LPAE_MEMATTR_WRITE_ALLOC 0x8DULL
+#define APPLE_UAT_MEMATTR_PRIV (((arm_lpae_iopte)0x0) << 2)
+#define APPLE_UAT_MEMATTR_DEV (((arm_lpae_iopte)0x1) << 2)
+#define APPLE_UAT_MEMATTR_SHARED (((arm_lpae_iopte)0x2) << 2)
+#define APPLE_UAT_GPU_ACCESS (((arm_lpae_iopte)1) << 55)
+#define APPLE_UAT_UXN (((arm_lpae_iopte)1) << 54)
+#define APPLE_UAT_PXN (((arm_lpae_iopte)1) << 53)
+#define APPLE_UAT_AP1 (((arm_lpae_iopte)1) << 7)
+#define APPLE_UAT_AP0 (((arm_lpae_iopte)1) << 6)
+
/* IOPTE accessors */
#define iopte_deref(pte,d) __va(iopte_to_paddr(pte, d))
@@ -402,7 +411,42 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
{
arm_lpae_iopte pte;
- if (data->iop.fmt == ARM_64_LPAE_S1 ||
+ if (data->iop.fmt == APPLE_UAT) {
+ /*
+ * This bit enables GPU access and the particular permission
+ * rules that follow. Without it, access is firmware-only and
+ * permissions follow the firmware's Apple SPRR configuration.
+ */
+ pte = APPLE_UAT_GPU_ACCESS;
+ if (prot & IOMMU_PRIV) {
+ /* Firmware structures */
+ pte |= APPLE_UAT_AP0;
+ if (prot & IOMMU_WRITE) {
+ /* Firmware RW */
+ pte |= APPLE_UAT_UXN;
+ } else if (!(prot & IOMMU_READ)) {
+ /* No access */
+ pte |= APPLE_UAT_PXN;
+ }
+ } else if (prot & IOMMU_NOEXEC) {
+ /* GPU structures (no FW access) */
+ pte |= APPLE_UAT_AP1 | ARM_LPAE_PTE_nG;
+ if (!(prot & IOMMU_READ)) {
+ pte |= APPLE_UAT_PXN;
+ if (!(prot & IOMMU_WRITE))
+ pte |= APPLE_UAT_UXN;
+ } else if (prot & IOMMU_WRITE) {
+ pte |= APPLE_UAT_UXN;
+ }
+ } else {
+ pte |= ARM_LPAE_PTE_nG;
+ /* GPU structures (also FW accessible) */
+ if (prot & IOMMU_WRITE)
+ pte |= APPLE_UAT_UXN;
+ if (prot & IOMMU_READ)
+ pte |= APPLE_UAT_PXN;
+ }
+ } else if (data->iop.fmt == ARM_64_LPAE_S1 ||
data->iop.fmt == ARM_32_LPAE_S1) {
pte = ARM_LPAE_PTE_nG;
if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ))
@@ -421,7 +465,14 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
* Note that this logic is structured to accommodate Mali LPAE
* having stage-1-like attributes but stage-2-like permissions.
*/
- if (data->iop.fmt == ARM_64_LPAE_S2 ||
+ if (data->iop.fmt == APPLE_UAT) {
+ if (prot & IOMMU_MMIO)
+ pte |= APPLE_UAT_MEMATTR_DEV;
+ else if (prot & IOMMU_CACHE)
+ pte |= APPLE_UAT_MEMATTR_SHARED;
+ else
+ pte |= APPLE_UAT_MEMATTR_PRIV;
+ } else if (data->iop.fmt == ARM_64_LPAE_S2 ||
data->iop.fmt == ARM_32_LPAE_S2) {
if (prot & IOMMU_MMIO)
pte |= ARM_LPAE_PTE_MEMATTR_DEV;
@@ -444,12 +495,14 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
* "outside the GPU" (i.e. either the Inner or System domain in CPU
* terms, depending on coherency).
*/
- if (prot & IOMMU_CACHE && data->iop.fmt != ARM_MALI_LPAE)
+ if (data->iop.fmt == APPLE_UAT)
+ pte |= ARM_LPAE_PTE_SH_NS;
+ else if (prot & IOMMU_CACHE && data->iop.fmt != ARM_MALI_LPAE)
pte |= ARM_LPAE_PTE_SH_IS;
else
pte |= ARM_LPAE_PTE_SH_OS;
- if (prot & IOMMU_NOEXEC)
+ if (prot & IOMMU_NOEXEC && data->iop.fmt != APPLE_UAT)
pte |= ARM_LPAE_PTE_XN;
if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_NS)
@@ -1079,6 +1132,41 @@ out_free_data:
return NULL;
}
+/*
+ * Allocate an io-pgtable for the Apple UAT format.
+ *
+ * Rejects configurations with any quirk set, an input address size above 48 bits
+ * or an output address size above 42 bits. The page size bitmap is restricted to
+ * 16K. On success the physical address of the (empty) pgd is stored in
+ * cfg->apple_uat_cfg.ttbr. The cookie argument is not used here.
+ *
+ * Returns the new io_pgtable, or NULL on an unsupported configuration or on
+ * allocation failure.
+ */
+static struct io_pgtable *
+apple_uat_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
+{
+ struct arm_lpae_io_pgtable *data;
+
+ /* No quirks for UAT (hopefully) */
+ if (cfg->quirks)
+ return NULL;
+
+ if (cfg->ias > 48 || cfg->oas > 42)
+ return NULL;
+
+ /* Only 16K pages are kept from the requested page sizes */
+ cfg->pgsize_bitmap &= SZ_16K;
+
+ data = arm_lpae_alloc_pgtable(cfg);
+ if (!data)
+ return NULL;
+
+ data->pgd = __arm_lpae_alloc_pages(ARM_LPAE_PGD_SIZE(data), GFP_KERNEL,
+ cfg);
+ if (!data->pgd)
+ goto out_free_data;
+
+ /* Ensure the empty pgd is visible before the TTBAT can be written */
+ wmb();
+
+ cfg->apple_uat_cfg.ttbr = virt_to_phys(data->pgd);
+
+ return &data->iop;
+
+out_free_data:
+ kfree(data);
+ return NULL;
+}
+
struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s1_init_fns = {
.alloc = arm_64_lpae_alloc_pgtable_s1,
.free = arm_lpae_free_pgtable,
@@ -1104,6 +1192,11 @@ struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns = {
.free = arm_lpae_free_pgtable,
};
+/* Apple UAT format: dedicated allocator, free routine shared with the ARM LPAE formats */
+struct io_pgtable_init_fns io_pgtable_apple_uat_init_fns = {
+ .alloc = apple_uat_alloc_pgtable,
+ .free = arm_lpae_free_pgtable,
+};
+
#ifdef CONFIG_IOMMU_IO_PGTABLE_LPAE_SELFTEST
static struct io_pgtable_cfg *cfg_cookie __initdata;
diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c
index b843fcd365d2..faec53e22388 100644
--- a/drivers/iommu/io-pgtable.c
+++ b/drivers/iommu/io-pgtable.c
@@ -20,6 +20,7 @@ io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = {
[ARM_64_LPAE_S1] = &io_pgtable_arm_64_lpae_s1_init_fns,
[ARM_64_LPAE_S2] = &io_pgtable_arm_64_lpae_s2_init_fns,
[ARM_MALI_LPAE] = &io_pgtable_arm_mali_lpae_init_fns,
+ [APPLE_UAT] = &io_pgtable_apple_uat_init_fns,
#endif
#ifdef CONFIG_IOMMU_IO_PGTABLE_DART
[APPLE_DART] = &io_pgtable_apple_dart_init_fns,
diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h
index a17c2f903f81..58e63d504640 100644
--- a/include/drm/drm_gem.h
+++ b/include/drm/drm_gem.h
@@ -361,6 +361,14 @@ struct drm_gem_object {
* The current LRU list that the GEM object is on.
*/
struct drm_gem_lru *lru;
+
+ /**
+ * @exportable:
+ *
+ * Whether this GEM object can be exported via the drm_gem_object_funcs->export
+ * callback. Defaults to true.
+ */
+ bool exportable;
};
/**
diff --git a/include/drm/drm_gem_shmem_helper.h b/include/drm/drm_gem_shmem_helper.h
index a2201b2488c5..b9f349b3ed76 100644
--- a/include/drm/drm_gem_shmem_helper.h
+++ b/include/drm/drm_gem_shmem_helper.h
@@ -138,6 +138,9 @@ void drm_gem_shmem_print_info(const struct drm_gem_shmem_object *shmem,
struct drm_printer *p, unsigned int indent);
extern const struct vm_operations_struct drm_gem_shmem_vm_ops;
+vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf);
+void drm_gem_shmem_vm_open(struct vm_area_struct *vma);
+void drm_gem_shmem_vm_close(struct vm_area_struct *vma);
/*
* GEM object functions
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
index ca857ec9e7eb..ad7e9aff6a7d 100644
--- a/include/drm/gpu_scheduler.h
+++ b/include/drm/gpu_scheduler.h
@@ -390,6 +390,14 @@ struct drm_sched_backend_ops {
struct drm_sched_entity *s_entity);
/**
+ * @can_run_job: Called before job execution to check whether the
+ * hardware is free enough to run the job. This can be used to
+ * implement more complex hardware resource policies than the
+ * hw_submission limit.
+ */
+ bool (*can_run_job)(struct drm_sched_job *sched_job);
+
+ /**
* @run_job: Called to execute the job once all of the dependencies
* have been resolved. This may be called multiple times, if
* timedout_job() has happened and drm_sched_job_recovery()
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index e4697ff48d3a..50d4dce53ded 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -19,6 +19,7 @@ enum io_pgtable_fmt {
AMD_IOMMU_V2,
APPLE_DART,
APPLE_DART2,
+ APPLE_UAT,
IO_PGTABLE_NUM_FMTS,
};
@@ -148,6 +149,10 @@ struct io_pgtable_cfg {
u64 ttbr[4];
u32 n_ttbrs;
} apple_dart_cfg;
+
+ struct {
+ u64 ttbr;
+ } apple_uat_cfg;
};
};
@@ -262,5 +267,6 @@ extern struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns;
extern struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns;
extern struct io_pgtable_init_fns io_pgtable_amd_iommu_v2_init_fns;
extern struct io_pgtable_init_fns io_pgtable_apple_dart_init_fns;
+extern struct io_pgtable_init_fns io_pgtable_apple_uat_init_fns;
#endif /* __IO_PGTABLE_H */
diff --git a/include/uapi/drm/asahi_drm.h b/include/uapi/drm/asahi_drm.h
new file mode 100644
index 000000000000..084a80220c5b
--- /dev/null
+++ b/include/uapi/drm/asahi_drm.h
@@ -0,0 +1,560 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright (C) The Asahi Linux Contributors
+ *
+ * Based on panfrost_drm.h which is
+ *
+ * Copyright © 2014-2018 Broadcom
+ * Copyright © 2019 Collabora ltd.
+ */
+#ifndef _ASAHI_DRM_H_
+#define _ASAHI_DRM_H_
+
+#include "drm.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#define DRM_ASAHI_UNSTABLE_UABI_VERSION 10006
+
+#define DRM_ASAHI_GET_PARAMS 0x00
+#define DRM_ASAHI_VM_CREATE 0x01
+#define DRM_ASAHI_VM_DESTROY 0x02
+#define DRM_ASAHI_GEM_CREATE 0x03
+#define DRM_ASAHI_GEM_MMAP_OFFSET 0x04
+#define DRM_ASAHI_GEM_BIND 0x05
+#define DRM_ASAHI_QUEUE_CREATE 0x06
+#define DRM_ASAHI_QUEUE_DESTROY 0x07
+#define DRM_ASAHI_SUBMIT 0x08
+#define DRM_ASAHI_GET_TIME 0x09
+
+#define DRM_ASAHI_MAX_CLUSTERS 32
+
+struct drm_asahi_params_global {
+ __u32 unstable_uabi_version;
+ __u32 pad0;
+
+ __u64 feat_compat;
+ __u64 feat_incompat;
+
+ __u32 gpu_generation;
+ __u32 gpu_variant;
+ __u32 gpu_revision;
+ __u32 chip_id;
+
+ __u32 num_dies;
+ __u32 num_clusters_total;
+ __u32 num_cores_per_cluster;
+ __u32 num_frags_per_cluster;
+ __u32 num_gps_per_cluster;
+ __u32 num_cores_total_active;
+ __u64 core_masks[DRM_ASAHI_MAX_CLUSTERS];
+
+ __u32 vm_page_size;
+ __u32 pad1;
+ __u64 vm_user_start;
+ __u64 vm_user_end;
+ __u64 vm_shader_start;
+ __u64 vm_shader_end;
+
+ __u32 max_syncs_per_submission;
+ __u32 max_commands_per_submission;
+ __u32 max_commands_in_flight;
+ __u32 max_attachments;
+
+ __u32 timer_frequency_hz;
+ __u32 min_frequency_khz;
+ __u32 max_frequency_khz;
+ __u32 max_power_mw;
+
+ __u32 result_render_size;
+ __u32 result_compute_size;
+};
+
+/*
+enum drm_asahi_feat_compat {
+};
+*/
+
+enum drm_asahi_feat_incompat {
+ DRM_ASAHI_FEAT_MANDATORY_ZS_COMPRESSION = (1UL) << 0,
+};
+
+struct drm_asahi_get_params {
+ /** @extensions: Pointer to the first extension struct, if any */
+ __u64 extensions;
+
+ /** @param_group: Parameter group to fetch (MBZ) */
+ __u32 param_group;
+
+ /** @pad: MBZ */
+ __u32 pad;
+
+ /** @pointer: User pointer to write parameter struct */
+ __u64 pointer;
+
+ /** @size: Size of user buffer, max size supported on return */
+ __u64 size;
+};
+
+struct drm_asahi_vm_create {
+ /** @extensions: Pointer to the first extension struct, if any */
+ __u64 extensions;
+
+ /** @vm_id: Returned VM ID */
+ __u32 vm_id;
+
+ /** @pad: MBZ */
+ __u32 pad;
+};
+
+struct drm_asahi_vm_destroy {
+ /** @extensions: Pointer to the first extension struct, if any */
+ __u64 extensions;
+
+ /** @vm_id: VM ID to be destroyed */
+ __u32 vm_id;
+
+ /** @pad: MBZ */
+ __u32 pad;
+};
+
+#define ASAHI_GEM_WRITEBACK (1L << 0)
+#define ASAHI_GEM_VM_PRIVATE (1L << 1)
+
+struct drm_asahi_gem_create {
+ /** @extensions: Pointer to the first extension struct, if any */
+ __u64 extensions;
+
+ /** @size: Size of the BO */
+ __u64 size;
+
+ /** @flags: BO creation flags */
+ __u32 flags;
+
+ /** @vm_id: VM ID to assign to the BO, if ASAHI_GEM_VM_PRIVATE is set. */
+ __u32 vm_id;
+
+ /** @handle: Returned GEM handle for the BO */
+ __u32 handle;
+};
+
+struct drm_asahi_gem_mmap_offset {
+ /** @extensions: Pointer to the first extension struct, if any */
+ __u64 extensions;
+
+ /** @handle: Handle for the object being mapped. */
+ __u32 handle;
+
+ /** @flags: Must be zero */
+ __u32 flags;
+
+ /** @offset: The fake offset to use for subsequent mmap call */
+ __u64 offset;
+};
+
+enum drm_asahi_bind_op {
+ ASAHI_BIND_OP_BIND = 0,
+ ASAHI_BIND_OP_UNBIND = 1,
+ ASAHI_BIND_OP_UNBIND_ALL = 2,
+};
+
+#define ASAHI_BIND_READ (1L << 0)
+#define ASAHI_BIND_WRITE (1L << 1)
+
+struct drm_asahi_gem_bind {
+ /** @extensions: Pointer to the first extension struct, if any */
+ __u64 extensions;
+
+ /** @op: Bind operation */
+ __u32 op;
+
+ /** @flags: One or more of ASAHI_BIND_* */
+ __u32 flags;
+
+ /** @handle: GEM object to bind */
+ __u32 handle;
+
+ /** @vm_id: The ID of the VM to bind to */
+ __u32 vm_id;
+
+ /** @offset: Offset into the object */
+ __u64 offset;
+
+ /** @range: Number of bytes from the object to bind to addr */
+ __u64 range;
+
+ /** @addr: Address to bind to */
+ __u64 addr;
+};
+
+enum drm_asahi_cmd_type {
+ DRM_ASAHI_CMD_RENDER = 0,
+ DRM_ASAHI_CMD_BLIT = 1,
+ DRM_ASAHI_CMD_COMPUTE = 2,
+};
+
+/* Note: this is an enum so that it can be resolved by Rust bindgen. */
+enum drm_asahi_queue_cap {
+ DRM_ASAHI_QUEUE_CAP_RENDER = (1UL << DRM_ASAHI_CMD_RENDER),
+ DRM_ASAHI_QUEUE_CAP_BLIT = (1UL << DRM_ASAHI_CMD_BLIT),
+ DRM_ASAHI_QUEUE_CAP_COMPUTE = (1UL << DRM_ASAHI_CMD_COMPUTE),
+};
+
+struct drm_asahi_queue_create {
+ /** @extensions: Pointer to the first extension struct, if any */
+ __u64 extensions;
+
+ /** @flags: MBZ */
+ __u32 flags;
+
+ /** @vm_id: The ID of the VM this queue is bound to */
+ __u32 vm_id;
+
+ /** @queue_caps: Bitmask of DRM_ASAHI_QUEUE_CAP_* */
+ __u32 queue_caps;
+
+ /** @priority: Queue priority, 0-3 */
+ __u32 priority;
+
+ /** @queue_id: The returned queue ID */
+ __u32 queue_id;
+};
+
+struct drm_asahi_queue_destroy {
+ /** @extensions: Pointer to the first extension struct, if any */
+ __u64 extensions;
+
+ /** @queue_id: The queue ID to be destroyed */
+ __u32 queue_id;
+};
+
+enum drm_asahi_sync_type {
+ DRM_ASAHI_SYNC_SYNCOBJ = 0,
+ DRM_ASAHI_SYNC_TIMELINE_SYNCOBJ = 1,
+};
+
+struct drm_asahi_sync {
+ /** @extensions: Pointer to the first extension struct, if any */
+ __u64 extensions;
+
+ /** @sync_type: One of drm_asahi_sync_type */
+ __u32 sync_type;
+
+ /** @handle: The sync object handle */
+ __u32 handle;
+
+ /** @timeline_value: Timeline value for timeline sync objects */
+ __u64 timeline_value;
+};
+
+enum drm_asahi_subqueue {
+ DRM_ASAHI_SUBQUEUE_RENDER = 0, /* Also blit */
+ DRM_ASAHI_SUBQUEUE_COMPUTE = 1,
+ DRM_ASAHI_SUBQUEUE_COUNT = 2,
+};
+
+#define DRM_ASAHI_BARRIER_NONE ~(0U)
+
+struct drm_asahi_command {
+ /** @extensions: Pointer to the first extension struct, if any */
+ __u64 extensions;
+
+ /** @cmd_type: One of drm_asahi_cmd_type */
+ __u32 cmd_type;
+
+ /** @flags: Flags for command submission */
+ __u32 flags;
+
+ /** @cmd_buffer: Pointer to the appropriate command buffer structure */
+ __u64 cmd_buffer;
+
+ /** @cmd_buffer_size: Size of the command buffer structure */
+ __u64 cmd_buffer_size;
+
+ /** @result_offset: Offset into the result BO to return information about this command */
+ __u64 result_offset;
+
+ /** @result_size: Size of the result data structure */
+ __u64 result_size;
+
+ /** @barriers: Array of command indices per subqueue to wait on */
+ __u32 barriers[DRM_ASAHI_SUBQUEUE_COUNT];
+};
+
+struct drm_asahi_submit {
+ /** @extensions: Pointer to the first extension struct, if any */
+ __u64 extensions;
+
+ /** @in_syncs: An optional array of drm_asahi_sync to wait on before starting this job. */
+ __u64 in_syncs;
+
+ /** @out_syncs: An optional array of drm_asahi_sync objects to signal upon completion. */
+ __u64 out_syncs;
+
+ /** @commands: Pointer to the drm_asahi_command array of commands to submit. */
+ __u64 commands;
+
+ /** @flags: Flags for command submission (MBZ) */
+ __u32 flags;
+
+ /** @queue_id: The queue ID to be submitted to */
+ __u32 queue_id;
+
+ /** @result_handle: An optional BO handle to place result data in */
+ __u32 result_handle;
+
+ /** @in_sync_count: Number of sync objects to wait on before starting this job. */
+ __u32 in_sync_count;
+
+ /** @out_sync_count: Number of sync objects to signal upon completion of this job. */
+ __u32 out_sync_count;
+
+ /** @command_count: Number of commands to be submitted */
+ __u32 command_count;
+};
+
+#define ASAHI_ATTACHMENT_C 0
+#define ASAHI_ATTACHMENT_Z 1
+#define ASAHI_ATTACHMENT_S 2
+
+/* FIXME: Type doesn't make sense here */
+struct drm_asahi_attachment {
+ __u32 type;
+ __u32 size;
+ __u64 pointer;
+};
+
+#define ASAHI_RENDER_NO_CLEAR_PIPELINE_TEXTURES (1UL << 0)
+#define ASAHI_RENDER_SET_WHEN_RELOADING_Z_OR_S (1UL << 1)
+#define ASAHI_RENDER_MEMORYLESS_RTS_USED (1UL << 2)
+#define ASAHI_RENDER_PROCESS_EMPTY_TILES (1UL << 3)
+#define ASAHI_RENDER_NO_VERTEX_CLUSTERING (1UL << 4)
+
+struct drm_asahi_cmd_render {
+ /** @extensions: Pointer to the first extension struct, if any */
+ __u64 extensions;
+
+ __u64 flags;
+
+ __u64 encoder_ptr;
+
+ __u64 attachments;
+ __u32 attachment_count;
+ __u32 pad;
+
+ __u64 depth_buffer_1;
+ __u64 depth_buffer_2;
+ __u64 depth_buffer_3;
+ __u64 depth_meta_buffer_1;
+ __u64 depth_meta_buffer_2;
+ __u64 depth_meta_buffer_3;
+
+ __u64 stencil_buffer_1;
+ __u64 stencil_buffer_2;
+ __u64 stencil_buffer_3;
+ __u64 stencil_meta_buffer_1;
+ __u64 stencil_meta_buffer_2;
+ __u64 stencil_meta_buffer_3;
+
+ __u64 scissor_array;
+ __u64 depth_bias_array;
+ __u64 visibility_result_buffer;
+
+ __u64 zls_ctrl;
+ __u64 ppp_multisamplectl;
+ __u32 ppp_ctrl;
+
+ __u32 fb_width;
+ __u32 fb_height;
+
+ __u32 utile_width;
+ __u32 utile_height;
+
+ __u32 samples;
+ __u32 layers;
+
+ __u32 encoder_id;
+ __u32 cmd_ta_id;
+ __u32 cmd_3d_id;
+
+ __u32 iogpu_unk_49;
+ __u32 iogpu_unk_212;
+ __u32 iogpu_unk_214;
+
+ __u32 merge_upper_x;
+ __u32 merge_upper_y;
+
+ __u32 load_pipeline;
+ __u32 load_pipeline_bind;
+
+ __u32 store_pipeline;
+ __u32 store_pipeline_bind;
+
+ __u32 partial_reload_pipeline;
+ __u32 partial_reload_pipeline_bind;
+
+ __u32 partial_store_pipeline;
+ __u32 partial_store_pipeline_bind;
+
+ __u32 depth_dimensions;
+ __u32 isp_bgobjdepth;
+ __u32 isp_bgobjvals;
+
+};
+
+struct drm_asahi_cmd_compute {
+ __u64 flags;
+
+ __u64 encoder_ptr;
+ __u64 encoder_end;
+
+ __u64 attachments;
+ __u32 attachment_count;
+ __u32 pad;
+
+ __u64 buffer_descriptor;
+
+ __u32 buffer_descriptor_size; /* ? */
+ __u32 ctx_switch_prog;
+
+ __u32 encoder_id;
+ __u32 cmd_id;
+
+ __u32 iogpu_unk_40;
+ __u32 iogpu_unk_44;
+};
+
+enum drm_asahi_status {
+ DRM_ASAHI_STATUS_PENDING = 0,
+ DRM_ASAHI_STATUS_COMPLETE,
+ DRM_ASAHI_STATUS_UNKNOWN_ERROR,
+ DRM_ASAHI_STATUS_TIMEOUT,
+ DRM_ASAHI_STATUS_FAULT,
+ DRM_ASAHI_STATUS_KILLED,
+ DRM_ASAHI_STATUS_NO_DEVICE,
+};
+
+enum drm_asahi_fault {
+ DRM_ASAHI_FAULT_NONE = 0,
+ DRM_ASAHI_FAULT_UNKNOWN,
+ DRM_ASAHI_FAULT_UNMAPPED,
+ DRM_ASAHI_FAULT_AF_FAULT,
+ DRM_ASAHI_FAULT_WRITE_ONLY,
+ DRM_ASAHI_FAULT_READ_ONLY,
+ DRM_ASAHI_FAULT_NO_ACCESS,
+};
+
+struct drm_asahi_result_info {
+ /** @status: One of enum drm_asahi_status */
+ __u32 status;
+
+ /** @fault_type: One of enum drm_asahi_fault */
+ __u32 fault_type;
+
+ /** @unit: Unit number, hardware dependent */
+ __u32 unit;
+
+ /** @sideband: Sideband information, hardware dependent */
+ __u32 sideband;
+
+ /** @level: Page table level at which the fault occurred, hardware dependent */
+ __u8 level;
+
+ /** @is_read: Fault was a read */
+ __u8 is_read;
+
+ /** @pad: MBZ */
+ __u16 pad;
+
+ /** @extra: Extra bits, hardware dependent */
+ __u32 extra;
+
+ /** @address: Fault address, cache line aligned */
+ __u64 address;
+};
+
+#define DRM_ASAHI_RESULT_RENDER_TVB_GROW_OVF (1UL << 0)
+#define DRM_ASAHI_RESULT_RENDER_TVB_GROW_MIN (1UL << 1)
+#define DRM_ASAHI_RESULT_RENDER_TVB_OVERFLOWED (1UL << 2)
+
+struct drm_asahi_result_render {
+ /** @info: Common result information */
+ struct drm_asahi_result_info info;
+
+ /** @flags: Zero or more of DRM_ASAHI_RESULT_RENDER_* */
+ __u64 flags;
+
+ /** @vertex_ts_start: Timestamp of the start of vertex processing */
+ __u64 vertex_ts_start;
+
+ /** @vertex_ts_end: Timestamp of the end of vertex processing */
+ __u64 vertex_ts_end;
+
+ /** @fragment_ts_start: Timestamp of the start of fragment processing */
+ __u64 fragment_ts_start;
+
+ /** @fragment_ts_end: Timestamp of the end of fragment processing */
+ __u64 fragment_ts_end;
+
+ /** @tvb_size_bytes: TVB size at the start of this render */
+ __u64 tvb_size_bytes;
+
+ /** @tvb_usage_bytes: Total TVB usage in bytes for this render */
+ __u64 tvb_usage_bytes;
+
+ /** @num_tvb_overflows: Number of TVB overflows that occurred for this render */
+ __u32 num_tvb_overflows;
+};
+
+struct drm_asahi_result_compute {
+ /** @info: Common result information */
+ struct drm_asahi_result_info info;
+
+ /** @flags: Zero or more of DRM_ASAHI_RESULT_COMPUTE_* (none defined in this header yet) */
+ __u64 flags;
+
+ /** @ts_start: Timestamp of the start of this compute command */
+ __u64 ts_start;
+
+ /** @ts_end: Timestamp of the end of this compute command */
+ __u64 ts_end;
+};
+
+struct drm_asahi_get_time {
+ /** @extensions: Pointer to the first extension struct, if any */
+ __u64 extensions;
+
+ /** @flags: MBZ. */
+ __u64 flags;
+
+ /** @tv_sec: On return, seconds part of a point in time */
+ __s64 tv_sec;
+
+ /** @tv_nsec: On return, nanoseconds part of a point in time */
+ __s64 tv_nsec;
+
+ /** @gpu_timestamp: On return, the GPU timestamp at that point in time */
+ __u64 gpu_timestamp;
+};
+
+/* Note: this is an enum so that it can be resolved by Rust bindgen. */
+enum {
+ DRM_IOCTL_ASAHI_GET_PARAMS = DRM_IOWR(DRM_COMMAND_BASE + DRM_ASAHI_GET_PARAMS, struct drm_asahi_get_params),
+ DRM_IOCTL_ASAHI_VM_CREATE = DRM_IOWR(DRM_COMMAND_BASE + DRM_ASAHI_VM_CREATE, struct drm_asahi_vm_create),
+ DRM_IOCTL_ASAHI_VM_DESTROY = DRM_IOW(DRM_COMMAND_BASE + DRM_ASAHI_VM_DESTROY, struct drm_asahi_vm_destroy),
+ DRM_IOCTL_ASAHI_GEM_CREATE = DRM_IOWR(DRM_COMMAND_BASE + DRM_ASAHI_GEM_CREATE, struct drm_asahi_gem_create),
+ DRM_IOCTL_ASAHI_GEM_MMAP_OFFSET = DRM_IOWR(DRM_COMMAND_BASE + DRM_ASAHI_GEM_MMAP_OFFSET, struct drm_asahi_gem_mmap_offset),
+ DRM_IOCTL_ASAHI_GEM_BIND = DRM_IOW(DRM_COMMAND_BASE + DRM_ASAHI_GEM_BIND, struct drm_asahi_gem_bind),
+ DRM_IOCTL_ASAHI_QUEUE_CREATE = DRM_IOWR(DRM_COMMAND_BASE + DRM_ASAHI_QUEUE_CREATE, struct drm_asahi_queue_create),
+ DRM_IOCTL_ASAHI_QUEUE_DESTROY = DRM_IOW(DRM_COMMAND_BASE + DRM_ASAHI_QUEUE_DESTROY, struct drm_asahi_queue_destroy),
+ DRM_IOCTL_ASAHI_SUBMIT = DRM_IOW(DRM_COMMAND_BASE + DRM_ASAHI_SUBMIT, struct drm_asahi_submit),
+ DRM_IOCTL_ASAHI_GET_TIME = DRM_IOWR(DRM_COMMAND_BASE + DRM_ASAHI_GET_TIME, struct drm_asahi_get_time),
+};
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* _ASAHI_DRM_H_ */
diff --git a/rust/Makefile b/rust/Makefile
index ff70c4c916f8..aee0701acf02 100644
--- a/rust/Makefile
+++ b/rust/Makefile
@@ -50,6 +50,7 @@ core-cfgs = \
--cfg no_fp_fmt_parse
alloc-cfgs = \
+ --cfg no_borrow \
--cfg no_fmt \
--cfg no_global_oom_handling \
--cfg no_macros \
@@ -262,6 +263,7 @@ bindgen_skip_c_flags := -mno-fp-ret-in-387 -mpreferred-stack-boundary=% \
# Derived from `scripts/Makefile.clang`.
BINDGEN_TARGET_x86 := x86_64-linux-gnu
+BINDGEN_TARGET_arm64 := aarch64-linux-gnu
BINDGEN_TARGET := $(BINDGEN_TARGET_$(SRCARCH))
# All warnings are inhibited since GCC builds are very experimental,
@@ -359,9 +361,23 @@ rust-analyzer:
$(Q)$(srctree)/scripts/generate_rust_analyzer.py $(srctree) $(objtree) \
$(RUST_LIB_SRC) > $(objtree)/rust-project.json
+redirect-intrinsics = \
+ __eqsf2 __gesf2 __lesf2 __nesf2 __unordsf2 \
+ __unorddf2 \
+ __muloti4 __multi3 \
+ __udivmodti4 __udivti3 __umodti3
+
+ifneq ($(or $(CONFIG_ARM64),$(and $(CONFIG_RISCV),$(CONFIG_64BIT))),)
+ # These intrinsics are defined for ARM64 and RISCV64
+ redirect-intrinsics += \
+ __ashrti3 \
+ __ashlti3 __lshrti3
+endif
+
$(obj)/core.o: private skip_clippy = 1
$(obj)/core.o: private skip_flags = -Dunreachable_pub
-$(obj)/core.o: private rustc_target_flags = $(core-cfgs)
+$(obj)/core.o: private rustc_objcopy = $(foreach sym,$(redirect-intrinsics),--redefine-sym $(sym)=__rust$(sym))
+$(obj)/core.o: private rustc_target_flags = $(core-cfgs) -Aunused-imports
$(obj)/core.o: $(RUST_LIB_SRC)/core/src/lib.rs $(obj)/target.json FORCE
$(call if_changed_dep,rustc_library)
diff --git a/rust/alloc/alloc.rs b/rust/alloc/alloc.rs
index ca224a541770..0142178370e9 100644
--- a/rust/alloc/alloc.rs
+++ b/rust/alloc/alloc.rs
@@ -27,16 +27,23 @@ extern "Rust" {
// (the code expanding that attribute macro generates those functions), or to call
// the default implementations in libstd (`__rdl_alloc` etc. in `library/std/src/alloc.rs`)
// otherwise.
- // The rustc fork of LLVM also special-cases these function names to be able to optimize them
+ // The rustc fork of LLVM 14 and earlier also special-cases these function names to be able to optimize them
// like `malloc`, `realloc`, and `free`, respectively.
#[rustc_allocator]
- #[rustc_allocator_nounwind]
+ #[cfg_attr(not(bootstrap), rustc_nounwind)]
+ #[cfg_attr(bootstrap, rustc_allocator_nounwind)]
fn __rust_alloc(size: usize, align: usize) -> *mut u8;
- #[rustc_allocator_nounwind]
+ #[rustc_deallocator]
+ #[cfg_attr(not(bootstrap), rustc_nounwind)]
+ #[cfg_attr(bootstrap, rustc_allocator_nounwind)]
fn __rust_dealloc(ptr: *mut u8, size: usize, align: usize);
- #[rustc_allocator_nounwind]
+ #[rustc_reallocator]
+ #[cfg_attr(not(bootstrap), rustc_nounwind)]
+ #[cfg_attr(bootstrap, rustc_allocator_nounwind)]
fn __rust_realloc(ptr: *mut u8, old_size: usize, align: usize, new_size: usize) -> *mut u8;
- #[rustc_allocator_nounwind]
+ #[rustc_allocator_zeroed]
+ #[cfg_attr(not(bootstrap), rustc_nounwind)]
+ #[cfg_attr(bootstrap, rustc_allocator_nounwind)]
fn __rust_alloc_zeroed(size: usize, align: usize) -> *mut u8;
}
@@ -72,11 +79,14 @@ pub use std::alloc::Global;
/// # Examples
///
/// ```
-/// use std::alloc::{alloc, dealloc, Layout};
+/// use std::alloc::{alloc, dealloc, handle_alloc_error, Layout};
///
/// unsafe {
/// let layout = Layout::new::<u16>();
/// let ptr = alloc(layout);
+/// if ptr.is_null() {
+/// handle_alloc_error(layout);
+/// }
///
/// *(ptr as *mut u16) = 42;
/// assert_eq!(*(ptr as *mut u16), 42);
@@ -400,13 +410,13 @@ pub mod __alloc_error_handler {
// if there is no `#[alloc_error_handler]`
#[rustc_std_internal_symbol]
- pub unsafe extern "C-unwind" fn __rdl_oom(size: usize, _align: usize) -> ! {
+ pub unsafe fn __rdl_oom(size: usize, _align: usize) -> ! {
panic!("memory allocation of {size} bytes failed")
}
// if there is an `#[alloc_error_handler]`
#[rustc_std_internal_symbol]
- pub unsafe extern "C-unwind" fn __rg_oom(size: usize, align: usize) -> ! {
+ pub unsafe fn __rg_oom(size: usize, align: usize) -> ! {
let layout = unsafe { Layout::from_size_align_unchecked(size, align) };
extern "Rust" {
#[lang = "oom"]
diff --git a/rust/alloc/borrow.rs b/rust/alloc/borrow.rs
deleted file mode 100644
index dde4957200d4..000000000000
--- a/rust/alloc/borrow.rs
+++ /dev/null
@@ -1,498 +0,0 @@
-// SPDX-License-Identifier: Apache-2.0 OR MIT
-
-//! A module for working with borrowed data.
-
-#![stable(feature = "rust1", since = "1.0.0")]
-
-use core::cmp::Ordering;
-use core::hash::{Hash, Hasher};
-use core::ops::Deref;
-#[cfg(not(no_global_oom_handling))]
-use core::ops::{Add, AddAssign};
-
-#[stable(feature = "rust1", since = "1.0.0")]
-pub use core::borrow::{Borrow, BorrowMut};
-
-use core::fmt;
-#[cfg(not(no_global_oom_handling))]
-use crate::string::String;
-
-use Cow::*;
-
-#[stable(feature = "rust1", since = "1.0.0")]
-impl<'a, B: ?Sized> Borrow<B> for Cow<'a, B>
-where
- B: ToOwned,
- <B as ToOwned>::Owned: 'a,
-{
- fn borrow(&self) -> &B {
- &**self
- }
-}
-
-/// A generalization of `Clone` to borrowed data.
-///
-/// Some types make it possible to go from borrowed to owned, usually by
-/// implementing the `Clone` trait. But `Clone` works only for going from `&T`
-/// to `T`. The `ToOwned` trait generalizes `Clone` to construct owned data
-/// from any borrow of a given type.
-#[cfg_attr(not(test), rustc_diagnostic_item = "ToOwned")]
-#[stable(feature = "rust1", since = "1.0.0")]
-pub trait ToOwned {
- /// The resulting type after obtaining ownership.
- #[stable(feature = "rust1", since = "1.0.0")]
- type Owned: Borrow<Self>;
-
- /// Creates owned data from borrowed data, usually by cloning.
- ///
- /// # Examples
- ///
- /// Basic usage:
- ///
- /// ```
- /// let s: &str = "a";
- /// let ss: String = s.to_owned();
- ///
- /// let v: &[i32] = &[1, 2];
- /// let vv: Vec<i32> = v.to_owned();
- /// ```
- #[stable(feature = "rust1", since = "1.0.0")]
- #[must_use = "cloning is often expensive and is not expected to have side effects"]
- fn to_owned(&self) -> Self::Owned;
-
- /// Uses borrowed data to replace owned data, usually by cloning.
- ///
- /// This is borrow-generalized version of `Clone::clone_from`.
- ///
- /// # Examples
- ///
- /// Basic usage:
- ///
- /// ```
- /// # #![feature(toowned_clone_into)]
- /// let mut s: String = String::new();
- /// "hello".clone_into(&mut s);
- ///
- /// let mut v: Vec<i32> = Vec::new();
- /// [1, 2][..].clone_into(&mut v);
- /// ```
- #[unstable(feature = "toowned_clone_into", reason = "recently added", issue = "41263")]
- fn clone_into(&self, target: &mut Self::Owned) {
- *target = self.to_owned();
- }
-}
-
-#[stable(feature = "rust1", since = "1.0.0")]
-impl<T> ToOwned for T
-where
- T: Clone,
-{
- type Owned = T;
- fn to_owned(&self) -> T {
- self.clone()
- }
-
- fn clone_into(&self, target: &mut T) {
- target.clone_from(self);
- }
-}
-
-/// A clone-on-write smart pointer.
-///
-/// The type `Cow` is a smart pointer providing clone-on-write functionality: it
-/// can enclose and provide immutable access to borrowed data, and clone the
-/// data lazily when mutation or ownership is required. The type is designed to
-/// work with general borrowed data via the `Borrow` trait.
-///
-/// `Cow` implements `Deref`, which means that you can call
-/// non-mutating methods directly on the data it encloses. If mutation
-/// is desired, `to_mut` will obtain a mutable reference to an owned
-/// value, cloning if necessary.
-///
-/// If you need reference-counting pointers, note that
-/// [`Rc::make_mut`][crate::rc::Rc::make_mut] and
-/// [`Arc::make_mut`][crate::sync::Arc::make_mut] can provide clone-on-write
-/// functionality as well.
-///
-/// # Examples
-///
-/// ```
-/// use std::borrow::Cow;
-///
-/// fn abs_all(input: &mut Cow<[i32]>) {
-/// for i in 0..input.len() {
-/// let v = input[i];
-/// if v < 0 {
-/// // Clones into a vector if not already owned.
-/// input.to_mut()[i] = -v;
-/// }
-/// }
-/// }
-///
-/// // No clone occurs because `input` doesn't need to be mutated.
-/// let slice = [0, 1, 2];
-/// let mut input = Cow::from(&slice[..]);
-/// abs_all(&mut input);
-///
-/// // Clone occurs because `input` needs to be mutated.
-/// let slice = [-1, 0, 1];
-/// let mut input = Cow::from(&slice[..]);
-/// abs_all(&mut input);
-///
-/// // No clone occurs because `input` is already owned.
-/// let mut input = Cow::from(vec![-1, 0, 1]);
-/// abs_all(&mut input);
-/// ```
-///
-/// Another example showing how to keep `Cow` in a struct:
-///
-/// ```
-/// use std::borrow::Cow;
-///
-/// struct Items<'a, X: 'a> where [X]: ToOwned<Owned = Vec<X>> {
-/// values: Cow<'a, [X]>,
-/// }
-///
-/// impl<'a, X: Clone + 'a> Items<'a, X> where [X]: ToOwned<Owned = Vec<X>> {
-/// fn new(v: Cow<'a, [X]>) -> Self {
-/// Items { values: v }
-/// }
-/// }
-///
-/// // Creates a container from borrowed values of a slice
-/// let readonly = [1, 2];
-/// let borrowed = Items::new((&readonly[..]).into());
-/// match borrowed {
-/// Items { values: Cow::Borrowed(b) } => println!("borrowed {b:?}"),
-/// _ => panic!("expect borrowed value"),
-/// }
-///
-/// let mut clone_on_write = borrowed;
-/// // Mutates the data from slice into owned vec and pushes a new value on top
-/// clone_on_write.values.to_mut().push(3);
-/// println!("clone_on_write = {:?}", clone_on_write.values);
-///
-/// // The data was mutated. Let's check it out.
-/// match clone_on_write {
-/// Items { values: Cow::Owned(_) } => println!("clone_on_write contains owned data"),
-/// _ => panic!("expect owned data"),
-/// }
-/// ```
-#[stable(feature = "rust1", since = "1.0.0")]
-#[cfg_attr(not(test), rustc_diagnostic_item = "Cow")]
-pub enum Cow<'a, B: ?Sized + 'a>
-where
- B: ToOwned,
-{
- /// Borrowed data.
- #[stable(feature = "rust1", since = "1.0.0")]
- Borrowed(#[stable(feature = "rust1", since = "1.0.0")] &'a B),
-
- /// Owned data.
- #[stable(feature = "rust1", since = "1.0.0")]
- Owned(#[stable(feature = "rust1", since = "1.0.0")] <B as ToOwned>::Owned),
-}
-
-#[stable(feature = "rust1", since = "1.0.0")]
-impl<B: ?Sized + ToOwned> Clone for Cow<'_, B> {
- fn clone(&self) -> Self {
- match *self {
- Borrowed(b) => Borrowed(b),
- Owned(ref o) => {
- let b: &B = o.borrow();
- Owned(b.to_owned())
- }
- }
- }
-
- fn clone_from(&mut self, source: &Self) {
- match (self, source) {
- (&mut Owned(ref mut dest), &Owned(ref o)) => o.borrow().clone_into(dest),
- (t, s) => *t = s.clone(),
- }
- }
-}
-
-impl<B: ?Sized + ToOwned> Cow<'_, B> {
- /// Returns true if the data is borrowed, i.e. if `to_mut` would require additional work.
- ///
- /// # Examples
- ///
- /// ```
- /// #![feature(cow_is_borrowed)]
- /// use std::borrow::Cow;
- ///
- /// let cow = Cow::Borrowed("moo");
- /// assert!(cow.is_borrowed());
- ///
- /// let bull: Cow<'_, str> = Cow::Owned("...moo?".to_string());
- /// assert!(!bull.is_borrowed());
- /// ```
- #[unstable(feature = "cow_is_borrowed", issue = "65143")]
- #[rustc_const_unstable(feature = "const_cow_is_borrowed", issue = "65143")]
- pub const fn is_borrowed(&self) -> bool {
- match *self {
- Borrowed(_) => true,
- Owned(_) => false,
- }
- }
-
- /// Returns true if the data is owned, i.e. if `to_mut` would be a no-op.
- ///
- /// # Examples
- ///
- /// ```
- /// #![feature(cow_is_borrowed)]
- /// use std::borrow::Cow;
- ///
- /// let cow: Cow<'_, str> = Cow::Owned("moo".to_string());
- /// assert!(cow.is_owned());
- ///
- /// let bull = Cow::Borrowed("...moo?");
- /// assert!(!bull.is_owned());
- /// ```
- #[unstable(feature = "cow_is_borrowed", issue = "65143")]
- #[rustc_const_unstable(feature = "const_cow_is_borrowed", issue = "65143")]
- pub const fn is_owned(&self) -> bool {
- !self.is_borrowed()
- }
-
- /// Acquires a mutable reference to the owned form of the data.
- ///
- /// Clones the data if it is not already owned.
- ///
- /// # Examples
- ///
- /// ```
- /// use std::borrow::Cow;
- ///
- /// let mut cow = Cow::Borrowed("foo");
- /// cow.to_mut().make_ascii_uppercase();
- ///
- /// assert_eq!(
- /// cow,
- /// Cow::Owned(String::from("FOO")) as Cow<str>
- /// );
- /// ```
- #[stable(feature = "rust1", since = "1.0.0")]
- pub fn to_mut(&mut self) -> &mut <B as ToOwned>::Owned {
- match *self {
- Borrowed(borrowed) => {
- *self = Owned(borrowed.to_owned());
- match *self {
- Borrowed(..) => unreachable!(),
- Owned(ref mut owned) => owned,
- }
- }
- Owned(ref mut owned) => owned,
- }
- }
-
- /// Extracts the owned data.
- ///
- /// Clones the data if it is not already owned.
- ///
- /// # Examples
- ///
- /// Calling `into_owned` on a `Cow::Borrowed` returns a clone of the borrowed data:
- ///
- /// ```
- /// use std::borrow::Cow;
- ///
- /// let s = "Hello world!";
- /// let cow = Cow::Borrowed(s);
- ///
- /// assert_eq!(
- /// cow.into_owned(),
- /// String::from(s)
- /// );
- /// ```
- ///
- /// Calling `into_owned` on a `Cow::Owned` returns the owned data. The data is moved out of the
- /// `Cow` without being cloned.
- ///
- /// ```
- /// use std::borrow::Cow;
- ///
- /// let s = "Hello world!";
- /// let cow: Cow<str> = Cow::Owned(String::from(s));
- ///
- /// assert_eq!(
- /// cow.into_owned(),
- /// String::from(s)
- /// );
- /// ```
- #[stable(feature = "rust1", since = "1.0.0")]
- pub fn into_owned(self) -> <B as ToOwned>::Owned {
- match self {
- Borrowed(borrowed) => borrowed.to_owned(),
- Owned(owned) => owned,
- }
- }
-}
-
-#[stable(feature = "rust1", since = "1.0.0")]
-#[rustc_const_unstable(feature = "const_deref", issue = "88955")]
-impl<B: ?Sized + ToOwned> const Deref for Cow<'_, B>
-where
- B::Owned: ~const Borrow<B>,
-{
- type Target = B;
-
- fn deref(&self) -> &B {
- match *self {
- Borrowed(borrowed) => borrowed,
- Owned(ref owned) => owned.borrow(),
- }
- }
-}
-
-#[stable(feature = "rust1", since = "1.0.0")]
-impl<B: ?Sized> Eq for Cow<'_, B> where B: Eq + ToOwned {}
-
-#[stable(feature = "rust1", since = "1.0.0")]
-impl<B: ?Sized> Ord for Cow<'_, B>
-where
- B: Ord + ToOwned,
-{
- #[inline]
- fn cmp(&self, other: &Self) -> Ordering {
- Ord::cmp(&**self, &**other)
- }
-}
-
-#[stable(feature = "rust1", since = "1.0.0")]
-impl<'a, 'b, B: ?Sized, C: ?Sized> PartialEq<Cow<'b, C>> for Cow<'a, B>
-where
- B: PartialEq<C> + ToOwned,
- C: ToOwned,
-{
- #[inline]
- fn eq(&self, other: &Cow<'b, C>) -> bool {
- PartialEq::eq(&**self, &**other)
- }
-}
-
-#[stable(feature = "rust1", since = "1.0.0")]
-impl<'a, B: ?Sized> PartialOrd for Cow<'a, B>
-where
- B: PartialOrd + ToOwned,
-{
- #[inline]
- fn partial_cmp(&self, other: &Cow<'a, B>) -> Option<Ordering> {
- PartialOrd::partial_cmp(&**self, &**other)
- }
-}
-
-#[stable(feature = "rust1", since = "1.0.0")]
-impl<B: ?Sized> fmt::Debug for Cow<'_, B>
-where
- B: fmt::Debug + ToOwned<Owned: fmt::Debug>,
-{
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- match *self {
- Borrowed(ref b) => fmt::Debug::fmt(b, f),
- Owned(ref o) => fmt::Debug::fmt(o, f),
- }
- }
-}
-
-#[stable(feature = "rust1", since = "1.0.0")]
-impl<B: ?Sized> fmt::Display for Cow<'_, B>
-where
- B: fmt::Display + ToOwned<Owned: fmt::Display>,
-{
- fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
- match *self {
- Borrowed(ref b) => fmt::Display::fmt(b, f),
- Owned(ref o) => fmt::Display::fmt(o, f),
- }
- }
-}
-
-#[stable(feature = "default", since = "1.11.0")]
-impl<B: ?Sized> Default for Cow<'_, B>
-where
- B: ToOwned<Owned: Default>,
-{
- /// Creates an owned Cow<'a, B> with the default value for the contained owned value.
- fn default() -> Self {
- Owned(<B as ToOwned>::Owned::default())
- }
-}
-
-#[stable(feature = "rust1", since = "1.0.0")]
-impl<B: ?Sized> Hash for Cow<'_, B>
-where
- B: Hash + ToOwned,
-{
- #[inline]
- fn hash<H: Hasher>(&self, state: &mut H) {
- Hash::hash(&**self, state)
- }
-}
-
-#[stable(feature = "rust1", since = "1.0.0")]
-impl<T: ?Sized + ToOwned> AsRef<T> for Cow<'_, T> {
- fn as_ref(&self) -> &T {
- self
- }
-}
-
-#[cfg(not(no_global_oom_handling))]
-#[stable(feature = "cow_add", since = "1.14.0")]
-impl<'a> Add<&'a str> for Cow<'a, str> {
- type Output = Cow<'a, str>;
-
- #[inline]
- fn add(mut self, rhs: &'a str) -> Self::Output {
- self += rhs;
- self
- }
-}
-
-#[cfg(not(no_global_oom_handling))]
-#[stable(feature = "cow_add", since = "1.14.0")]
-impl<'a> Add<Cow<'a, str>> for Cow<'a, str> {
- type Output = Cow<'a, str>;
-
- #[inline]
- fn add(mut self, rhs: Cow<'a, str>) -> Self::Output {
- self += rhs;
- self
- }
-}
-
-#[cfg(not(no_global_oom_handling))]
-#[stable(feature = "cow_add", since = "1.14.0")]
-impl<'a> AddAssign<&'a str> for Cow<'a, str> {
- fn add_assign(&mut self, rhs: &'a str) {
- if self.is_empty() {
- *self = Cow::Borrowed(rhs)
- } else if !rhs.is_empty() {
- if let Cow::Borrowed(lhs) = *self {
- let mut s = String::with_capacity(lhs.len() + rhs.len());
- s.push_str(lhs);
- *self = Cow::Owned(s);
- }
- self.to_mut().push_str(rhs);
- }
- }
-}
-
-#[cfg(not(no_global_oom_handling))]
-#[stable(feature = "cow_add", since = "1.14.0")]
-impl<'a> AddAssign<Cow<'a, str>> for Cow<'a, str> {
- fn add_assign(&mut self, rhs: Cow<'a, str>) {
- if self.is_empty() {
- *self = rhs
- } else if !rhs.is_empty() {
- if let Cow::Borrowed(lhs) = *self {
- let mut s = String::with_capacity(lhs.len() + rhs.len());
- s.push_str(lhs);
- *self = Cow::Owned(s);
- }
- self.to_mut().push_str(&rhs);
- }
- }
-}
diff --git a/rust/alloc/boxed.rs b/rust/alloc/boxed.rs
index 5e7518bffbfb..d4a03edd7d89 100644
--- a/rust/alloc/boxed.rs
+++ b/rust/alloc/boxed.rs
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
-//! A pointer type for heap allocation.
+//! The `Box<T>` type for heap allocation.
//!
//! [`Box<T>`], casually referred to as a 'box', provides the simplest form of
//! heap allocation in Rust. Boxes provide ownership for this allocation, and
@@ -124,7 +124,21 @@
//! definition is just using `T*` can lead to undefined behavior, as
//! described in [rust-lang/unsafe-code-guidelines#198][ucg#198].
//!
+//! # Considerations for unsafe code
+//!
+//! **Warning: This section is not normative and is subject to change, possibly
+//! being relaxed in the future! It is a simplified summary of the rules
+//! currently implemented in the compiler.**
+//!
+//! The aliasing rules for `Box<T>` are the same as for `&mut T`. `Box<T>`
+//! asserts uniqueness over its content. Using raw pointers derived from a box
+//! after that box has been mutated through, moved or borrowed as `&mut T`
+//! is not allowed. For more guidance on working with box from unsafe code, see
+//! [rust-lang/unsafe-code-guidelines#326][ucg#326].
+//!
+//!
//! [ucg#198]: https://github.com/rust-lang/unsafe-code-guidelines/issues/198
+//! [ucg#326]: https://github.com/rust-lang/unsafe-code-guidelines/issues/326
//! [dereferencing]: core::ops::Deref
//! [`Box::<T>::from_raw(value)`]: Box::from_raw
//! [`Global`]: crate::alloc::Global
@@ -139,6 +153,7 @@ use core::async_iter::AsyncIterator;
use core::borrow;
use core::cmp::Ordering;
use core::convert::{From, TryFrom};
+use core::error::Error;
use core::fmt;
use core::future::Future;
use core::hash::{Hash, Hasher};
@@ -163,6 +178,8 @@ use crate::raw_vec::RawVec;
#[cfg(not(no_global_oom_handling))]
use crate::str::from_boxed_utf8_unchecked;
#[cfg(not(no_global_oom_handling))]
+use crate::string::String;
+#[cfg(not(no_global_oom_handling))]
use crate::vec::Vec;
#[cfg(not(no_thin))]
@@ -196,12 +213,13 @@ impl<T> Box<T> {
/// ```
/// let five = Box::new(5);
/// ```
- #[cfg(not(no_global_oom_handling))]
+ #[cfg(all(not(no_global_oom_handling)))]
#[inline(always)]
#[stable(feature = "rust1", since = "1.0.0")]
#[must_use]
pub fn new(x: T) -> Self {
- box x
+ #[rustc_box]
+ Box::new(x)
}
/// Constructs a new box with uninitialized contents.
@@ -256,14 +274,21 @@ impl<T> Box<T> {
Self::new_zeroed_in(Global)
}
- /// Constructs a new `Pin<Box<T>>`. If `T` does not implement `Unpin`, then
+ /// Constructs a new `Pin<Box<T>>`. If `T` does not implement [`Unpin`], then
/// `x` will be pinned in memory and unable to be moved.
+ ///
+ /// Constructing and pinning of the `Box` can also be done in two steps: `Box::pin(x)`
+ /// does the same as <code>[Box::into_pin]\([Box::new]\(x))</code>. Consider using
+ /// [`into_pin`](Box::into_pin) if you already have a `Box<T>`, or if you want to
+ /// construct a (pinned) `Box` in a different way than with [`Box::new`].
#[cfg(not(no_global_oom_handling))]
#[stable(feature = "pin", since = "1.33.0")]
#[must_use]
#[inline(always)]
pub fn pin(x: T) -> Pin<Box<T>> {
- (box x).into()
+ (#[rustc_box]
+ Box::new(x))
+ .into()
}
/// Allocates memory on the heap then places `x` into it,
@@ -543,8 +568,13 @@ impl<T, A: Allocator> Box<T, A> {
unsafe { Ok(Box::from_raw_in(ptr.as_ptr(), alloc)) }
}
- /// Constructs a new `Pin<Box<T, A>>`. If `T` does not implement `Unpin`, then
+ /// Constructs a new `Pin<Box<T, A>>`. If `T` does not implement [`Unpin`], then
/// `x` will be pinned in memory and unable to be moved.
+ ///
+ /// Constructing and pinning of the `Box` can also be done in two steps: `Box::pin_in(x, alloc)`
+ /// does the same as <code>[Box::into_pin]\([Box::new_in]\(x, alloc))</code>. Consider using
+ /// [`into_pin`](Box::into_pin) if you already have a `Box<T, A>`, or if you want to
+ /// construct a (pinned) `Box` in a different way than with [`Box::new_in`].
#[cfg(not(no_global_oom_handling))]
#[unstable(feature = "allocator_api", issue = "32838")]
#[rustc_const_unstable(feature = "const_box", issue = "92521")]
@@ -926,6 +956,7 @@ impl<T: ?Sized> Box<T> {
/// [`Layout`]: crate::Layout
#[stable(feature = "box_raw", since = "1.4.0")]
#[inline]
+ #[must_use = "call `drop(from_raw(ptr))` if you intend to drop the `Box`"]
pub unsafe fn from_raw(raw: *mut T) -> Self {
unsafe { Self::from_raw_in(raw, Global) }
}
@@ -1160,19 +1191,44 @@ impl<T: ?Sized, A: Allocator> Box<T, A> {
unsafe { &mut *mem::ManuallyDrop::new(b).0.as_ptr() }
}
- /// Converts a `Box<T>` into a `Pin<Box<T>>`
+ /// Converts a `Box<T>` into a `Pin<Box<T>>`. If `T` does not implement [`Unpin`], then
+ /// `*boxed` will be pinned in memory and unable to be moved.
///
/// This conversion does not allocate on the heap and happens in place.
///
/// This is also available via [`From`].
- #[unstable(feature = "box_into_pin", issue = "62370")]
+ ///
+ /// Constructing and pinning a `Box` with <code>Box::into_pin([Box::new]\(x))</code>
+ /// can also be written more concisely using <code>[Box::pin]\(x)</code>.
+ /// This `into_pin` method is useful if you already have a `Box<T>`, or you are
+ /// constructing a (pinned) `Box` in a different way than with [`Box::new`].
+ ///
+ /// # Notes
+ ///
+ /// It's not recommended that crates add an impl like `From<Box<T>> for Pin<T>`,
+ /// as it'll introduce an ambiguity when calling `Pin::from`.
+ /// A demonstration of such a poor impl is shown below.
+ ///
+ /// ```compile_fail
+ /// # use std::pin::Pin;
+ /// struct Foo; // A type defined in this crate.
+ /// impl From<Box<()>> for Pin<Foo> {
+ /// fn from(_: Box<()>) -> Pin<Foo> {
+ /// Pin::new(Foo)
+ /// }
+ /// }
+ ///
+ /// let foo = Box::new(());
+ /// let bar = Pin::from(foo);
+ /// ```
+ #[stable(feature = "box_into_pin", since = "1.63.0")]
#[rustc_const_unstable(feature = "const_box", issue = "92521")]
pub const fn into_pin(boxed: Self) -> Pin<Self>
where
A: 'static,
{
// It's not possible to move or replace the insides of a `Pin<Box<T>>`
- // when `T: !Unpin`, so it's safe to pin it directly without any
+ // when `T: !Unpin`, so it's safe to pin it directly without any
// additional requirements.
unsafe { Pin::new_unchecked(boxed) }
}
@@ -1190,7 +1246,8 @@ unsafe impl<#[may_dangle] T: ?Sized, A: Allocator> Drop for Box<T, A> {
impl<T: Default> Default for Box<T> {
/// Creates a `Box<T>`, with the `Default` value for T.
fn default() -> Self {
- box T::default()
+ #[rustc_box]
+ Box::new(T::default())
}
}
@@ -1408,9 +1465,17 @@ impl<T: ?Sized, A: Allocator> const From<Box<T, A>> for Pin<Box<T, A>>
where
A: 'static,
{
- /// Converts a `Box<T>` into a `Pin<Box<T>>`
+ /// Converts a `Box<T>` into a `Pin<Box<T>>`. If `T` does not implement [`Unpin`], then
+ /// `*boxed` will be pinned in memory and unable to be moved.
///
/// This conversion does not allocate on the heap and happens in place.
+ ///
+ /// This is also available via [`Box::into_pin`].
+ ///
+ /// Constructing and pinning a `Box` with <code><Pin<Box\<T>>>::from([Box::new]\(x))</code>
+ /// can also be written more concisely using <code>[Box::pin]\(x)</code>.
+ /// This `From` implementation is useful if you already have a `Box<T>`, or you are
+ /// constructing a (pinned) `Box` in a different way than with [`Box::new`].
fn from(boxed: Box<T, A>) -> Self {
Box::into_pin(boxed)
}
@@ -1422,7 +1487,7 @@ impl<T: Copy> From<&[T]> for Box<[T]> {
/// Converts a `&[T]` into a `Box<[T]>`
///
/// This conversion allocates on the heap
- /// and performs a copy of `slice`.
+ /// and performs a copy of `slice` and its contents.
///
/// # Examples
/// ```rust
@@ -1554,10 +1619,27 @@ impl<T, const N: usize> From<[T; N]> for Box<[T]> {
/// println!("{boxed:?}");
/// ```
fn from(array: [T; N]) -> Box<[T]> {
- box array
+ #[rustc_box]
+ Box::new(array)
}
}
+/// Casts a boxed slice to a boxed array.
+///
+/// # Safety
+///
+/// `boxed_slice.len()` must be exactly `N`.
+unsafe fn boxed_slice_as_array_unchecked<T, A: Allocator, const N: usize>(
+ boxed_slice: Box<[T], A>,
+) -> Box<[T; N], A> {
+ debug_assert_eq!(boxed_slice.len(), N);
+
+ let (ptr, alloc) = Box::into_raw_with_allocator(boxed_slice);
+ // SAFETY: Pointer and allocator came from an existing box,
+ // and our safety condition requires that the length is exactly `N`
+ unsafe { Box::from_raw_in(ptr as *mut [T; N], alloc) }
+}
+
#[stable(feature = "boxed_slice_try_from", since = "1.43.0")]
impl<T, const N: usize> TryFrom<Box<[T]>> for Box<[T; N]> {
type Error = Box<[T]>;
@@ -1573,13 +1655,46 @@ impl<T, const N: usize> TryFrom<Box<[T]>> for Box<[T; N]> {
/// `boxed_slice.len()` does not equal `N`.
fn try_from(boxed_slice: Box<[T]>) -> Result<Self, Self::Error> {
if boxed_slice.len() == N {
- Ok(unsafe { Box::from_raw(Box::into_raw(boxed_slice) as *mut [T; N]) })
+ Ok(unsafe { boxed_slice_as_array_unchecked(boxed_slice) })
} else {
Err(boxed_slice)
}
}
}
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "boxed_array_try_from_vec", since = "1.66.0")]
+impl<T, const N: usize> TryFrom<Vec<T>> for Box<[T; N]> {
+ type Error = Vec<T>;
+
+ /// Attempts to convert a `Vec<T>` into a `Box<[T; N]>`.
+ ///
+ /// Like [`Vec::into_boxed_slice`], this is in-place if `vec.capacity() == N`,
+ /// but will require a reallocation otherwise.
+ ///
+ /// # Errors
+ ///
+ /// Returns the original `Vec<T>` in the `Err` variant if
+ /// `vec.len()` does not equal `N`.
+ ///
+ /// # Examples
+ ///
+ /// This can be used with [`vec!`] to create an array on the heap:
+ ///
+ /// ```
+ /// let state: Box<[f32; 100]> = vec![1.0; 100].try_into().unwrap();
+ /// assert_eq!(state.len(), 100);
+ /// ```
+ fn try_from(vec: Vec<T>) -> Result<Self, Self::Error> {
+ if vec.len() == N {
+ let boxed_slice = vec.into_boxed_slice();
+ Ok(unsafe { boxed_slice_as_array_unchecked(boxed_slice) })
+ } else {
+ Err(vec)
+ }
+ }
+}
+
impl<A: Allocator> Box<dyn Any, A> {
/// Attempt to downcast the box to a concrete type.
///
@@ -1973,8 +2088,7 @@ impl<T: ?Sized, A: Allocator> AsMut<T> for Box<T, A> {
* could have a method to project a Pin<T> from it.
*/
#[stable(feature = "pin", since = "1.33.0")]
-#[rustc_const_unstable(feature = "const_box", issue = "92521")]
-impl<T: ?Sized, A: Allocator> const Unpin for Box<T, A> where A: 'static {}
+impl<T: ?Sized, A: Allocator> Unpin for Box<T, A> where A: 'static {}
#[unstable(feature = "generator_trait", issue = "43122")]
impl<G: ?Sized + Generator<R> + Unpin, R, A: Allocator> Generator<R> for Box<G, A>
@@ -2026,3 +2140,292 @@ impl<S: ?Sized + AsyncIterator + Unpin> AsyncIterator for Box<S> {
(**self).size_hint()
}
}
+
+impl dyn Error {
+ #[inline]
+ #[stable(feature = "error_downcast", since = "1.3.0")]
+ #[rustc_allow_incoherent_impl]
+ /// Attempts to downcast the box to a concrete type.
+ pub fn downcast<T: Error + 'static>(self: Box<Self>) -> Result<Box<T>, Box<dyn Error>> {
+ if self.is::<T>() {
+ unsafe {
+ let raw: *mut dyn Error = Box::into_raw(self);
+ Ok(Box::from_raw(raw as *mut T))
+ }
+ } else {
+ Err(self)
+ }
+ }
+}
+
+impl dyn Error + Send {
+ #[inline]
+ #[stable(feature = "error_downcast", since = "1.3.0")]
+ #[rustc_allow_incoherent_impl]
+ /// Attempts to downcast the box to a concrete type.
+ pub fn downcast<T: Error + 'static>(self: Box<Self>) -> Result<Box<T>, Box<dyn Error + Send>> {
+ let err: Box<dyn Error> = self;
+ <dyn Error>::downcast(err).map_err(|s| unsafe {
+ // Reapply the `Send` marker.
+ mem::transmute::<Box<dyn Error>, Box<dyn Error + Send>>(s)
+ })
+ }
+}
+
+impl dyn Error + Send + Sync {
+ #[inline]
+ #[stable(feature = "error_downcast", since = "1.3.0")]
+ #[rustc_allow_incoherent_impl]
+ /// Attempts to downcast the box to a concrete type.
+ pub fn downcast<T: Error + 'static>(self: Box<Self>) -> Result<Box<T>, Box<Self>> {
+ let err: Box<dyn Error> = self;
+ <dyn Error>::downcast(err).map_err(|s| unsafe {
+ // Reapply the `Send + Sync` marker.
+ mem::transmute::<Box<dyn Error>, Box<dyn Error + Send + Sync>>(s)
+ })
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "rust1", since = "1.0.0")]
+impl<'a, E: Error + 'a> From<E> for Box<dyn Error + 'a> {
+ /// Converts a type of [`Error`] into a box of dyn [`Error`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use std::fmt;
+ /// use std::mem;
+ ///
+ /// #[derive(Debug)]
+ /// struct AnError;
+ ///
+ /// impl fmt::Display for AnError {
+ /// fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ /// write!(f, "An error")
+ /// }
+ /// }
+ ///
+ /// impl Error for AnError {}
+ ///
+ /// let an_error = AnError;
+ /// assert!(0 == mem::size_of_val(&an_error));
+ /// let a_boxed_error = Box::<dyn Error>::from(an_error);
+ /// assert!(mem::size_of::<Box<dyn Error>>() == mem::size_of_val(&a_boxed_error))
+ /// ```
+ fn from(err: E) -> Box<dyn Error + 'a> {
+ Box::new(err)
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "rust1", since = "1.0.0")]
+impl<'a, E: Error + Send + Sync + 'a> From<E> for Box<dyn Error + Send + Sync + 'a> {
+ /// Converts a type of [`Error`] + [`Send`] + [`Sync`] into a box of
+ /// dyn [`Error`] + [`Send`] + [`Sync`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use std::fmt;
+ /// use std::mem;
+ ///
+ /// #[derive(Debug)]
+ /// struct AnError;
+ ///
+ /// impl fmt::Display for AnError {
+ /// fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ /// write!(f, "An error")
+ /// }
+ /// }
+ ///
+ /// impl Error for AnError {}
+ ///
+ /// unsafe impl Send for AnError {}
+ ///
+ /// unsafe impl Sync for AnError {}
+ ///
+ /// let an_error = AnError;
+ /// assert!(0 == mem::size_of_val(&an_error));
+ /// let a_boxed_error = Box::<dyn Error + Send + Sync>::from(an_error);
+ /// assert!(
+ /// mem::size_of::<Box<dyn Error + Send + Sync>>() == mem::size_of_val(&a_boxed_error))
+ /// ```
+ fn from(err: E) -> Box<dyn Error + Send + Sync + 'a> {
+ Box::new(err)
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "rust1", since = "1.0.0")]
+impl From<String> for Box<dyn Error + Send + Sync> {
+ /// Converts a [`String`] into a box of dyn [`Error`] + [`Send`] + [`Sync`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use std::mem;
+ ///
+ /// let a_string_error = "a string error".to_string();
+ /// let a_boxed_error = Box::<dyn Error + Send + Sync>::from(a_string_error);
+ /// assert!(
+ /// mem::size_of::<Box<dyn Error + Send + Sync>>() == mem::size_of_val(&a_boxed_error))
+ /// ```
+ #[inline]
+ fn from(err: String) -> Box<dyn Error + Send + Sync> {
+ struct StringError(String);
+
+ impl Error for StringError {
+ #[allow(deprecated)]
+ fn description(&self) -> &str {
+ &self.0
+ }
+ }
+
+ impl fmt::Display for StringError {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ fmt::Display::fmt(&self.0, f)
+ }
+ }
+
+ // Purposefully skip printing "StringError(..)"
+ impl fmt::Debug for StringError {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ fmt::Debug::fmt(&self.0, f)
+ }
+ }
+
+ Box::new(StringError(err))
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "string_box_error", since = "1.6.0")]
+impl From<String> for Box<dyn Error> {
+ /// Converts a [`String`] into a box of dyn [`Error`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use std::mem;
+ ///
+ /// let a_string_error = "a string error".to_string();
+ /// let a_boxed_error = Box::<dyn Error>::from(a_string_error);
+ /// assert!(mem::size_of::<Box<dyn Error>>() == mem::size_of_val(&a_boxed_error))
+ /// ```
+ fn from(str_err: String) -> Box<dyn Error> {
+ let err1: Box<dyn Error + Send + Sync> = From::from(str_err);
+ let err2: Box<dyn Error> = err1;
+ err2
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "rust1", since = "1.0.0")]
+impl<'a> From<&str> for Box<dyn Error + Send + Sync + 'a> {
+ /// Converts a [`str`] into a box of dyn [`Error`] + [`Send`] + [`Sync`].
+ ///
+ /// [`str`]: prim@str
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use std::mem;
+ ///
+ /// let a_str_error = "a str error";
+ /// let a_boxed_error = Box::<dyn Error + Send + Sync>::from(a_str_error);
+ /// assert!(
+ /// mem::size_of::<Box<dyn Error + Send + Sync>>() == mem::size_of_val(&a_boxed_error))
+ /// ```
+ #[inline]
+ fn from(err: &str) -> Box<dyn Error + Send + Sync + 'a> {
+ From::from(String::from(err))
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "string_box_error", since = "1.6.0")]
+impl From<&str> for Box<dyn Error> {
+ /// Converts a [`str`] into a box of dyn [`Error`].
+ ///
+ /// [`str`]: prim@str
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use std::mem;
+ ///
+ /// let a_str_error = "a str error";
+ /// let a_boxed_error = Box::<dyn Error>::from(a_str_error);
+ /// assert!(mem::size_of::<Box<dyn Error>>() == mem::size_of_val(&a_boxed_error))
+ /// ```
+ fn from(err: &str) -> Box<dyn Error> {
+ From::from(String::from(err))
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "cow_box_error", since = "1.22.0")]
+impl<'a, 'b> From<Cow<'b, str>> for Box<dyn Error + Send + Sync + 'a> {
+ /// Converts a [`Cow`] into a box of dyn [`Error`] + [`Send`] + [`Sync`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use std::mem;
+ /// use std::borrow::Cow;
+ ///
+ /// let a_cow_str_error = Cow::from("a str error");
+ /// let a_boxed_error = Box::<dyn Error + Send + Sync>::from(a_cow_str_error);
+ /// assert!(
+ /// mem::size_of::<Box<dyn Error + Send + Sync>>() == mem::size_of_val(&a_boxed_error))
+ /// ```
+ fn from(err: Cow<'b, str>) -> Box<dyn Error + Send + Sync + 'a> {
+ From::from(String::from(err))
+ }
+}
+
+#[cfg(not(no_global_oom_handling))]
+#[stable(feature = "cow_box_error", since = "1.22.0")]
+impl<'a> From<Cow<'a, str>> for Box<dyn Error> {
+ /// Converts a [`Cow`] into a box of dyn [`Error`].
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// use std::error::Error;
+ /// use std::mem;
+ /// use std::borrow::Cow;
+ ///
+ /// let a_cow_str_error = Cow::from("a str error");
+ /// let a_boxed_error = Box::<dyn Error>::from(a_cow_str_error);
+ /// assert!(mem::size_of::<Box<dyn Error>>() == mem::size_of_val(&a_boxed_error))
+ /// ```
+ fn from(err: Cow<'a, str>) -> Box<dyn Error> {
+ From::from(String::from(err))
+ }
+}
+
+#[stable(feature = "box_error", since = "1.8.0")]
+impl<T: core::error::Error> core::error::Error for Box<T> {
+ #[allow(deprecated, deprecated_in_future)]
+ fn description(&self) -> &str {
+ core::error::Error::description(&**self)
+ }
+
+ #[allow(deprecated)]
+ fn cause(&self) -> Option<&dyn core::error::Error> {
+ core::error::Error::cause(&**self)
+ }
+
+ fn source(&self) -> Option<&(dyn core::error::Error + 'static)> {
+ core::error::Error::source(&**self)
+ }
+}
diff --git a/rust/alloc/collections/mod.rs b/rust/alloc/collections/mod.rs
index 1eec265b28f8..da6154412416 100644
--- a/rust/alloc/collections/mod.rs
+++ b/rust/alloc/collections/mod.rs
@@ -154,3 +154,6 @@ trait SpecExtend<I: IntoIterator> {
/// Extends `self` with the contents of the given iterator.
fn spec_extend(&mut self, iter: I);
}
+
+#[stable(feature = "try_reserve", since = "1.57.0")]
+impl core::error::Error for TryReserveError {}
diff --git a/rust/alloc/lib.rs b/rust/alloc/lib.rs
index b2a13a53f19a..cc3850c3b519 100644
--- a/rust/alloc/lib.rs
+++ b/rust/alloc/lib.rs
@@ -58,10 +58,6 @@
//! [`Rc`]: rc
//! [`RefCell`]: core::cell
-// To run liballoc tests without x.py without ending up with two copies of liballoc, Miri needs to be
-// able to "empty" this crate. See <https://github.com/rust-lang/miri-test-libstd/issues/4>.
-// rustc itself never sets the feature, so this line has no affect there.
-#![cfg(any(not(feature = "miri-test-libstd"), test, doctest))]
#![allow(unused_attributes)]
#![stable(feature = "alloc", since = "1.36.0")]
#![doc(
@@ -75,10 +71,16 @@
any(not(feature = "miri-test-libstd"), test, doctest),
no_global_oom_handling,
not(no_global_oom_handling),
+ not(no_rc),
+ not(no_sync),
target_has_atomic = "ptr"
))]
#![no_std]
#![needs_allocator]
+// To run liballoc tests without x.py without ending up with two copies of liballoc, Miri needs to be
+// able to "empty" this crate. See <https://github.com/rust-lang/miri-test-libstd/issues/4>.
+// rustc itself never sets the feature, so this line has no effect there.
+#![cfg(any(not(feature = "miri-test-libstd"), test, doctest))]
//
// Lints:
#![deny(unsafe_op_in_unsafe_fn)]
@@ -88,10 +90,10 @@
#![allow(explicit_outlives_requirements)]
//
// Library features:
-#![cfg_attr(not(no_global_oom_handling), feature(alloc_c_string))]
#![feature(alloc_layout_extra)]
#![feature(allocator_api)]
#![feature(array_chunks)]
+#![feature(array_into_iter_constructors)]
#![feature(array_methods)]
#![feature(array_windows)]
#![feature(assert_matches)]
@@ -99,8 +101,8 @@
#![feature(coerce_unsized)]
#![cfg_attr(not(no_global_oom_handling), feature(const_alloc_error))]
#![feature(const_box)]
-#![cfg_attr(not(no_global_oom_handling), feature(const_btree_new))]
-#![feature(const_cow_is_borrowed)]
+#![cfg_attr(not(no_global_oom_handling), feature(const_btree_len))]
+#![cfg_attr(not(no_borrow), feature(const_cow_is_borrowed))]
#![feature(const_convert)]
#![feature(const_size_of_val)]
#![feature(const_align_of_val)]
@@ -108,13 +110,14 @@
#![feature(const_maybe_uninit_write)]
#![feature(const_maybe_uninit_as_mut_ptr)]
#![feature(const_refs_to_cell)]
-#![feature(core_c_str)]
#![feature(core_intrinsics)]
-#![feature(core_ffi_c)]
#![feature(const_eval_select)]
#![feature(const_pin)]
+#![feature(const_waker)]
#![feature(cstr_from_bytes_until_nul)]
#![feature(dispatch_from_dyn)]
+#![feature(error_generic_member_access)]
+#![feature(error_in_core)]
#![feature(exact_size_is_empty)]
#![feature(extend_one)]
#![feature(fmt_internals)]
@@ -122,16 +125,24 @@
#![feature(hasher_prefixfree_extras)]
#![feature(inplace_iteration)]
#![feature(iter_advance_by)]
+#![feature(iter_next_chunk)]
#![feature(layout_for_ptr)]
#![feature(maybe_uninit_slice)]
+#![feature(maybe_uninit_uninit_array)]
+#![feature(maybe_uninit_uninit_array_transpose)]
#![cfg_attr(test, feature(new_uninit))]
#![feature(nonnull_slice_from_raw_parts)]
#![feature(pattern)]
+#![feature(pointer_byte_offsets)]
+#![feature(provide_any)]
#![feature(ptr_internals)]
#![feature(ptr_metadata)]
#![feature(ptr_sub_ptr)]
#![feature(receiver_trait)]
+#![feature(saturating_int_impl)]
#![feature(set_ptr_value)]
+#![feature(sized_type_properties)]
+#![feature(slice_from_ptr_range)]
#![feature(slice_group_by)]
#![feature(slice_ptr_get)]
#![feature(slice_ptr_len)]
@@ -145,12 +156,13 @@
#![feature(unchecked_math)]
#![feature(unicode_internals)]
#![feature(unsize)]
+#![feature(utf8_chunks)]
+#![feature(std_internals)]
//
// Language features:
#![feature(allocator_internals)]
#![feature(allow_internal_unstable)]
#![feature(associated_type_bounds)]
-#![feature(box_syntax)]
#![feature(cfg_sanitize)]
#![feature(const_deref)]
#![feature(const_mut_refs)]
@@ -164,19 +176,20 @@
#![cfg_attr(not(test), feature(generator_trait))]
#![feature(hashmap_internals)]
#![feature(lang_items)]
-#![feature(let_else)]
#![feature(min_specialization)]
#![feature(negative_impls)]
#![feature(never_type)]
-#![feature(nll)] // Not necessary, but here to test the `nll` feature.
#![feature(rustc_allow_const_fn_unstable)]
#![feature(rustc_attrs)]
+#![feature(pointer_is_aligned)]
#![feature(slice_internals)]
#![feature(staged_api)]
+#![feature(stmt_expr_attributes)]
#![cfg_attr(test, feature(test))]
#![feature(unboxed_closures)]
#![feature(unsized_fn_params)]
#![feature(c_unwind)]
+#![feature(with_negative_coherence)]
//
// Rustdoc features:
#![feature(doc_cfg)]
@@ -216,9 +229,10 @@ pub mod boxed;
mod boxed {
pub use std::boxed::Box;
}
+#[cfg(not(no_borrow))]
pub mod borrow;
pub mod collections;
-#[cfg(not(no_global_oom_handling))]
+#[cfg(all(not(no_rc), not(no_sync), not(no_global_oom_handling)))]
pub mod ffi;
#[cfg(not(no_fmt))]
pub mod fmt;
@@ -230,9 +244,9 @@ pub mod str;
#[cfg(not(no_string))]
pub mod string;
#[cfg(not(no_sync))]
-#[cfg(target_has_atomic = "ptr")]
+#[cfg(all(not(no_rc), target_has_atomic = "ptr"))]
pub mod sync;
-#[cfg(all(not(no_global_oom_handling), target_has_atomic = "ptr"))]
+#[cfg(all(not(no_global_oom_handling), not(no_rc), not(no_sync), target_has_atomic = "ptr"))]
pub mod task;
#[cfg(test)]
mod tests;
diff --git a/rust/alloc/raw_vec.rs b/rust/alloc/raw_vec.rs
index eb77db5def55..f02faff67a80 100644
--- a/rust/alloc/raw_vec.rs
+++ b/rust/alloc/raw_vec.rs
@@ -5,7 +5,7 @@
use core::alloc::LayoutError;
use core::cmp;
use core::intrinsics;
-use core::mem::{self, ManuallyDrop, MaybeUninit};
+use core::mem::{self, ManuallyDrop, MaybeUninit, SizedTypeProperties};
use core::ops::Drop;
use core::ptr::{self, NonNull, Unique};
use core::slice;
@@ -177,7 +177,7 @@ impl<T, A: Allocator> RawVec<T, A> {
#[cfg(not(no_global_oom_handling))]
fn allocate_in(capacity: usize, init: AllocInit, alloc: A) -> Self {
// Don't allocate here because `Drop` will not deallocate when `capacity` is 0.
- if mem::size_of::<T>() == 0 || capacity == 0 {
+ if T::IS_ZST || capacity == 0 {
Self::new_in(alloc)
} else {
// We avoid `unwrap_or_else` here because it bloats the amount of
@@ -212,7 +212,7 @@ impl<T, A: Allocator> RawVec<T, A> {
fn try_allocate_in(capacity: usize, init: AllocInit, alloc: A) -> Result<Self, TryReserveError> {
// Don't allocate here because `Drop` will not deallocate when `capacity` is 0.
- if mem::size_of::<T>() == 0 || capacity == 0 {
+ if T::IS_ZST || capacity == 0 {
return Ok(Self::new_in(alloc));
}
@@ -262,7 +262,7 @@ impl<T, A: Allocator> RawVec<T, A> {
/// This will always be `usize::MAX` if `T` is zero-sized.
#[inline(always)]
pub fn capacity(&self) -> usize {
- if mem::size_of::<T>() == 0 { usize::MAX } else { self.cap }
+ if T::IS_ZST { usize::MAX } else { self.cap }
}
/// Returns a shared reference to the allocator backing this `RawVec`.
@@ -271,7 +271,7 @@ impl<T, A: Allocator> RawVec<T, A> {
}
fn current_memory(&self) -> Option<(NonNull<u8>, Layout)> {
- if mem::size_of::<T>() == 0 || self.cap == 0 {
+ if T::IS_ZST || self.cap == 0 {
None
} else {
// We have an allocated chunk of memory, so we can bypass runtime
@@ -419,7 +419,7 @@ impl<T, A: Allocator> RawVec<T, A> {
// This is ensured by the calling contexts.
debug_assert!(additional > 0);
- if mem::size_of::<T>() == 0 {
+ if T::IS_ZST {
// Since we return a capacity of `usize::MAX` when `elem_size` is
// 0, getting to here necessarily means the `RawVec` is overfull.
return Err(CapacityOverflow.into());
@@ -445,7 +445,7 @@ impl<T, A: Allocator> RawVec<T, A> {
// `grow_amortized`, but this method is usually instantiated less often so
// it's less critical.
fn grow_exact(&mut self, len: usize, additional: usize) -> Result<(), TryReserveError> {
- if mem::size_of::<T>() == 0 {
+ if T::IS_ZST {
// Since we return a capacity of `usize::MAX` when the type size is
// 0, getting to here necessarily means the `RawVec` is overfull.
return Err(CapacityOverflow.into());
diff --git a/rust/alloc/slice.rs b/rust/alloc/slice.rs
index e444e97fa145..467935cf9a06 100644
--- a/rust/alloc/slice.rs
+++ b/rust/alloc/slice.rs
@@ -1,84 +1,14 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
-//! A dynamically-sized view into a contiguous sequence, `[T]`.
+//! Utilities for the slice primitive type.
//!
//! *[See also the slice primitive type](slice).*
//!
-//! Slices are a view into a block of memory represented as a pointer and a
-//! length.
+//! Most of the structs in this module are iterator types which can only be created
+//! using a certain function. For example, `slice.iter()` yields an [`Iter`].
//!
-//! ```
-//! // slicing a Vec
-//! let vec = vec![1, 2, 3];
-//! let int_slice = &vec[..];
-//! // coercing an array to a slice
-//! let str_slice: &[&str] = &["one", "two", "three"];
-//! ```
-//!
-//! Slices are either mutable or shared. The shared slice type is `&[T]`,
-//! while the mutable slice type is `&mut [T]`, where `T` represents the element
-//! type. For example, you can mutate the block of memory that a mutable slice
-//! points to:
-//!
-//! ```
-//! let x = &mut [1, 2, 3];
-//! x[1] = 7;
-//! assert_eq!(x, &[1, 7, 3]);
-//! ```
-//!
-//! Here are some of the things this module contains:
-//!
-//! ## Structs
-//!
-//! There are several structs that are useful for slices, such as [`Iter`], which
-//! represents iteration over a slice.
-//!
-//! ## Trait Implementations
-//!
-//! There are several implementations of common traits for slices. Some examples
-//! include:
-//!
-//! * [`Clone`]
-//! * [`Eq`], [`Ord`] - for slices whose element type are [`Eq`] or [`Ord`].
-//! * [`Hash`] - for slices whose element type is [`Hash`].
-//!
-//! ## Iteration
-//!
-//! The slices implement `IntoIterator`. The iterator yields references to the
-//! slice elements.
-//!
-//! ```
-//! let numbers = &[0, 1, 2];
-//! for n in numbers {
-//! println!("{n} is a number!");
-//! }
-//! ```
-//!
-//! The mutable slice yields mutable references to the elements:
-//!
-//! ```
-//! let mut scores = [7, 8, 9];
-//! for score in &mut scores[..] {
-//! *score += 1;
-//! }
-//! ```
-//!
-//! This iterator yields mutable references to the slice's elements, so while
-//! the element type of the slice is `i32`, the element type of the iterator is
-//! `&mut i32`.
-//!
-//! * [`.iter`] and [`.iter_mut`] are the explicit methods to return the default
-//! iterators.
-//! * Further methods that return iterators are [`.split`], [`.splitn`],
-//! [`.chunks`], [`.windows`] and more.
-//!
-//! [`Hash`]: core::hash::Hash
-//! [`.iter`]: slice::iter
-//! [`.iter_mut`]: slice::iter_mut
-//! [`.split`]: slice::split
-//! [`.splitn`]: slice::splitn
-//! [`.chunks`]: slice::chunks
-//! [`.windows`]: slice::windows
+//! A few functions are provided to create a slice from a value reference
+//! or from a raw pointer.
#![stable(feature = "rust1", since = "1.0.0")]
// Many of the usings in this module are only used in the test configuration.
// It's cleaner to just turn off the unused_imports warning than to fix them.
@@ -88,9 +18,7 @@ use core::borrow::{Borrow, BorrowMut};
#[cfg(not(no_global_oom_handling))]
use core::cmp::Ordering::{self, Less};
#[cfg(not(no_global_oom_handling))]
-use core::mem;
-#[cfg(not(no_global_oom_handling))]
-use core::mem::size_of;
+use core::mem::{self, SizedTypeProperties};
#[cfg(not(no_global_oom_handling))]
use core::ptr;
@@ -116,6 +44,8 @@ pub use core::slice::EscapeAscii;
pub use core::slice::SliceIndex;
#[stable(feature = "from_ref", since = "1.28.0")]
pub use core::slice::{from_mut, from_ref};
+#[unstable(feature = "slice_from_ptr_range", issue = "89792")]
+pub use core::slice::{from_mut_ptr_range, from_ptr_range};
#[stable(feature = "rust1", since = "1.0.0")]
pub use core::slice::{from_raw_parts, from_raw_parts_mut};
#[stable(feature = "rust1", since = "1.0.0")]
@@ -275,7 +205,7 @@ impl<T> [T] {
where
T: Ord,
{
- merge_sort(self, |a, b| a.lt(b));
+ merge_sort(self, T::lt);
}
/// Sorts the slice with a comparator function.
@@ -836,14 +766,14 @@ impl<T: Clone, V: Borrow<[T]>> Join<&[T]> for [V] {
////////////////////////////////////////////////////////////////////////////////
#[stable(feature = "rust1", since = "1.0.0")]
-impl<T> Borrow<[T]> for Vec<T> {
+impl<T, A: Allocator> Borrow<[T]> for Vec<T, A> {
fn borrow(&self) -> &[T] {
&self[..]
}
}
#[stable(feature = "rust1", since = "1.0.0")]
-impl<T> BorrowMut<[T]> for Vec<T> {
+impl<T, A: Allocator> BorrowMut<[T]> for Vec<T, A> {
fn borrow_mut(&mut self) -> &mut [T] {
&mut self[..]
}
@@ -1024,7 +954,7 @@ where
// Consume the greater side.
// If equal, prefer the right run to maintain stability.
unsafe {
- let to_copy = if is_less(&*right.offset(-1), &*left.offset(-1)) {
+ let to_copy = if is_less(&*right.sub(1), &*left.sub(1)) {
decrement_and_get(left)
} else {
decrement_and_get(right)
@@ -1038,12 +968,12 @@ where
unsafe fn get_and_increment<T>(ptr: &mut *mut T) -> *mut T {
let old = *ptr;
- *ptr = unsafe { ptr.offset(1) };
+ *ptr = unsafe { ptr.add(1) };
old
}
unsafe fn decrement_and_get<T>(ptr: &mut *mut T) -> *mut T {
- *ptr = unsafe { ptr.offset(-1) };
+ *ptr = unsafe { ptr.sub(1) };
*ptr
}
@@ -1088,7 +1018,7 @@ where
const MIN_RUN: usize = 10;
// Sorting has no meaningful behavior on zero-sized types.
- if size_of::<T>() == 0 {
+ if T::IS_ZST {
return;
}
diff --git a/rust/alloc/vec/drain.rs b/rust/alloc/vec/drain.rs
index b6a5f98e4fcd..3594ad890c3d 100644
--- a/rust/alloc/vec/drain.rs
+++ b/rust/alloc/vec/drain.rs
@@ -3,7 +3,7 @@
use crate::alloc::{Allocator, Global};
use core::fmt;
use core::iter::{FusedIterator, TrustedLen};
-use core::mem;
+use core::mem::{self, ManuallyDrop, SizedTypeProperties};
use core::ptr::{self, NonNull};
use core::slice::{self};
@@ -67,6 +67,77 @@ impl<'a, T, A: Allocator> Drain<'a, T, A> {
pub fn allocator(&self) -> &A {
unsafe { self.vec.as_ref().allocator() }
}
+
+ /// Keep unyielded elements in the source `Vec`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// #![feature(drain_keep_rest)]
+ ///
+ /// let mut vec = vec!['a', 'b', 'c'];
+ /// let mut drain = vec.drain(..);
+ ///
+ /// assert_eq!(drain.next().unwrap(), 'a');
+ ///
+ /// // This call keeps 'b' and 'c' in the vec.
+ /// drain.keep_rest();
+ ///
+ /// // If we didn't call `keep_rest()`,
+ /// // `vec` would be empty.
+ /// assert_eq!(vec, ['b', 'c']);
+ /// ```
+ #[unstable(feature = "drain_keep_rest", issue = "101122")]
+ pub fn keep_rest(self) {
+ // At this moment layout looks like this:
+ //
+ // [head] [yielded by next] [unyielded] [yielded by next_back] [tail]
+ // ^-- start \_________/-- unyielded_len \____/-- self.tail_len
+ // ^-- unyielded_ptr ^-- tail
+ //
+ // Normally `Drop` impl would drop [unyielded] and then move [tail] to the `start`.
+ // Here we want to
+ // 1. Move [unyielded] to `start`
+ // 2. Move [tail] to a new start at `start + len(unyielded)`
+ // 3. Update length of the original vec to `len(head) + len(unyielded) + len(tail)`
+ // a. In case of ZST, this is the only thing we want to do
+ // 4. Do *not* drop self, as everything is put in a consistent state already, there is nothing to do
+ let mut this = ManuallyDrop::new(self);
+
+ unsafe {
+ let source_vec = this.vec.as_mut();
+
+ let start = source_vec.len();
+ let tail = this.tail_start;
+
+ let unyielded_len = this.iter.len();
+ let unyielded_ptr = this.iter.as_slice().as_ptr();
+
+ // ZSTs have no identity, so we don't need to move them around.
+ let needs_move = mem::size_of::<T>() != 0;
+
+ if needs_move {
+ let start_ptr = source_vec.as_mut_ptr().add(start);
+
+ // memmove back unyielded elements
+ if unyielded_ptr != start_ptr {
+ let src = unyielded_ptr;
+ let dst = start_ptr;
+
+ ptr::copy(src, dst, unyielded_len);
+ }
+
+ // memmove back untouched tail
+ if tail != (start + unyielded_len) {
+ let src = source_vec.as_ptr().add(tail);
+ let dst = start_ptr.add(unyielded_len);
+ ptr::copy(src, dst, this.tail_len);
+ }
+ }
+
+ source_vec.set_len(start + unyielded_len + this.tail_len);
+ }
+ }
}
#[stable(feature = "vec_drain_as_slice", since = "1.46.0")]
@@ -133,7 +204,7 @@ impl<T, A: Allocator> Drop for Drain<'_, T, A> {
let mut vec = self.vec;
- if mem::size_of::<T>() == 0 {
+ if T::IS_ZST {
// ZSTs have no identity, so we don't need to move them around, we only need to drop the correct amount.
// this can be achieved by manipulating the Vec length instead of moving values out from `iter`.
unsafe {
diff --git a/rust/alloc/vec/drain_filter.rs b/rust/alloc/vec/drain_filter.rs
index b04fce041622..4b019220657d 100644
--- a/rust/alloc/vec/drain_filter.rs
+++ b/rust/alloc/vec/drain_filter.rs
@@ -1,8 +1,9 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
use crate::alloc::{Allocator, Global};
-use core::ptr::{self};
-use core::slice::{self};
+use core::mem::{self, ManuallyDrop};
+use core::ptr;
+use core::slice;
use super::Vec;
@@ -56,6 +57,61 @@ where
pub fn allocator(&self) -> &A {
self.vec.allocator()
}
+
+ /// Keep unyielded elements in the source `Vec`.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// #![feature(drain_filter)]
+ /// #![feature(drain_keep_rest)]
+ ///
+ /// let mut vec = vec!['a', 'b', 'c'];
+ /// let mut drain = vec.drain_filter(|_| true);
+ ///
+ /// assert_eq!(drain.next().unwrap(), 'a');
+ ///
+ /// // This call keeps 'b' and 'c' in the vec.
+ /// drain.keep_rest();
+ ///
+ /// // If we didn't call `keep_rest()`,
+ /// // `vec` would be empty.
+ /// assert_eq!(vec, ['b', 'c']);
+ /// ```
+ #[unstable(feature = "drain_keep_rest", issue = "101122")]
+ pub fn keep_rest(self) {
+ // At this moment layout looks like this:
+ //
+ // _____________________/-- old_len
+ // / \
+ // [kept] [yielded] [tail]
+ // \_______/ ^-- idx
+ // \-- del
+ //
+ // Normally `Drop` impl would drop [tail] (via .for_each(drop), ie still calling `pred`)
+ //
+ // 1. Move [tail] after [kept]
+ // 2. Update length of the original vec to `old_len - del`
+ // a. In case of ZST, this is the only thing we want to do
+ // 3. Do *not* drop self, as everything is put in a consistent state already, there is nothing to do
+ let mut this = ManuallyDrop::new(self);
+
+ unsafe {
+ // ZSTs have no identity, so we don't need to move them around.
+ let needs_move = mem::size_of::<T>() != 0;
+
+ if needs_move && this.idx < this.old_len && this.del > 0 {
+ let ptr = this.vec.as_mut_ptr();
+ let src = ptr.add(this.idx);
+ let dst = src.sub(this.del);
+ let tail_len = this.old_len - this.idx;
+ src.copy_to(dst, tail_len);
+ }
+
+ let new_len = this.old_len - this.del;
+ this.vec.set_len(new_len);
+ }
+ }
}
#[unstable(feature = "drain_filter", reason = "recently added", issue = "43244")]
diff --git a/rust/alloc/vec/into_iter.rs b/rust/alloc/vec/into_iter.rs
index f7a50e76691e..a8a2a8b66bfd 100644
--- a/rust/alloc/vec/into_iter.rs
+++ b/rust/alloc/vec/into_iter.rs
@@ -4,13 +4,13 @@
use super::AsVecIntoIter;
use crate::alloc::{Allocator, Global};
use crate::raw_vec::RawVec;
+use core::array;
use core::fmt;
-use core::intrinsics::arith_offset;
use core::iter::{
FusedIterator, InPlaceIterable, SourceIter, TrustedLen, TrustedRandomAccessNoCoerce,
};
use core::marker::PhantomData;
-use core::mem::{self, ManuallyDrop};
+use core::mem::{self, ManuallyDrop, MaybeUninit, SizedTypeProperties};
#[cfg(not(no_global_oom_handling))]
use core::ops::Deref;
use core::ptr::{self, NonNull};
@@ -97,13 +97,16 @@ impl<T, A: Allocator> IntoIter<T, A> {
}
/// Drops remaining elements and relinquishes the backing allocation.
+ /// This method guarantees it won't panic before relinquishing
+ /// the backing allocation.
///
/// This is roughly equivalent to the following, but more efficient
///
/// ```
/// # let mut into_iter = Vec::<u8>::with_capacity(10).into_iter();
+ /// let mut into_iter = std::mem::replace(&mut into_iter, Vec::new().into_iter());
/// (&mut into_iter).for_each(core::mem::drop);
- /// unsafe { core::ptr::write(&mut into_iter, Vec::new().into_iter()); }
+ /// std::mem::forget(into_iter);
/// ```
///
/// This method is used by in-place iteration, refer to the vec::in_place_collect
@@ -120,6 +123,8 @@ impl<T, A: Allocator> IntoIter<T, A> {
self.ptr = self.buf.as_ptr();
self.end = self.buf.as_ptr();
+ // Dropping the remaining elements can panic, so this needs to be
+ // done only after updating the other fields.
unsafe {
ptr::drop_in_place(remaining);
}
@@ -150,19 +155,19 @@ impl<T, A: Allocator> Iterator for IntoIter<T, A> {
#[inline]
fn next(&mut self) -> Option<T> {
- if self.ptr as *const _ == self.end {
+ if self.ptr == self.end {
None
- } else if mem::size_of::<T>() == 0 {
+ } else if T::IS_ZST {
// purposefully don't use 'ptr.offset' because for
// vectors with 0-size elements this would return the
// same pointer.
- self.ptr = unsafe { arith_offset(self.ptr as *const i8, 1) as *mut T };
+ self.ptr = self.ptr.wrapping_byte_add(1);
// Make up a value of this ZST.
Some(unsafe { mem::zeroed() })
} else {
let old = self.ptr;
- self.ptr = unsafe { self.ptr.offset(1) };
+ self.ptr = unsafe { self.ptr.add(1) };
Some(unsafe { ptr::read(old) })
}
@@ -170,7 +175,7 @@ impl<T, A: Allocator> Iterator for IntoIter<T, A> {
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
- let exact = if mem::size_of::<T>() == 0 {
+ let exact = if T::IS_ZST {
self.end.addr().wrapping_sub(self.ptr.addr())
} else {
unsafe { self.end.sub_ptr(self.ptr) }
@@ -182,11 +187,11 @@ impl<T, A: Allocator> Iterator for IntoIter<T, A> {
fn advance_by(&mut self, n: usize) -> Result<(), usize> {
let step_size = self.len().min(n);
let to_drop = ptr::slice_from_raw_parts_mut(self.ptr as *mut T, step_size);
- if mem::size_of::<T>() == 0 {
+ if T::IS_ZST {
// SAFETY: due to unchecked casts of unsigned amounts to signed offsets the wraparound
// effectively results in unsigned pointers representing positions 0..usize::MAX,
// which is valid for ZSTs.
- self.ptr = unsafe { arith_offset(self.ptr as *const i8, step_size as isize) as *mut T }
+ self.ptr = self.ptr.wrapping_byte_add(step_size);
} else {
// SAFETY: the min() above ensures that step_size is in bounds
self.ptr = unsafe { self.ptr.add(step_size) };
@@ -206,6 +211,43 @@ impl<T, A: Allocator> Iterator for IntoIter<T, A> {
self.len()
}
+ #[inline]
+ fn next_chunk<const N: usize>(&mut self) -> Result<[T; N], core::array::IntoIter<T, N>> {
+ let mut raw_ary = MaybeUninit::uninit_array();
+
+ let len = self.len();
+
+ if T::IS_ZST {
+ if len < N {
+ self.forget_remaining_elements();
+ // Safety: ZSTs can be conjured ex nihilo, only the amount has to be correct
+ return Err(unsafe { array::IntoIter::new_unchecked(raw_ary, 0..len) });
+ }
+
+ self.ptr = self.ptr.wrapping_byte_add(N);
+ // Safety: ditto
+ return Ok(unsafe { raw_ary.transpose().assume_init() });
+ }
+
+ if len < N {
+ // Safety: `len` indicates that this many elements are available and we just checked that
+ // it fits into the array.
+ unsafe {
+ ptr::copy_nonoverlapping(self.ptr, raw_ary.as_mut_ptr() as *mut T, len);
+ self.forget_remaining_elements();
+ return Err(array::IntoIter::new_unchecked(raw_ary, 0..len));
+ }
+ }
+
+ // Safety: `len` is at least the array size (`len >= N`). Copy a fixed amount here to fully
+ // initialize the array.
+ return unsafe {
+ ptr::copy_nonoverlapping(self.ptr, raw_ary.as_mut_ptr() as *mut T, N);
+ self.ptr = self.ptr.add(N);
+ Ok(raw_ary.transpose().assume_init())
+ };
+ }
+
unsafe fn __iterator_get_unchecked(&mut self, i: usize) -> Self::Item
where
Self: TrustedRandomAccessNoCoerce,
@@ -219,7 +261,7 @@ impl<T, A: Allocator> Iterator for IntoIter<T, A> {
// that `T: Copy` so reading elements from the buffer doesn't invalidate
// them for `Drop`.
unsafe {
- if mem::size_of::<T>() == 0 { mem::zeroed() } else { ptr::read(self.ptr.add(i)) }
+ if T::IS_ZST { mem::zeroed() } else { ptr::read(self.ptr.add(i)) }
}
}
}
@@ -230,14 +272,14 @@ impl<T, A: Allocator> DoubleEndedIterator for IntoIter<T, A> {
fn next_back(&mut self) -> Option<T> {
if self.end == self.ptr {
None
- } else if mem::size_of::<T>() == 0 {
+ } else if T::IS_ZST {
// See above for why 'ptr.offset' isn't used
- self.end = unsafe { arith_offset(self.end as *const i8, -1) as *mut T };
+ self.end = self.end.wrapping_byte_sub(1);
// Make up a value of this ZST.
Some(unsafe { mem::zeroed() })
} else {
- self.end = unsafe { self.end.offset(-1) };
+ self.end = unsafe { self.end.sub(1) };
Some(unsafe { ptr::read(self.end) })
}
@@ -246,14 +288,12 @@ impl<T, A: Allocator> DoubleEndedIterator for IntoIter<T, A> {
#[inline]
fn advance_back_by(&mut self, n: usize) -> Result<(), usize> {
let step_size = self.len().min(n);
- if mem::size_of::<T>() == 0 {
+ if T::IS_ZST {
// SAFETY: same as for advance_by()
- self.end = unsafe {
- arith_offset(self.end as *const i8, step_size.wrapping_neg() as isize) as *mut T
- }
+ self.end = self.end.wrapping_byte_sub(step_size);
} else {
// SAFETY: same as for advance_by()
- self.end = unsafe { self.end.offset(step_size.wrapping_neg() as isize) };
+ self.end = unsafe { self.end.sub(step_size) };
}
let to_drop = ptr::slice_from_raw_parts_mut(self.end as *mut T, step_size);
// SAFETY: same as for advance_by()
diff --git a/rust/alloc/vec/is_zero.rs b/rust/alloc/vec/is_zero.rs
index 377f3d172777..426bb2c9f6ff 100644
--- a/rust/alloc/vec/is_zero.rs
+++ b/rust/alloc/vec/is_zero.rs
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT
+use core::num::{Saturating, Wrapping};
use crate::boxed::Box;
@@ -19,12 +20,14 @@ macro_rules! impl_is_zero {
};
}
+impl_is_zero!(i8, |x| x == 0); // It is needed to impl for arrays and tuples of i8.
impl_is_zero!(i16, |x| x == 0);
impl_is_zero!(i32, |x| x == 0);
impl_is_zero!(i64, |x| x == 0);
impl_is_zero!(i128, |x| x == 0);
impl_is_zero!(isize, |x| x == 0);
+impl_is_zero!(u8, |x| x == 0); // It is needed to impl for arrays and tuples of u8.
impl_is_zero!(u16, |x| x == 0);
impl_is_zero!(u32, |x| x == 0);
impl_is_zero!(u64, |x| x == 0);
@@ -56,15 +59,41 @@ unsafe impl<T: IsZero, const N: usize> IsZero for [T; N] {
fn is_zero(&self) -> bool {
// Because this is generated as a runtime check, it's not obvious that
// it's worth doing if the array is really long. The threshold here
- // is largely arbitrary, but was picked because as of 2022-05-01 LLVM
- // can const-fold the check in `vec![[0; 32]; n]` but not in
- // `vec![[0; 64]; n]`: https://godbolt.org/z/WTzjzfs5b
+ // is largely arbitrary, but was picked because as of 2022-07-01 LLVM
+ // fails to const-fold the check in `vec![[1; 32]; n]`
+ // See https://github.com/rust-lang/rust/pull/97581#issuecomment-1166628022
// Feel free to tweak if you have better evidence.
- N <= 32 && self.iter().all(IsZero::is_zero)
+ N <= 16 && self.iter().all(IsZero::is_zero)
}
}
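+// This is a recursive macro: each expansion implements `IsZero` for one tuple arity, then recurses on the remaining type parameters.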
+macro_rules! impl_for_tuples {
+ // Stopper
+ () => {
+ // There is no point implementing this for the empty tuple because it is a ZST.
+ };
+ ($first_arg:ident $(,$rest:ident)*) => {
+ unsafe impl <$first_arg: IsZero, $($rest: IsZero,)*> IsZero for ($first_arg, $($rest,)*){
+ #[inline]
+ fn is_zero(&self) -> bool{
+ // Destructure the tuple into N references.
+ // Rust allows local variables to reuse the names of the generic params.
+ #[allow(non_snake_case)]
+ let ($first_arg, $($rest,)*) = self;
+
+ $first_arg.is_zero()
+ $( && $rest.is_zero() )*
+ }
+ }
+
+ impl_for_tuples!($($rest),*);
+ }
+}
+
+impl_for_tuples!(A, B, C, D, E, F, G, H);
+
// `Option<&T>` and `Option<Box<T>>` are guaranteed to represent `None` as null.
// For fat pointers, the bytes that would be the pointer metadata in the `Some`
// variant are padding in the `None` variant, so ignoring them and
@@ -118,3 +147,39 @@ impl_is_zero_option_of_nonzero!(
NonZeroUsize,
NonZeroIsize,
);
+
+unsafe impl<T: IsZero> IsZero for Wrapping<T> {
+ #[inline]
+ fn is_zero(&self) -> bool {
+ self.0.is_zero()
+ }
+}
+
+unsafe impl<T: IsZero> IsZero for Saturating<T> {
+ #[inline]
+ fn is_zero(&self) -> bool {
+ self.0.is_zero()
+ }
+}
+
+macro_rules! impl_for_optional_bool {
+ ($($t:ty,)+) => {$(
+ unsafe impl IsZero for $t {
+ #[inline]
+ fn is_zero(&self) -> bool {
+ // SAFETY: This is *not* a stable layout guarantee, but
+ // inside `core` we're allowed to rely on the current rustc
+ // behaviour that options of bools will be one byte with
+ // no padding, so long as they're nested less than 254 deep.
+ let raw: u8 = unsafe { core::mem::transmute(*self) };
+ raw == 0
+ }
+ }
+ )+};
+}
+impl_for_optional_bool! {
+ Option<bool>,
+ Option<Option<bool>>,
+ Option<Option<Option<bool>>>,
+ // Could go further, but not worth the metadata overhead
+}
diff --git a/rust/alloc/vec/mod.rs b/rust/alloc/vec/mod.rs
index 8ac6c1e3b2a8..da65deb296cd 100644
--- a/rust/alloc/vec/mod.rs
+++ b/rust/alloc/vec/mod.rs
@@ -61,17 +61,18 @@ use core::cmp::Ordering;
use core::convert::TryFrom;
use core::fmt;
use core::hash::{Hash, Hasher};
-use core::intrinsics::{arith_offset, assume};
+use core::intrinsics::assume;
use core::iter;
#[cfg(not(no_global_oom_handling))]
use core::iter::FromIterator;
use core::marker::PhantomData;
-use core::mem::{self, ManuallyDrop, MaybeUninit};
+use core::mem::{self, ManuallyDrop, MaybeUninit, SizedTypeProperties};
use core::ops::{self, Index, IndexMut, Range, RangeBounds};
use core::ptr::{self, NonNull};
use core::slice::{self, SliceIndex};
use crate::alloc::{Allocator, Global};
+#[cfg(not(no_borrow))]
use crate::borrow::{Cow, ToOwned};
use crate::boxed::Box;
use crate::collections::TryReserveError;
@@ -94,6 +95,7 @@ pub use self::drain::Drain;
mod drain;
+#[cfg(not(no_borrow))]
#[cfg(not(no_global_oom_handling))]
mod cow;
@@ -120,14 +122,12 @@ use self::spec_from_elem::SpecFromElem;
#[cfg(not(no_global_oom_handling))]
mod spec_from_elem;
-#[cfg(not(no_global_oom_handling))]
use self::set_len_on_drop::SetLenOnDrop;
-#[cfg(not(no_global_oom_handling))]
mod set_len_on_drop;
#[cfg(not(no_global_oom_handling))]
-use self::in_place_drop::InPlaceDrop;
+use self::in_place_drop::{InPlaceDrop, InPlaceDstBufDrop};
#[cfg(not(no_global_oom_handling))]
mod in_place_drop;
@@ -147,7 +147,8 @@ mod spec_from_iter;
#[cfg(not(no_global_oom_handling))]
use self::spec_extend::SpecExtend;
-#[cfg(not(no_global_oom_handling))]
+use self::spec_extend::TrySpecExtend;
+
mod spec_extend;
/// A contiguous growable array type, written as `Vec<T>`, short for 'vector'.
@@ -427,17 +428,25 @@ impl<T> Vec<T> {
Vec { buf: RawVec::NEW, len: 0 }
}
- /// Constructs a new, empty `Vec<T>` with the specified capacity.
+ /// Constructs a new, empty `Vec<T>` with at least the specified capacity.
///
- /// The vector will be able to hold exactly `capacity` elements without
- /// reallocating. If `capacity` is 0, the vector will not allocate.
+ /// The vector will be able to hold at least `capacity` elements without
+ /// reallocating. This method is allowed to allocate for more elements than
+ /// `capacity`. If `capacity` is 0, the vector will not allocate.
///
/// It is important to note that although the returned vector has the
- /// *capacity* specified, the vector will have a zero *length*. For an
- /// explanation of the difference between length and capacity, see
+ /// minimum *capacity* specified, the vector will have a zero *length*. For
+ /// an explanation of the difference between length and capacity, see
/// *[Capacity and reallocation]*.
///
+ /// If it is important to know the exact allocated capacity of a `Vec`,
+ /// always use the [`capacity`] method after construction.
+ ///
+ /// For `Vec<T>` where `T` is a zero-sized type, there will be no allocation
+ /// and the capacity will always be `usize::MAX`.
+ ///
/// [Capacity and reallocation]: #capacity-and-reallocation
+ /// [`capacity`]: Vec::capacity
///
/// # Panics
///
@@ -450,19 +459,24 @@ impl<T> Vec<T> {
///
/// // The vector contains no items, even though it has capacity for more
/// assert_eq!(vec.len(), 0);
- /// assert_eq!(vec.capacity(), 10);
+ /// assert!(vec.capacity() >= 10);
///
/// // These are all done without reallocating...
/// for i in 0..10 {
/// vec.push(i);
/// }
/// assert_eq!(vec.len(), 10);
- /// assert_eq!(vec.capacity(), 10);
+ /// assert!(vec.capacity() >= 10);
///
/// // ...but this may make the vector reallocate
/// vec.push(11);
/// assert_eq!(vec.len(), 11);
/// assert!(vec.capacity() >= 11);
+ ///
+ /// // A vector of a zero-sized type will always over-allocate, since no
+ /// // allocation is necessary
+ /// let vec_units = Vec::<()>::with_capacity(10);
+ /// assert_eq!(vec_units.capacity(), usize::MAX);
/// ```
#[cfg(not(no_global_oom_handling))]
#[inline]
@@ -472,17 +486,25 @@ impl<T> Vec<T> {
Self::with_capacity_in(capacity, Global)
}
- /// Tries to construct a new, empty `Vec<T>` with the specified capacity.
+ /// Tries to construct a new, empty `Vec<T>` with at least the specified capacity.
///
- /// The vector will be able to hold exactly `capacity` elements without
- /// reallocating. If `capacity` is 0, the vector will not allocate.
+ /// The vector will be able to hold at least `capacity` elements without
+ /// reallocating. This method is allowed to allocate for more elements than
+ /// `capacity`. If `capacity` is 0, the vector will not allocate.
///
/// It is important to note that although the returned vector has the
- /// *capacity* specified, the vector will have a zero *length*. For an
- /// explanation of the difference between length and capacity, see
+ /// minimum *capacity* specified, the vector will have a zero *length*. For
+ /// an explanation of the difference between length and capacity, see
/// *[Capacity and reallocation]*.
///
+ /// If it is important to know the exact allocated capacity of a `Vec`,
+ /// always use the [`capacity`] method after construction.
+ ///
+ /// For `Vec<T>` where `T` is a zero-sized type, there will be no allocation
+ /// and the capacity will always be `usize::MAX`.
+ ///
/// [Capacity and reallocation]: #capacity-and-reallocation
+ /// [`capacity`]: Vec::capacity
///
/// # Examples
///
@@ -491,22 +513,24 @@ impl<T> Vec<T> {
///
/// // The vector contains no items, even though it has capacity for more
/// assert_eq!(vec.len(), 0);
- /// assert_eq!(vec.capacity(), 10);
+ /// assert!(vec.capacity() >= 10);
///
/// // These are all done without reallocating...
/// for i in 0..10 {
/// vec.push(i);
/// }
/// assert_eq!(vec.len(), 10);
- /// assert_eq!(vec.capacity(), 10);
+ /// assert!(vec.capacity() >= 10);
///
/// // ...but this may make the vector reallocate
/// vec.push(11);
/// assert_eq!(vec.len(), 11);
/// assert!(vec.capacity() >= 11);
///
- /// let mut result = Vec::try_with_capacity(usize::MAX);
- /// assert!(result.is_err());
+ /// // A vector of a zero-sized type will always over-allocate, since no
+ /// // allocation is necessary
+ /// let vec_units = Vec::<()>::try_with_capacity(10).unwrap();
+ /// assert_eq!(vec_units.capacity(), usize::MAX);
/// ```
#[inline]
#[stable(feature = "kernel", since = "1.0.0")]
@@ -514,15 +538,13 @@ impl<T> Vec<T> {
Self::try_with_capacity_in(capacity, Global)
}
- /// Creates a `Vec<T>` directly from the raw components of another vector.
+ /// Creates a `Vec<T>` directly from a pointer, a capacity, and a length.
///
/// # Safety
///
/// This is highly unsafe, due to the number of invariants that aren't
/// checked:
///
- /// * `ptr` needs to have been previously allocated via [`String`]/`Vec<T>`
- /// (at least, it's highly likely to be incorrect if it wasn't).
/// * `T` needs to have the same alignment as what `ptr` was allocated with.
/// (`T` having a less strict alignment is not sufficient, the alignment really
/// needs to be equal to satisfy the [`dealloc`] requirement that memory must be
@@ -531,6 +553,14 @@ impl<T> Vec<T> {
/// to be the same size as the pointer was allocated with. (Because similar to
/// alignment, [`dealloc`] must be called with the same layout `size`.)
/// * `length` needs to be less than or equal to `capacity`.
+ /// * The first `length` values must be properly initialized values of type `T`.
+ /// * `capacity` needs to be the capacity that the pointer was allocated with.
+ /// * The allocated size in bytes must be no larger than `isize::MAX`.
+ /// See the safety documentation of [`pointer::offset`].
+ ///
+ /// These requirements are always upheld by any `ptr` that has been allocated
+ /// via `Vec<T>`. Other allocation sources are allowed if the invariants are
+ /// upheld.
///
/// Violating these may cause problems like corrupting the allocator's
/// internal data structures. For example it is normally **not** safe
@@ -573,8 +603,8 @@ impl<T> Vec<T> {
///
/// unsafe {
/// // Overwrite memory with 4, 5, 6
- /// for i in 0..len as isize {
- /// ptr::write(p.offset(i), 4 + i);
+ /// for i in 0..len {
+ /// ptr::write(p.add(i), 4 + i);
/// }
///
/// // Put everything back together into a Vec
@@ -582,6 +612,32 @@ impl<T> Vec<T> {
/// assert_eq!(rebuilt, [4, 5, 6]);
/// }
/// ```
+ ///
+ /// Using memory that was allocated elsewhere:
+ ///
+ /// ```rust
+ /// #![feature(allocator_api)]
+ ///
+ /// use std::alloc::{AllocError, Allocator, Global, Layout};
+ ///
+ /// fn main() {
+ /// let layout = Layout::array::<u32>(16).expect("overflow cannot happen");
+ ///
+ /// let vec = unsafe {
+ /// let mem = match Global.allocate(layout) {
+ /// Ok(mem) => mem.cast::<u32>().as_ptr(),
+ /// Err(AllocError) => return,
+ /// };
+ ///
+ /// mem.write(1_000_000);
+ ///
+ /// Vec::from_raw_parts_in(mem, 1, 16, Global)
+ /// };
+ ///
+ /// assert_eq!(vec, &[1_000_000]);
+ /// assert_eq!(vec.capacity(), 16);
+ /// }
+ /// ```
#[inline]
#[stable(feature = "rust1", since = "1.0.0")]
pub unsafe fn from_raw_parts(ptr: *mut T, length: usize, capacity: usize) -> Self {
@@ -610,18 +666,26 @@ impl<T, A: Allocator> Vec<T, A> {
Vec { buf: RawVec::new_in(alloc), len: 0 }
}
- /// Constructs a new, empty `Vec<T, A>` with the specified capacity with the provided
- /// allocator.
+ /// Constructs a new, empty `Vec<T, A>` with at least the specified capacity
+ /// with the provided allocator.
///
- /// The vector will be able to hold exactly `capacity` elements without
- /// reallocating. If `capacity` is 0, the vector will not allocate.
+ /// The vector will be able to hold at least `capacity` elements without
+ /// reallocating. This method is allowed to allocate for more elements than
+ /// `capacity`. If `capacity` is 0, the vector will not allocate.
///
/// It is important to note that although the returned vector has the
- /// *capacity* specified, the vector will have a zero *length*. For an
- /// explanation of the difference between length and capacity, see
+ /// minimum *capacity* specified, the vector will have a zero *length*. For
+ /// an explanation of the difference between length and capacity, see
/// *[Capacity and reallocation]*.
///
+ /// If it is important to know the exact allocated capacity of a `Vec`,
+ /// always use the [`capacity`] method after construction.
+ ///
+ /// For `Vec<T, A>` where `T` is a zero-sized type, there will be no allocation
+ /// and the capacity will always be `usize::MAX`.
+ ///
/// [Capacity and reallocation]: #capacity-and-reallocation
+ /// [`capacity`]: Vec::capacity
///
/// # Panics
///
@@ -651,6 +715,11 @@ impl<T, A: Allocator> Vec<T, A> {
/// vec.push(11);
/// assert_eq!(vec.len(), 11);
/// assert!(vec.capacity() >= 11);
+ ///
+ /// // A vector of a zero-sized type will always over-allocate, since no
+ /// // allocation is necessary
+ /// let vec_units = Vec::<(), System>::with_capacity_in(10, System);
+ /// assert_eq!(vec_units.capacity(), usize::MAX);
/// ```
#[cfg(not(no_global_oom_handling))]
#[inline]
@@ -659,18 +728,26 @@ impl<T, A: Allocator> Vec<T, A> {
Vec { buf: RawVec::with_capacity_in(capacity, alloc), len: 0 }
}
- /// Tries to construct a new, empty `Vec<T, A>` with the specified capacity
+ /// Tries to construct a new, empty `Vec<T, A>` with at least the specified capacity
/// with the provided allocator.
///
- /// The vector will be able to hold exactly `capacity` elements without
- /// reallocating. If `capacity` is 0, the vector will not allocate.
+ /// The vector will be able to hold at least `capacity` elements without
+ /// reallocating. This method is allowed to allocate for more elements than
+ /// `capacity`. If `capacity` is 0, the vector will not allocate.
///
/// It is important to note that although the returned vector has the
- /// *capacity* specified, the vector will have a zero *length*. For an
- /// explanation of the difference between length and capacity, see
+ /// minimum *capacity* specified, the vector will have a zero *length*. For
+ /// an explanation of the difference between length and capacity, see
/// *[Capacity and reallocation]*.
///
+ /// If it is important to know the exact allocated capacity of a `Vec`,
+ /// always use the [`capacity`] method after construction.
+ ///
+ /// For `Vec<T, A>` where `T` is a zero-sized type, there will be no allocation
+ /// and the capacity will always be `usize::MAX`.
+ ///
/// [Capacity and reallocation]: #capacity-and-reallocation
+ /// [`capacity`]: Vec::capacity
///
/// # Examples
///
@@ -697,8 +774,10 @@ impl<T, A: Allocator> Vec<T, A> {
/// assert_eq!(vec.len(), 11);
/// assert!(vec.capacity() >= 11);
///
- /// let mut result = Vec::try_with_capacity_in(usize::MAX, System);
- /// assert!(result.is_err());
+ /// // A vector of a zero-sized type will always over-allocate, since no
+ /// // allocation is necessary
+ /// let vec_units = Vec::<(), System>::try_with_capacity_in(10, System).unwrap();
+ /// assert_eq!(vec_units.capacity(), usize::MAX);
/// ```
#[inline]
#[stable(feature = "kernel", since = "1.0.0")]
@@ -706,21 +785,30 @@ impl<T, A: Allocator> Vec<T, A> {
Ok(Vec { buf: RawVec::try_with_capacity_in(capacity, alloc)?, len: 0 })
}
- /// Creates a `Vec<T, A>` directly from the raw components of another vector.
+ /// Creates a `Vec<T, A>` directly from a pointer, a capacity, a length,
+ /// and an allocator.
///
/// # Safety
///
/// This is highly unsafe, due to the number of invariants that aren't
/// checked:
///
- /// * `ptr` needs to have been previously allocated via [`String`]/`Vec<T>`
- /// (at least, it's highly likely to be incorrect if it wasn't).
- /// * `T` needs to have the same size and alignment as what `ptr` was allocated with.
+ /// * `T` needs to have the same alignment as what `ptr` was allocated with.
/// (`T` having a less strict alignment is not sufficient, the alignment really
/// needs to be equal to satisfy the [`dealloc`] requirement that memory must be
/// allocated and deallocated with the same layout.)
+ /// * The size of `T` times the `capacity` (ie. the allocated size in bytes) needs
+ /// to be the same size as the pointer was allocated with. (Because similar to
+ /// alignment, [`dealloc`] must be called with the same layout `size`.)
/// * `length` needs to be less than or equal to `capacity`.
- /// * `capacity` needs to be the capacity that the pointer was allocated with.
+ /// * The first `length` values must be properly initialized values of type `T`.
+ /// * `capacity` needs to [*fit*] the layout size that the pointer was allocated with.
+ /// * The allocated size in bytes must be no larger than `isize::MAX`.
+ /// See the safety documentation of [`pointer::offset`].
+ ///
+ /// These requirements are always upheld by any `ptr` that has been allocated
+ /// via `Vec<T, A>`. Other allocation sources are allowed if the invariants are
+ /// upheld.
///
/// Violating these may cause problems like corrupting the allocator's
/// internal data structures. For example it is **not** safe
@@ -738,6 +826,7 @@ impl<T, A: Allocator> Vec<T, A> {
///
/// [`String`]: crate::string::String
/// [`dealloc`]: crate::alloc::GlobalAlloc::dealloc
+ /// [*fit*]: crate::alloc::Allocator#memory-fitting
///
/// # Examples
///
@@ -767,8 +856,8 @@ impl<T, A: Allocator> Vec<T, A> {
///
/// unsafe {
/// // Overwrite memory with 4, 5, 6
- /// for i in 0..len as isize {
- /// ptr::write(p.offset(i), 4 + i);
+ /// for i in 0..len {
+ /// ptr::write(p.add(i), 4 + i);
/// }
///
/// // Put everything back together into a Vec
@@ -776,6 +865,29 @@ impl<T, A: Allocator> Vec<T, A> {
/// assert_eq!(rebuilt, [4, 5, 6]);
/// }
/// ```
+ ///
+ /// Using memory that was allocated elsewhere:
+ ///
+ /// ```rust
+ /// use std::alloc::{alloc, Layout};
+ ///
+ /// fn main() {
+ /// let layout = Layout::array::<u32>(16).expect("overflow cannot happen");
+ /// let vec = unsafe {
+ /// let mem = alloc(layout).cast::<u32>();
+ /// if mem.is_null() {
+ /// return;
+ /// }
+ ///
+ /// mem.write(1_000_000);
+ ///
+ /// Vec::from_raw_parts(mem, 1, 16)
+ /// };
+ ///
+ /// assert_eq!(vec, &[1_000_000]);
+ /// assert_eq!(vec.capacity(), 16);
+ /// }
+ /// ```
#[inline]
#[unstable(feature = "allocator_api", issue = "32838")]
pub unsafe fn from_raw_parts_in(ptr: *mut T, length: usize, capacity: usize, alloc: A) -> Self {
@@ -868,13 +980,14 @@ impl<T, A: Allocator> Vec<T, A> {
(ptr, len, capacity, alloc)
}
- /// Returns the number of elements the vector can hold without
+ /// Returns the total number of elements the vector can hold without
/// reallocating.
///
/// # Examples
///
/// ```
- /// let vec: Vec<i32> = Vec::with_capacity(10);
+ /// let mut vec: Vec<i32> = Vec::with_capacity(10);
+ /// vec.push(42);
/// assert_eq!(vec.capacity(), 10);
/// ```
#[inline]
@@ -884,10 +997,10 @@ impl<T, A: Allocator> Vec<T, A> {
}
/// Reserves capacity for at least `additional` more elements to be inserted
- /// in the given `Vec<T>`. The collection may reserve more space to avoid
- /// frequent reallocations. After calling `reserve`, capacity will be
- /// greater than or equal to `self.len() + additional`. Does nothing if
- /// capacity is already sufficient.
+ /// in the given `Vec<T>`. The collection may reserve more space to
+ /// speculatively avoid frequent reallocations. After calling `reserve`,
+ /// capacity will be greater than or equal to `self.len() + additional`.
+ /// Does nothing if capacity is already sufficient.
///
/// # Panics
///
@@ -906,10 +1019,12 @@ impl<T, A: Allocator> Vec<T, A> {
self.buf.reserve(self.len, additional);
}
- /// Reserves the minimum capacity for exactly `additional` more elements to
- /// be inserted in the given `Vec<T>`. After calling `reserve_exact`,
- /// capacity will be greater than or equal to `self.len() + additional`.
- /// Does nothing if the capacity is already sufficient.
+ /// Reserves the minimum capacity for at least `additional` more elements to
+ /// be inserted in the given `Vec<T>`. Unlike [`reserve`], this will not
+ /// deliberately over-allocate to speculatively avoid frequent allocations.
+ /// After calling `reserve_exact`, capacity will be greater than or equal to
+ /// `self.len() + additional`. Does nothing if the capacity is already
+ /// sufficient.
///
/// Note that the allocator may give the collection more space than it
/// requests. Therefore, capacity can not be relied upon to be precisely
@@ -935,10 +1050,11 @@ impl<T, A: Allocator> Vec<T, A> {
}
/// Tries to reserve capacity for at least `additional` more elements to be inserted
- /// in the given `Vec<T>`. The collection may reserve more space to avoid
+ /// in the given `Vec<T>`. The collection may reserve more space to speculatively avoid
/// frequent reallocations. After calling `try_reserve`, capacity will be
- /// greater than or equal to `self.len() + additional`. Does nothing if
- /// capacity is already sufficient.
+ /// greater than or equal to `self.len() + additional` if it returns
+ /// `Ok(())`. Does nothing if capacity is already sufficient. This method
+ /// preserves the contents even if an error occurs.
///
/// # Errors
///
@@ -970,10 +1086,11 @@ impl<T, A: Allocator> Vec<T, A> {
self.buf.try_reserve(self.len, additional)
}
- /// Tries to reserve the minimum capacity for exactly `additional`
- /// elements to be inserted in the given `Vec<T>`. After calling
- /// `try_reserve_exact`, capacity will be greater than or equal to
- /// `self.len() + additional` if it returns `Ok(())`.
+ /// Tries to reserve the minimum capacity for at least `additional`
+ /// elements to be inserted in the given `Vec<T>`. Unlike [`try_reserve`],
+ /// this will not deliberately over-allocate to speculatively avoid frequent
+ /// allocations. After calling `try_reserve_exact`, capacity will be greater
+ /// than or equal to `self.len() + additional` if it returns `Ok(())`.
/// Does nothing if the capacity is already sufficient.
///
/// Note that the allocator may give the collection more space than it
@@ -1198,7 +1315,8 @@ impl<T, A: Allocator> Vec<T, A> {
self
}
- /// Returns a raw pointer to the vector's buffer.
+ /// Returns a raw pointer to the vector's buffer, or a dangling raw pointer
+ /// valid for zero sized reads if the vector didn't allocate.
///
/// The caller must ensure that the vector outlives the pointer this
/// function returns, or else it will end up pointing to garbage.
@@ -1235,7 +1353,8 @@ impl<T, A: Allocator> Vec<T, A> {
ptr
}
- /// Returns an unsafe mutable pointer to the vector's buffer.
+ /// Returns an unsafe mutable pointer to the vector's buffer, or a dangling
+ /// raw pointer valid for zero sized reads if the vector didn't allocate.
///
/// The caller must ensure that the vector outlives the pointer this
/// function returns, or else it will end up pointing to garbage.
@@ -1439,9 +1558,6 @@ impl<T, A: Allocator> Vec<T, A> {
}
let len = self.len();
- if index > len {
- assert_failed(index, len);
- }
// space for the new element
if len == self.buf.capacity() {
@@ -1453,9 +1569,15 @@ impl<T, A: Allocator> Vec<T, A> {
// The spot to put the new value
{
let p = self.as_mut_ptr().add(index);
- // Shift everything over to make space. (Duplicating the
- // `index`th element into two consecutive places.)
- ptr::copy(p, p.offset(1), len - index);
+ if index < len {
+ // Shift everything over to make space. (Duplicating the
+ // `index`th element into two consecutive places.)
+ ptr::copy(p, p.add(1), len - index);
+ } else if index == len {
+ // No elements need shifting.
+ } else {
+ assert_failed(index, len);
+ }
// Write it in, overwriting the first copy of the `index`th
// element.
ptr::write(p, element);
@@ -1512,7 +1634,7 @@ impl<T, A: Allocator> Vec<T, A> {
ret = ptr::read(ptr);
// Shift everything down to fill in that spot.
- ptr::copy(ptr.offset(1), ptr, len - index - 1);
+ ptr::copy(ptr.add(1), ptr, len - index - 1);
}
self.set_len(len - 1);
ret
@@ -1561,11 +1683,11 @@ impl<T, A: Allocator> Vec<T, A> {
///
/// ```
/// let mut vec = vec![1, 2, 3, 4];
- /// vec.retain_mut(|x| if *x > 3 {
- /// false
- /// } else {
+ /// vec.retain_mut(|x| if *x <= 3 {
/// *x += 1;
/// true
+ /// } else {
+ /// false
/// });
/// assert_eq!(vec, [2, 3, 4]);
/// ```
@@ -1853,6 +1975,51 @@ impl<T, A: Allocator> Vec<T, A> {
Ok(())
}
+ /// Appends an element if there is sufficient spare capacity, otherwise an error is returned
+ /// with the element.
+ ///
+ /// Unlike [`push`] this method will not reallocate when there's insufficient capacity.
+ /// The caller should use [`reserve`] or [`try_reserve`] to ensure that there is enough capacity.
+ ///
+ /// [`push`]: Vec::push
+ /// [`reserve`]: Vec::reserve
+ /// [`try_reserve`]: Vec::try_reserve
+ ///
+ /// # Errors
+ ///
+ /// Returns `Err(value)`, handing the element back to the caller, when the length already
+ /// equals the capacity. The vector is not modified in that case.
+ ///
+ /// # Examples
+ ///
+ /// A manual, panic-free alternative to [`FromIterator`]:
+ ///
+ /// ```
+ /// #![feature(vec_push_within_capacity)]
+ ///
+ /// use std::collections::TryReserveError;
+ /// fn from_iter_fallible<T>(iter: impl Iterator<Item=T>) -> Result<Vec<T>, TryReserveError> {
+ /// let mut vec = Vec::new();
+ /// for value in iter {
+ /// if let Err(value) = vec.push_within_capacity(value) {
+ /// vec.try_reserve(1)?;
+ /// // this cannot fail, the previous line either returned or added at least 1 free slot
+ /// let _ = vec.push_within_capacity(value);
+ /// }
+ /// }
+ /// Ok(vec)
+ /// }
+ /// assert_eq!(from_iter_fallible(0..100), Ok(Vec::from_iter(0..100)));
+ /// ```
+ #[inline]
+ #[unstable(feature = "vec_push_within_capacity", issue = "100486")]
+ pub fn push_within_capacity(&mut self, value: T) -> Result<(), T> {
+ // Full: refuse instead of growing, and give ownership of `value` back.
+ if self.len == self.buf.capacity() {
+ return Err(value);
+ }
+ // SAFETY: `len != capacity` was just checked; together with the `Vec` invariant
+ // `len <= capacity` this means slot `len` lies inside the buffer and holds no live
+ // element, so writing it and then bumping `len` is sound.
+ unsafe {
+ let end = self.as_mut_ptr().add(self.len);
+ ptr::write(end, value);
+ self.len += 1;
+ }
+ Ok(())
+ }
+
/// Removes the last element from a vector and returns it, or [`None`] if it
/// is empty.
///
@@ -1885,7 +2052,7 @@ impl<T, A: Allocator> Vec<T, A> {
///
/// # Panics
///
- /// Panics if the number of elements in the vector overflows a `usize`.
+ /// Panics if the new capacity exceeds `isize::MAX` bytes.
///
/// # Examples
///
@@ -1917,6 +2084,17 @@ impl<T, A: Allocator> Vec<T, A> {
self.len += count;
}
+ /// Tries to append elements to `self` from other buffer.
+ ///
+ /// The elements are moved in with a single bitwise copy; nothing is cloned.
+ ///
+ /// # Errors
+ ///
+ /// Returns the error from `try_reserve` if room for `other.len()` more elements cannot be
+ /// reserved. Nothing has been copied and `self.len` is unchanged in that case.
+ ///
+ /// # Safety
+ ///
+ /// `other` is dereferenced and read for its whole length, and is copied with
+ /// `ptr::copy_nonoverlapping`, so it must point to a valid slice that does not overlap
+ /// the destination range at the end of `self`'s buffer. Because the copy is bitwise, the
+ /// caller must also make sure the source elements are not used as owned values (e.g.
+ /// dropped) afterwards unless `T: Copy`.
+ #[inline]
+ unsafe fn try_append_elements(&mut self, other: *const [T]) -> Result<(), TryReserveError> {
+ // SAFETY: the caller guarantees `other` points to a valid slice.
+ let count = unsafe { (*other).len() };
+ self.try_reserve(count)?;
+ let len = self.len();
+ // SAFETY: `try_reserve(count)` succeeded, so there is room for `count` elements
+ // starting at index `len`; validity and non-overlap of `other` are the caller's
+ // obligation (see `# Safety`).
+ unsafe { ptr::copy_nonoverlapping(other as *const T, self.as_mut_ptr().add(len), count) };
+ self.len += count;
+ Ok(())
+ }
+
/// Removes the specified range from the vector in bulk, returning all
/// removed elements as an iterator. If the iterator is dropped before
/// being fully consumed, it drops the remaining removed elements.
@@ -1968,9 +2146,7 @@ impl<T, A: Allocator> Vec<T, A> {
unsafe {
// set self.vec length's to start, to be safe in case Drain is leaked
self.set_len(start);
- // Use the borrow in the IterMut to indicate borrowing behavior of the
- // whole Drain iterator (like &mut T).
- let range_slice = slice::from_raw_parts_mut(self.as_mut_ptr().add(start), end - start);
+ let range_slice = slice::from_raw_parts(self.as_ptr().add(start), end - start);
Drain {
tail_start: end,
tail_len: len - end,
@@ -2162,7 +2338,6 @@ impl<T, A: Allocator> Vec<T, A> {
/// static_ref[0] += 1;
/// assert_eq!(static_ref, &[2, 2, 3]);
/// ```
- #[cfg(not(no_global_oom_handling))]
#[stable(feature = "vec_leak", since = "1.47.0")]
#[inline]
pub fn leak<'a>(self) -> &'a mut [T]
@@ -2338,6 +2513,45 @@ impl<T: Clone, A: Allocator> Vec<T, A> {
}
}
+ /// Fallibly resizes the `Vec` in-place so that its length becomes `new_len`.
+ ///
+ /// When `new_len` exceeds the current length, the missing slots are appended and
+ /// each one is filled with `value`. When `new_len` is less than or equal to the
+ /// current length, the `Vec` is truncated to `new_len` instead.
+ ///
+ /// `T` has to implement [`Clone`] so that the passed value can be duplicated.
+ /// For more flexibility (or to rely on [`Default`] rather than [`Clone`]),
+ /// use [`Vec::resize_with`]. To only shrink, use [`Vec::truncate`].
+ ///
+ /// # Errors
+ ///
+ /// Growing goes through a fallible reservation; its error is returned as-is.
+ /// Shrinking never fails.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// let mut vec = vec!["hello"];
+ /// vec.try_resize(3, "world").unwrap();
+ /// assert_eq!(vec, ["hello", "world", "world"]);
+ ///
+ /// let mut vec = vec![1, 2, 3, 4];
+ /// vec.try_resize(2, 0).unwrap();
+ /// assert_eq!(vec, [1, 2]);
+ ///
+ /// let mut vec = vec![42];
+ /// let result = vec.try_resize(usize::MAX, 0);
+ /// assert!(result.is_err());
+ /// ```
+ #[stable(feature = "kernel", since = "1.0.0")]
+ pub fn try_resize(&mut self, new_len: usize, value: T) -> Result<(), TryReserveError> {
+ let current = self.len();
+
+ if new_len <= current {
+ // Shrinking (or already the right size): no reservation is needed.
+ self.truncate(new_len);
+ return Ok(());
+ }
+
+ // Growing: fill the `new_len - current` missing slots with `value`.
+ self.try_extend_with(new_len - current, ExtendElement(value))
+ }
+
/// Clones and appends all elements in a slice to the `Vec`.
///
/// Iterates over the slice `other`, clones each element, and then appends
@@ -2363,6 +2577,30 @@ impl<T: Clone, A: Allocator> Vec<T, A> {
self.spec_extend(other.iter())
}
+ /// Tries to clone and append all elements in a slice to the `Vec`.
+ ///
+ /// Iterates over the slice `other`, clones each element, and then appends
+ /// it to this `Vec`. The `other` slice is traversed in-order.
+ ///
+ /// Note that this function is the same as [`extend`] except that it is
+ /// specialized to work with slices instead, and that it reports failure
+ /// through its return value. If and when Rust gets specialization this
+ /// function will likely be deprecated (but still available).
+ ///
+ /// # Errors
+ ///
+ /// Returns the [`TryReserveError`] produced while trying to make room for
+ /// the new elements.
+ ///
+ /// # Examples
+ ///
+ /// ```
+ /// let mut vec = vec![1];
+ /// vec.try_extend_from_slice(&[2, 3, 4]).unwrap();
+ /// assert_eq!(vec, [1, 2, 3, 4]);
+ /// ```
+ ///
+ /// [`extend`]: Vec::extend
+ #[stable(feature = "kernel", since = "1.0.0")]
+ pub fn try_extend_from_slice(&mut self, other: &[T]) -> Result<(), TryReserveError> {
+ // Dispatches to the most specific `TrySpecExtend` impl for `slice::Iter`.
+ self.try_spec_extend(other.iter())
+ }
+
/// Copies elements from `src` range to the end of the vector.
///
/// # Panics
@@ -2426,7 +2664,7 @@ impl<T, A: Allocator, const N: usize> Vec<[T; N], A> {
#[unstable(feature = "slice_flatten", issue = "95629")]
pub fn into_flattened(self) -> Vec<T, A> {
let (ptr, len, cap, alloc) = self.into_raw_parts_with_alloc();
- let (new_len, new_cap) = if mem::size_of::<T>() == 0 {
+ let (new_len, new_cap) = if T::IS_ZST {
(len.checked_mul(N).expect("vec len overflow"), usize::MAX)
} else {
// SAFETY:
@@ -2488,7 +2726,7 @@ impl<T, A: Allocator> Vec<T, A> {
// Write all elements except the last one
for _ in 1..n {
ptr::write(ptr, value.next());
- ptr = ptr.offset(1);
+ ptr = ptr.add(1);
// Increment the length in every step in case next() panics
local_len.increment_len(1);
}
@@ -2502,6 +2740,36 @@ impl<T, A: Allocator> Vec<T, A> {
// len set by scope guard
}
}
+
+ /// Try to extend the vector by `n` values, using the given generator.
+ ///
+ /// `value.next()` produces each of the first `n - 1` elements and `value.last()`
+ /// produces the final one. With `n == 0` nothing is written.
+ ///
+ /// # Errors
+ ///
+ /// Returns the error from `try_reserve(n)`; nothing has been written in that case.
+ fn try_extend_with<E: ExtendWith<T>>(&mut self, n: usize, mut value: E) -> Result<(), TryReserveError> {
+ self.try_reserve(n)?;
+
+ // SAFETY: `try_reserve(n)` succeeded, so capacity is at least `len + n`. The loop
+ // and the final write together store exactly `n` elements starting at index `len`,
+ // which therefore all land inside the reserved, not-yet-initialized tail.
+ unsafe {
+ let mut ptr = self.as_mut_ptr().add(self.len());
+ // Use SetLenOnDrop to work around a bug where the compiler
+ // might not realize that the stores through `ptr` and through
+ // `self.set_len()` don't alias.
+ let mut local_len = SetLenOnDrop::new(&mut self.len);
+
+ // Write all elements except the last one
+ for _ in 1..n {
+ ptr::write(ptr, value.next());
+ ptr = ptr.add(1);
+ // Increment the length in every step in case next() panics
+ local_len.increment_len(1);
+ }
+
+ if n > 0 {
+ // We can write the last element directly without cloning needlessly
+ ptr::write(ptr, value.last());
+ local_len.increment_len(1);
+ }
+
+ // len set by scope guard
+ Ok(())
+ }
+ }
}
impl<T: PartialEq, A: Allocator> Vec<T, A> {
@@ -2584,7 +2852,7 @@ impl<T: Copy, A: Allocator> ExtendFromWithinSpec for Vec<T, A> {
// SAFETY:
// - Both pointers are created from unique slice references (`&mut [_]`)
// so they are valid and do not overlap.
- // - Elements are :Copy so it's OK to to copy them, without doing
+ // - Elements are :Copy so it's OK to copy them, without doing
// anything with the original values
// - `count` is equal to the len of `source`, so source is valid for
// `count` reads
@@ -2607,6 +2875,7 @@ impl<T: Copy, A: Allocator> ExtendFromWithinSpec for Vec<T, A> {
impl<T, A: Allocator> ops::Deref for Vec<T, A> {
type Target = [T];
+ #[inline]
fn deref(&self) -> &[T] {
unsafe { slice::from_raw_parts(self.as_ptr(), self.len) }
}
@@ -2614,6 +2883,7 @@ impl<T, A: Allocator> ops::Deref for Vec<T, A> {
#[stable(feature = "rust1", since = "1.0.0")]
impl<T, A: Allocator> ops::DerefMut for Vec<T, A> {
+ #[inline]
fn deref_mut(&mut self) -> &mut [T] {
unsafe { slice::from_raw_parts_mut(self.as_mut_ptr(), self.len) }
}
@@ -2740,10 +3010,13 @@ impl<T, A: Allocator> IntoIterator for Vec<T, A> {
///
/// ```
/// let v = vec!["a".to_string(), "b".to_string()];
- /// for s in v.into_iter() {
- /// // s has type String, not &String
- /// println!("{s}");
- /// }
+ /// let mut v_iter = v.into_iter();
+ ///
+ /// let first_element: Option<String> = v_iter.next();
+ ///
+ /// assert_eq!(first_element, Some("a".to_string()));
+ /// assert_eq!(v_iter.next(), Some("b".to_string()));
+ /// assert_eq!(v_iter.next(), None);
/// ```
#[inline]
fn into_iter(self) -> IntoIter<T, A> {
@@ -2751,8 +3024,8 @@ impl<T, A: Allocator> IntoIterator for Vec<T, A> {
let mut me = ManuallyDrop::new(self);
let alloc = ManuallyDrop::new(ptr::read(me.allocator()));
let begin = me.as_mut_ptr();
- let end = if mem::size_of::<T>() == 0 {
- arith_offset(begin as *const i8, me.len() as isize) as *const T
+ let end = if T::IS_ZST {
+ begin.wrapping_byte_add(me.len())
} else {
begin.add(me.len()) as *const T
};
@@ -2836,6 +3109,34 @@ impl<T, A: Allocator> Vec<T, A> {
}
}
+ // leaf method to which various SpecFrom/SpecExtend implementations delegate when
+ // they have no further optimizations to apply
+ //
+ // Fallible variant: a failed reservation is returned to the caller. Elements that
+ // were already appended before the failure stay in the vector.
+ fn try_extend_desugared<I: Iterator<Item = T>>(&mut self, mut iterator: I) -> Result<(), TryReserveError> {
+ // This is the case for a general iterator.
+ //
+ // This function should be the moral equivalent of:
+ //
+ // for item in iterator {
+ // self.push(item);
+ // }
+ //
+ // A `while let` is used instead of `for` because `iterator.size_hint()` is
+ // consulted inside the loop body.
+ while let Some(element) = iterator.next() {
+ let len = self.len();
+ if len == self.capacity() {
+ // Grow by the iterator's lower bound plus one for `element` itself.
+ let (lower, _) = iterator.size_hint();
+ self.try_reserve(lower.saturating_add(1))?;
+ }
+ // SAFETY: either `len != capacity` already held, or `try_reserve` of at least
+ // one element just succeeded, so slot `len` is inside the buffer and unused.
+ unsafe {
+ ptr::write(self.as_mut_ptr().add(len), element);
+ // Since next() executes user code which can panic we have to bump the length
+ // after each step.
+ // NB can't overflow since we would have had to alloc the address space
+ self.set_len(len + 1);
+ }
+ }
+
+ Ok(())
+ }
+
/// Creates a splicing iterator that replaces the specified range in the vector
/// with the given `replace_with` iterator and yields the removed items.
/// `replace_with` does not need to be the same length as `range`.
@@ -3002,6 +3303,8 @@ unsafe impl<#[may_dangle] T, A: Allocator> Drop for Vec<T, A> {
#[rustc_const_unstable(feature = "const_default_impls", issue = "87864")]
impl<T> const Default for Vec<T> {
/// Creates an empty `Vec<T>`.
+ ///
+ /// The vector will not allocate until elements are pushed onto it.
fn default() -> Vec<T> {
Vec::new()
}
@@ -3094,15 +3397,19 @@ impl<T, const N: usize> From<[T; N]> for Vec<T> {
/// ```
#[cfg(not(test))]
fn from(s: [T; N]) -> Vec<T> {
- <[T]>::into_vec(box s)
+ <[T]>::into_vec(
+ #[rustc_box]
+ Box::new(s),
+ )
}
#[cfg(test)]
fn from(s: [T; N]) -> Vec<T> {
- crate::slice::into_vec(box s)
+ crate::slice::into_vec(Box::new(s))
}
}
+#[cfg(not(no_borrow))]
#[stable(feature = "vec_from_cow_slice", since = "1.14.0")]
impl<'a, T> From<Cow<'a, [T]>> for Vec<T>
where
diff --git a/rust/alloc/vec/set_len_on_drop.rs b/rust/alloc/vec/set_len_on_drop.rs
new file mode 100644
index 000000000000..8b66bc812129
--- /dev/null
+++ b/rust/alloc/vec/set_len_on_drop.rs
@@ -0,0 +1,28 @@
+// Scope guard that writes a locally tracked length back into the vec's `len`
+// field when the `SetLenOnDrop` value goes out of scope.
+//
+// The point of the detour: the running length lives in a plain local
+// (`local_len`), which the optimizer can see does not alias with any stores
+// made through the Vec's data pointer. This is a workaround for alias
+// analysis issue #32155
+pub(super) struct SetLenOnDrop<'a> {
+ // Destination that receives the final length on drop.
+ len: &'a mut usize,
+ // Working copy that is updated while elements are being written.
+ local_len: usize,
+}
+
+impl<'a> SetLenOnDrop<'a> {
+ // Starts tracking from the length currently stored behind `len`.
+ #[inline]
+ pub(super) fn new(len: &'a mut usize) -> Self {
+ let local_len = *len;
+ Self { len, local_len }
+ }
+
+ // Advances only the working copy; the real field is untouched until drop.
+ #[inline]
+ pub(super) fn increment_len(&mut self, increment: usize) {
+ self.local_len = self.local_len + increment;
+ }
+}
+
+impl Drop for SetLenOnDrop<'_> {
+ // Publishes the working copy, including when dropped during unwinding.
+ #[inline]
+ fn drop(&mut self) {
+ let final_len = self.local_len;
+ *self.len = final_len;
+ }
+}
diff --git a/rust/alloc/vec/spec_extend.rs b/rust/alloc/vec/spec_extend.rs
new file mode 100644
index 000000000000..73e69fec87f5
--- /dev/null
+++ b/rust/alloc/vec/spec_extend.rs
@@ -0,0 +1,170 @@
+use crate::alloc::Allocator;
+use crate::collections::{TryReserveError, TryReserveErrorKind};
+use core::iter::TrustedLen;
+use core::ptr::{self};
+use core::slice::{self};
+
+use super::{IntoIter, SetLenOnDrop, Vec};
+
+// Specialization trait used for Vec::extend
+//
+// `I` is the iterator type being consumed; the impls below pick progressively
+// more specific strategies for it. Compiled out under `no_global_oom_handling`.
+#[cfg(not(no_global_oom_handling))]
+pub(super) trait SpecExtend<T, I> {
+ fn spec_extend(&mut self, iter: I);
+}
+
+// Specialization trait used for Vec::try_extend
+//
+// Fallible counterpart of `SpecExtend`: failures are reported as a
+// `TryReserveError`. Unlike `SpecExtend` it carries no `cfg` gate.
+pub(super) trait TrySpecExtend<T, I> {
+ fn try_spec_extend(&mut self, iter: I) -> Result<(), TryReserveError>;
+}
+
+// Fallback for arbitrary iterators: no size guarantees are available, so defer
+// to the generic element-by-element path.
+#[cfg(not(no_global_oom_handling))]
+impl<T, I, A: Allocator> SpecExtend<T, I> for Vec<T, A>
+where
+ I: Iterator<Item = T>,
+{
+ default fn spec_extend(&mut self, iter: I) {
+ self.extend_desugared(iter)
+ }
+}
+
+// Fallible fallback for arbitrary iterators: defer to the generic
+// element-by-element path and hand its error, if any, to the caller.
+impl<T, I, A: Allocator> TrySpecExtend<T, I> for Vec<T, A>
+where
+ I: Iterator<Item = T>,
+{
+ default fn try_spec_extend(&mut self, iter: I) -> Result<(), TryReserveError> {
+ self.try_extend_desugared(iter)
+ }
+}
+
+// `TrustedLen` iterators report an exact length, so space is reserved once up
+// front and the elements are written without per-element capacity checks.
+#[cfg(not(no_global_oom_handling))]
+impl<T, I, A: Allocator> SpecExtend<T, I> for Vec<T, A>
+where
+ I: TrustedLen<Item = T>,
+{
+ default fn spec_extend(&mut self, iterator: I) {
+ // This is the case for a TrustedLen iterator.
+ let (low, high) = iterator.size_hint();
+ if let Some(additional) = high {
+ debug_assert_eq!(
+ low,
+ additional,
+ "TrustedLen iterator's size hint is not exact: {:?}",
+ (low, high)
+ );
+ self.reserve(additional);
+ // SAFETY: `reserve(additional)` made room for `additional` more elements and
+ // the `TrustedLen` contract says the iterator yields exactly that many, so
+ // every write below stays inside the reserved tail of the buffer.
+ unsafe {
+ let mut ptr = self.as_mut_ptr().add(self.len());
+ let mut local_len = SetLenOnDrop::new(&mut self.len);
+ iterator.for_each(move |element| {
+ ptr::write(ptr, element);
+ ptr = ptr.add(1);
+ // Since the loop executes user code which can panic we have to bump the
+ // length after each step.
+ // NB can't overflow since we would have had to alloc the address space
+ local_len.increment_len(1);
+ });
+ }
+ } else {
+ // Per TrustedLen contract a `None` upper bound means that the iterator length
+ // truly exceeds usize::MAX, which would eventually lead to a capacity overflow anyway.
+ // Since the other branch already panics eagerly (via `reserve()`) we do the same here.
+ // This avoids additional codegen for a fallback code path which would eventually
+ // panic anyway.
+ panic!("capacity overflow");
+ }
+ }
+}
+
+// Fallible `TrustedLen` path: one `try_reserve` for the exact length, then
+// unchecked writes. Reports `CapacityOverflow` where the infallible
+// counterpart panics.
+impl<T, I, A: Allocator> TrySpecExtend<T, I> for Vec<T, A>
+where
+ I: TrustedLen<Item = T>,
+{
+ default fn try_spec_extend(&mut self, iterator: I) -> Result<(), TryReserveError> {
+ // This is the case for a TrustedLen iterator.
+ let (low, high) = iterator.size_hint();
+ if let Some(additional) = high {
+ debug_assert_eq!(
+ low,
+ additional,
+ "TrustedLen iterator's size hint is not exact: {:?}",
+ (low, high)
+ );
+ // On failure nothing has been consumed from `iterator` or written to `self`.
+ self.try_reserve(additional)?;
+ // SAFETY: `try_reserve(additional)` succeeded and the `TrustedLen` contract
+ // says the iterator yields exactly `additional` elements, so every write
+ // below stays inside the reserved tail of the buffer.
+ unsafe {
+ let mut ptr = self.as_mut_ptr().add(self.len());
+ let mut local_len = SetLenOnDrop::new(&mut self.len);
+ iterator.for_each(move |element| {
+ ptr::write(ptr, element);
+ ptr = ptr.add(1);
+ // NB can't overflow since we would have had to alloc the address space
+ local_len.increment_len(1);
+ });
+ }
+ Ok(())
+ } else {
+ // No upper bound: the length does not fit in `usize`, so no reservation
+ // could ever succeed.
+ Err(TryReserveErrorKind::CapacityOverflow.into())
+ }
+ }
+}
+
+// Extending from another vector's `IntoIter`: the remaining elements are
+// appended in bulk rather than yielded one at a time.
+#[cfg(not(no_global_oom_handling))]
+impl<T, A: Allocator> SpecExtend<T, IntoIter<T>> for Vec<T, A> {
+ fn spec_extend(&mut self, mut iterator: IntoIter<T>) {
+ // SAFETY: `as_slice()` yields a valid slice of the iterator's remaining
+ // elements, which live in a different allocation than `self`'s buffer.
+ unsafe {
+ self.append_elements(iterator.as_slice() as _);
+ }
+ // The elements were appended to `self`; the iterator must not treat them as
+ // its own any more.
+ iterator.forget_remaining_elements();
+ }
+}
+
+// Fallibly extending from another vector's `IntoIter`: the remaining elements
+// are moved over with one bulk copy.
+//
+// On error nothing has been copied and `iterator` still owns (and will drop)
+// all of its remaining elements.
+impl<T, A: Allocator> TrySpecExtend<T, IntoIter<T>> for Vec<T, A> {
+ fn try_spec_extend(&mut self, mut iterator: IntoIter<T>) -> Result<(), TryReserveError> {
+ // SAFETY: `as_slice()` yields a valid slice of the iterator's remaining
+ // elements, which live in a different allocation than `self`'s buffer.
+ unsafe {
+ self.try_append_elements(iterator.as_slice() as _)?;
+ }
+ // The elements now belong to `self`, so the iterator has to forget them. Use the
+ // same helper as the infallible `spec_extend` instead of assigning `ptr`/`end`
+ // by hand: that keeps both paths in lock-step and leaves the decision of which
+ // cursor to move (relevant for zero-sized `T`, whose `end` is a byte offset and
+ // need not be aligned) in a single place.
+ iterator.forget_remaining_elements();
+ Ok(())
+ }
+}
+
+// Iterators over `&T` with `T: Clone`: adapt with `cloned()` and re-dispatch to
+// the by-value `SpecExtend` impls.
+#[cfg(not(no_global_oom_handling))]
+impl<'a, T: 'a, I, A: Allocator + 'a> SpecExtend<&'a T, I> for Vec<T, A>
+where
+ I: Iterator<Item = &'a T>,
+ T: Clone,
+{
+ default fn spec_extend(&mut self, iterator: I) {
+ self.spec_extend(iterator.cloned())
+ }
+}
+
+// Iterators over `&T` with `T: Clone`: adapt with `cloned()` and re-dispatch to
+// the by-value `TrySpecExtend` impls, passing their result through.
+impl<'a, T: 'a, I, A: Allocator + 'a> TrySpecExtend<&'a T, I> for Vec<T, A>
+where
+ I: Iterator<Item = &'a T>,
+ T: Clone,
+{
+ default fn try_spec_extend(&mut self, iterator: I) -> Result<(), TryReserveError> {
+ self.try_spec_extend(iterator.cloned())
+ }
+}
+
+// Slice iterators over `Copy` elements: append whatever the iterator has not
+// yielded yet with one bulk copy instead of element by element.
+#[cfg(not(no_global_oom_handling))]
+impl<'a, T: 'a, A: Allocator + 'a> SpecExtend<&'a T, slice::Iter<'a, T>> for Vec<T, A>
+where
+ T: Copy,
+{
+ fn spec_extend(&mut self, iterator: slice::Iter<'a, T>) {
+ let remaining: &[T] = iterator.as_slice();
+ // SAFETY: `remaining` is a live shared slice, and `T: Copy` makes a bitwise
+ // duplicate of its elements a valid value.
+ unsafe {
+ self.append_elements(remaining);
+ }
+ }
+}
+
+// Fallible version for slice iterators over `Copy` elements: one reservation
+// attempt followed by one bulk copy of the not-yet-yielded elements.
+impl<'a, T: 'a, A: Allocator + 'a> TrySpecExtend<&'a T, slice::Iter<'a, T>> for Vec<T, A>
+where
+ T: Copy,
+{
+ fn try_spec_extend(&mut self, iterator: slice::Iter<'a, T>) -> Result<(), TryReserveError> {
+ // SAFETY: `as_slice()` is a live shared slice, and `T: Copy` makes a bitwise
+ // duplicate of its elements a valid value.
+ unsafe { self.try_append_elements(iterator.as_slice()) }
+ }
+}
diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h
index c48bc284214a..b3ef609d0947 100644
--- a/rust/bindings/bindings_helper.h
+++ b/rust/bindings/bindings_helper.h
@@ -6,8 +6,59 @@
* Sorted alphabetically.
*/
+#include <drm/drm_device.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_ioctl.h>
+#include <drm/drm_syncobj.h>
+#include <drm/gpu_scheduler.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/dma-fence.h>
+#include <linux/dma-fence-chain.h>
+#include <linux/dma-mapping.h>
+#include <linux/fs.h>
+#include <linux/iosys-map.h>
+#include <linux/io-pgtable.h>
+#include <linux/ktime.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/poll.h>
+#include <linux/refcount.h>
#include <linux/slab.h>
+#include <linux/soc/apple/rtkit.h>
+#include <linux/sysctl.h>
+#include <linux/timekeeping.h>
+#include <linux/xarray.h>
+#include <uapi/asm-generic/ioctl.h>
+#include <uapi/drm/asahi_drm.h>
+#include <uapi/drm/drm.h>
/* `bindgen` gets confused at certain things. */
const gfp_t BINDINGS_GFP_KERNEL = GFP_KERNEL;
const gfp_t BINDINGS___GFP_ZERO = __GFP_ZERO;
+
+/* XArray init flags, re-exported as typed constants under a BINDINGS_ prefix. */
+const gfp_t BINDINGS_XA_FLAGS_LOCK_IRQ = XA_FLAGS_LOCK_IRQ;
+const gfp_t BINDINGS_XA_FLAGS_LOCK_BH = XA_FLAGS_LOCK_BH;
+const gfp_t BINDINGS_XA_FLAGS_TRACK_FREE = XA_FLAGS_TRACK_FREE;
+const gfp_t BINDINGS_XA_FLAGS_ZERO_BUSY = XA_FLAGS_ZERO_BUSY;
+const gfp_t BINDINGS_XA_FLAGS_ALLOC_WRAPPED = XA_FLAGS_ALLOC_WRAPPED;
+const gfp_t BINDINGS_XA_FLAGS_ACCOUNT = XA_FLAGS_ACCOUNT;
+const gfp_t BINDINGS_XA_FLAGS_ALLOC = XA_FLAGS_ALLOC;
+const gfp_t BINDINGS_XA_FLAGS_ALLOC1 = XA_FLAGS_ALLOC1;
+
+/* XArray mark values. */
+const xa_mark_t BINDINGS_XA_MARK_0 = XA_MARK_0;
+const xa_mark_t BINDINGS_XA_MARK_1 = XA_MARK_1;
+const xa_mark_t BINDINGS_XA_MARK_2 = XA_MARK_2;
+const xa_mark_t BINDINGS_XA_PRESENT = XA_PRESENT;
+const xa_mark_t BINDINGS_XA_MARK_MAX = XA_MARK_MAX;
+const xa_mark_t BINDINGS_XA_FREE_MARK = XA_FREE_MARK;
+
+/* poll() event bits. */
+const __poll_t BINDINGS_EPOLLIN = EPOLLIN;
+const __poll_t BINDINGS_EPOLLOUT = EPOLLOUT;
+const __poll_t BINDINGS_EPOLLERR = EPOLLERR;
+const __poll_t BINDINGS_EPOLLHUP = EPOLLHUP;
diff --git a/rust/bindings/lib.rs b/rust/bindings/lib.rs
index 6c50ee62c56b..9bcbea04dac3 100644
--- a/rust/bindings/lib.rs
+++ b/rust/bindings/lib.rs
@@ -9,7 +9,6 @@
//! using this crate.
#![no_std]
-#![feature(core_ffi_c)]
// See <https://github.com/rust-lang/rust-bindgen/issues/1651>.
#![cfg_attr(test, allow(deref_nullptr))]
#![cfg_attr(test, allow(unaligned_references))]
@@ -41,6 +40,7 @@ mod bindings_raw {
#[allow(dead_code)]
mod bindings_helper {
// Import the generated bindings for types.
+ use super::bindings_raw::*;
include!(concat!(
env!("OBJTREE"),
"/rust/bindings/bindings_helpers_generated.rs"
diff --git a/rust/compiler_builtins.rs b/rust/compiler_builtins.rs
index f8f39a3e6855..43378357ece9 100644
--- a/rust/compiler_builtins.rs
+++ b/rust/compiler_builtins.rs
@@ -28,7 +28,7 @@ macro_rules! define_panicking_intrinsics(
($reason: tt, { $($ident: ident, )* }) => {
$(
#[doc(hidden)]
- #[no_mangle]
+ #[export_name = concat!("__rust", stringify!($ident))]
pub extern "C" fn $ident() {
panic!($reason);
}
@@ -61,3 +61,6 @@ define_panicking_intrinsics!("`u128` should not be used", {
__udivti3,
__umodti3,
});
+
+// NOTE: if you are adding a new intrinsic here, you should also add it to
+// `redirect-intrinsics` in `rust/Makefile`.
diff --git a/rust/helpers.c b/rust/helpers.c
index b4f15eee2ffd..e023b1265b58 100644
--- a/rust/helpers.c
+++ b/rust/helpers.c
@@ -18,8 +18,25 @@
* accidentally exposed.
*/
+#include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
+#include <drm/drm_syncobj.h>
#include <linux/bug.h>
#include <linux/build_bug.h>
+#include <linux/device.h>
+#include <linux/dma-fence.h>
+#include <linux/dma-fence-chain.h>
+#include <linux/dma-mapping.h>
+#include <linux/err.h>
+#include <linux/errname.h>
+#include <linux/mutex.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+#include <linux/rcupdate.h>
+#include <linux/refcount.h>
+#include <linux/xarray.h>
__noreturn void rust_helper_BUG(void)
{
@@ -27,6 +44,504 @@ __noreturn void rust_helper_BUG(void)
}
EXPORT_SYMBOL_GPL(rust_helper_BUG);
+refcount_t rust_helper_REFCOUNT_INIT(int n)
+{
+ return (refcount_t)REFCOUNT_INIT(n); /* NOTE(review): cast presumably turns the initializer macro into a value that can be returned — confirm */
+}
+EXPORT_SYMBOL_GPL(rust_helper_REFCOUNT_INIT);
+
+void rust_helper_refcount_inc(refcount_t *r)
+{
+ refcount_inc(r); /* pass-through: gives Rust an exported symbol for refcount_inc() */
+}
+EXPORT_SYMBOL_GPL(rust_helper_refcount_inc);
+
+bool rust_helper_refcount_dec_and_test(refcount_t *r)
+{
+ return refcount_dec_and_test(r); /* result of refcount_dec_and_test() is handed back unchanged */
+}
+EXPORT_SYMBOL_GPL(rust_helper_refcount_dec_and_test);
+
+__force void *rust_helper_ERR_PTR(long err)
+{
+ return ERR_PTR(err); /* long in, pointer out: whatever ERR_PTR() builds from err */
+}
+EXPORT_SYMBOL_GPL(rust_helper_ERR_PTR);
+
+bool rust_helper_IS_ERR(__force const void *ptr)
+{
+ return IS_ERR(ptr); /* reports IS_ERR()'s verdict on ptr; ptr is not dereferenced here */
+}
+EXPORT_SYMBOL_GPL(rust_helper_IS_ERR);
+
+long rust_helper_PTR_ERR(__force const void *ptr)
+{
+ return PTR_ERR(ptr); /* opposite direction of rust_helper_ERR_PTR(): pointer in, long out */
+}
+EXPORT_SYMBOL_GPL(rust_helper_PTR_ERR);
+
+const char *rust_helper_errname(int err)
+{
+ return errname(err); /* string pointer comes straight from errname(); no copy is made here */
+}
+EXPORT_SYMBOL_GPL(rust_helper_errname);
+
+void rust_helper_xa_init_flags(struct xarray *xa, gfp_t flags)
+{
+ xa_init_flags(xa, flags); /* both arguments are forwarded untouched */
+}
+EXPORT_SYMBOL_GPL(rust_helper_xa_init_flags);
+
+bool rust_helper_xa_empty(struct xarray *xa)
+{
+ return xa_empty(xa); /* returns xa_empty()'s answer as-is */
+}
+EXPORT_SYMBOL_GPL(rust_helper_xa_empty);
+
+int rust_helper_xa_alloc(struct xarray *xa, u32 *id, void *entry, struct xa_limit limit, gfp_t gfp)
+{
+ return xa_alloc(xa, id, entry, limit, gfp); /* status code is xa_alloc()'s; *id is written by the callee, not here */
+}
+EXPORT_SYMBOL_GPL(rust_helper_xa_alloc);
+
+void rust_helper_xa_lock(struct xarray *xa)
+{
+ xa_lock(xa); /* counterpart of rust_helper_xa_unlock() */
+}
+EXPORT_SYMBOL_GPL(rust_helper_xa_lock);
+
+void rust_helper_xa_unlock(struct xarray *xa)
+{
+ xa_unlock(xa); /* counterpart of rust_helper_xa_lock() */
+}
+EXPORT_SYMBOL_GPL(rust_helper_xa_unlock);
+
+int rust_helper_xa_err(void *entry)
+{
+ return xa_err(entry); /* int result is xa_err()'s reading of entry */
+}
+EXPORT_SYMBOL_GPL(rust_helper_xa_err);
+
+void *rust_helper_dev_get_drvdata(struct device *dev)
+{
+ return dev_get_drvdata(dev); /* opaque driver-data pointer, returned exactly as dev_get_drvdata() gives it */
+}
+EXPORT_SYMBOL_GPL(rust_helper_dev_get_drvdata);
+
+const char *rust_helper_dev_name(const struct device *dev)
+{
+ return dev_name(dev); /* pointer is dev_name()'s own; nothing is duplicated here */
+}
+EXPORT_SYMBOL_GPL(rust_helper_dev_name);
+
+struct task_struct *rust_helper_get_current(void)
+{
+ return current; /* evaluates `current` in the calling context and returns it */
+}
+EXPORT_SYMBOL_GPL(rust_helper_get_current);
+
+int rust_helper_signal_pending(struct task_struct *t)
+{
+ return signal_pending(t); /* int result is signal_pending()'s answer for task t */
+}
+EXPORT_SYMBOL_GPL(rust_helper_signal_pending);
+
+void rust_helper_init_wait(struct wait_queue_entry *wq_entry)
+{
+ init_wait(wq_entry); /* initialisation is done entirely by init_wait() on the caller's entry */
+}
+EXPORT_SYMBOL_GPL(rust_helper_init_wait);
+
+unsigned long rust_helper_copy_from_user(void *to, const void __user *from, unsigned long n)
+{
+ return copy_from_user(to, from, n); /* returns copy_from_user()'s result — presumably the count of bytes NOT copied; confirm */
+}
+EXPORT_SYMBOL_GPL(rust_helper_copy_from_user);
+
+unsigned long rust_helper_copy_to_user(void __user *to, const void *from, unsigned long n)
+{
+ return copy_to_user(to, from, n); /* returns copy_to_user()'s result — presumably the count of bytes NOT copied; confirm */
+}
+EXPORT_SYMBOL_GPL(rust_helper_copy_to_user);
+
+unsigned long rust_helper_clear_user(void __user *to, unsigned long n)
+{
+ return clear_user(to, n); /* returns clear_user()'s result unchanged */
+}
+EXPORT_SYMBOL_GPL(rust_helper_clear_user);
+
+void rust_helper_rcu_read_lock(void)
+{
+ rcu_read_lock(); /* must be paired with rust_helper_rcu_read_unlock() by the caller */
+}
+EXPORT_SYMBOL_GPL(rust_helper_rcu_read_lock);
+
+void rust_helper_rcu_read_unlock(void)
+{
+ rcu_read_unlock(); /* closes the section opened via rust_helper_rcu_read_lock() */
+}
+EXPORT_SYMBOL_GPL(rust_helper_rcu_read_unlock);
+
+void rust_helper_synchronize_rcu(void)
+{
+ synchronize_rcu(); /* returns only when synchronize_rcu() itself returns */
+}
+EXPORT_SYMBOL_GPL(rust_helper_synchronize_rcu);
+
+void __iomem *rust_helper_ioremap(resource_size_t offset, unsigned long size)
+{
+ return ioremap(offset, size); /* mapping (or failure value) is whatever ioremap() returns */
+}
+EXPORT_SYMBOL_GPL(rust_helper_ioremap);
+
+void __iomem *rust_helper_ioremap_np(resource_size_t offset, unsigned long size)
+{
+ return ioremap_np(offset, size); /* same shape as rust_helper_ioremap(), but routed to ioremap_np() */
+}
+EXPORT_SYMBOL_GPL(rust_helper_ioremap_np);
+
+u8 rust_helper_readb(const volatile void __iomem *addr)
+{
+ return readb(addr); /* 8-bit MMIO read via readb() */
+}
+EXPORT_SYMBOL_GPL(rust_helper_readb);
+
+u16 rust_helper_readw(const volatile void __iomem *addr)
+{
+ return readw(addr); /* 16-bit MMIO read via readw() */
+}
+EXPORT_SYMBOL_GPL(rust_helper_readw);
+
+u32 rust_helper_readl(const volatile void __iomem *addr)
+{
+ return readl(addr); /* 32-bit MMIO read via readl() */
+}
+EXPORT_SYMBOL_GPL(rust_helper_readl);
+
+#ifdef CONFIG_64BIT
+u64 rust_helper_readq(const volatile void __iomem *addr)
+{
+ return readq(addr); /* 64-bit MMIO read; only built when CONFIG_64BIT is set (surrounding #ifdef) */
+}
+EXPORT_SYMBOL_GPL(rust_helper_readq);
+#endif
+
+void rust_helper_writeb(u8 value, volatile void __iomem *addr)
+{
+ writeb(value, addr); /* 8-bit MMIO write; argument order (value, addr) mirrors writeb() */
+}
+EXPORT_SYMBOL_GPL(rust_helper_writeb);
+
+void rust_helper_writew(u16 value, volatile void __iomem *addr)
+{
+ writew(value, addr); /* 16-bit MMIO write; argument order (value, addr) mirrors writew() */
+}
+EXPORT_SYMBOL_GPL(rust_helper_writew);
+
+void rust_helper_writel(u32 value, volatile void __iomem *addr)
+{
+ writel(value, addr); /* 32-bit MMIO write; argument order (value, addr) mirrors writel() */
+}
+EXPORT_SYMBOL_GPL(rust_helper_writel);
+
+#ifdef CONFIG_64BIT
+void rust_helper_writeq(u64 value, volatile void __iomem *addr)
+{
+ writeq(value, addr); /* 64-bit MMIO write; only built when CONFIG_64BIT is set (surrounding #ifdef) */
+}
+EXPORT_SYMBOL_GPL(rust_helper_writeq);
+#endif
+
+u8 rust_helper_readb_relaxed(const volatile void __iomem *addr)
+{
+ return readb_relaxed(addr); /* 8-bit read through the _relaxed accessor instead of readb() */
+}
+EXPORT_SYMBOL_GPL(rust_helper_readb_relaxed);
+
+u16 rust_helper_readw_relaxed(const volatile void __iomem *addr)
+{
+ return readw_relaxed(addr); /* 16-bit read through the _relaxed accessor instead of readw() */
+}
+EXPORT_SYMBOL_GPL(rust_helper_readw_relaxed);
+
+u32 rust_helper_readl_relaxed(const volatile void __iomem *addr)
+{
+ return readl_relaxed(addr); /* 32-bit read through the _relaxed accessor instead of readl() */
+}
+EXPORT_SYMBOL_GPL(rust_helper_readl_relaxed);
+
+#ifdef CONFIG_64BIT
+u64 rust_helper_readq_relaxed(const volatile void __iomem *addr)
+{
+ return readq_relaxed(addr); /* 64-bit relaxed read; only built when CONFIG_64BIT is set (surrounding #ifdef) */
+}
+EXPORT_SYMBOL_GPL(rust_helper_readq_relaxed);
+#endif
+
+void rust_helper_writeb_relaxed(u8 value, volatile void __iomem *addr)
+{
+ writeb_relaxed(value, addr); /* 8-bit write through the _relaxed accessor instead of writeb() */
+}
+EXPORT_SYMBOL_GPL(rust_helper_writeb_relaxed);
+
+void rust_helper_writew_relaxed(u16 value, volatile void __iomem *addr)
+{
+ writew_relaxed(value, addr); /* 16-bit write through the _relaxed accessor instead of writew() */
+}
+EXPORT_SYMBOL_GPL(rust_helper_writew_relaxed);
+
+void rust_helper_writel_relaxed(u32 value, volatile void __iomem *addr)
+{
+ writel_relaxed(value, addr); /* 32-bit write through the _relaxed accessor instead of writel() */
+}
+EXPORT_SYMBOL_GPL(rust_helper_writel_relaxed);
+
+#ifdef CONFIG_64BIT
+void rust_helper_writeq_relaxed(u64 value, volatile void __iomem *addr)
+{
+ writeq_relaxed(value, addr); /* 64-bit relaxed write; only built when CONFIG_64BIT is set (surrounding #ifdef) */
+}
+EXPORT_SYMBOL_GPL(rust_helper_writeq_relaxed);
+#endif
+
+void rust_helper_memcpy_fromio(void *to, const volatile void __iomem *from, long count)
+{
+ memcpy_fromio(to, from, count); /* NOTE(review): count is a signed long here — confirm it matches memcpy_fromio()'s length type */
+}
+EXPORT_SYMBOL_GPL(rust_helper_memcpy_fromio);
+
+void *
+rust_helper_platform_get_drvdata(const struct platform_device *pdev)
+{
+ return platform_get_drvdata(pdev); /* reads back the pointer stored via rust_helper_platform_set_drvdata() or its C equivalent */
+}
+EXPORT_SYMBOL_GPL(rust_helper_platform_get_drvdata);
+
+void
+rust_helper_platform_set_drvdata(struct platform_device *pdev,
+ void *data)
+{
+ platform_set_drvdata(pdev, data); /* data is stored as an opaque pointer; ownership stays with the caller */
+}
+EXPORT_SYMBOL_GPL(rust_helper_platform_set_drvdata);
+
+const struct of_device_id *rust_helper_of_match_device(
+ const struct of_device_id *matches, const struct device *dev)
+{
+ return of_match_device(matches, dev); /* result pointer is of_match_device()'s, returned unchanged */
+}
+EXPORT_SYMBOL_GPL(rust_helper_of_match_device);
+
+bool rust_helper_of_node_is_root(const struct device_node *np)
+{
+ return of_node_is_root(np); /* boolean answer comes from of_node_is_root() */
+}
+EXPORT_SYMBOL_GPL(rust_helper_of_node_is_root);
+
+struct device_node *rust_helper_of_parse_phandle(const struct device_node *np,
+ const char *phandle_name,
+ int index)
+{
+ return of_parse_phandle(np, phandle_name, index); /* NOTE(review): the returned node presumably carries a reference the caller must drop — confirm */
+}
+EXPORT_SYMBOL_GPL(rust_helper_of_parse_phandle);
+
+int rust_helper_dma_set_mask_and_coherent(struct device *dev, u64 mask)
+{
+ return dma_set_mask_and_coherent(dev, mask); /* status code is dma_set_mask_and_coherent()'s */
+}
+EXPORT_SYMBOL_GPL(rust_helper_dma_set_mask_and_coherent);
+
+resource_size_t rust_helper_resource_size(const struct resource *res)
+{
+ return resource_size(res); /* size is computed by resource_size(); res is only read */
+}
+EXPORT_SYMBOL_GPL(rust_helper_resource_size);
+
+void rust_helper_init_completion(struct completion *c)
+{
+ init_completion(c); /* initialises the caller-provided completion in place */
+}
+EXPORT_SYMBOL_GPL(rust_helper_init_completion);
+
+dma_addr_t rust_helper_sg_dma_address(const struct scatterlist *sg)
+{
+ return sg_dma_address(sg); /* yields the value sg_dma_address() reads from sg */
+}
+EXPORT_SYMBOL_GPL(rust_helper_sg_dma_address);
+
+int rust_helper_sg_dma_len(const struct scatterlist *sg)
+{
+ return sg_dma_len(sg); /* NOTE(review): declared int here — confirm against the type sg_dma_len() yields (may be unsigned) */
+}
+EXPORT_SYMBOL_GPL(rust_helper_sg_dma_len);
+
+void rust_helper___spin_lock_init(spinlock_t *lock, const char *name,
+ struct lock_class_key *key)
+{
+#ifdef CONFIG_DEBUG_SPINLOCK /* debug builds: the caller's name and key are passed through */
+# ifndef CONFIG_PREEMPT_RT
+ __raw_spin_lock_init(spinlock_check(lock), name, key, LD_WAIT_CONFIG);
+# else /* CONFIG_PREEMPT_RT: two-step init, rt_mutex base first */
+ rt_mutex_base_init(&lock->lock);
+ __rt_spin_lock_init(lock, name, key, false);
+# endif
+#else /* !CONFIG_DEBUG_SPINLOCK: name and key are unused on this path */
+ spin_lock_init(lock);
+#endif
+}
+EXPORT_SYMBOL_GPL(rust_helper___spin_lock_init);
+
+unsigned long rust_helper_msecs_to_jiffies(const unsigned int m)
+{
+ return msecs_to_jiffies(m); /* conversion is done entirely by msecs_to_jiffies() */
+}
+EXPORT_SYMBOL_GPL(rust_helper_msecs_to_jiffies);
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+
+void rust_helper_dma_fence_get(struct dma_fence *fence)
+{
+ dma_fence_get(fence); /* any value dma_fence_get() returns is discarded; built only under CONFIG_DMA_SHARED_BUFFER */
+}
+EXPORT_SYMBOL_GPL(rust_helper_dma_fence_get);
+
+void rust_helper_dma_fence_put(struct dma_fence *fence)
+{
+ dma_fence_put(fence); /* counterpart of rust_helper_dma_fence_get() */
+}
+EXPORT_SYMBOL_GPL(rust_helper_dma_fence_put);
+
+struct dma_fence_chain *rust_helper_dma_fence_chain_alloc(void)
+{
+ return dma_fence_chain_alloc(); /* allocation result (possibly NULL — confirm) is returned as-is; free with rust_helper_dma_fence_chain_free() */
+}
+EXPORT_SYMBOL_GPL(rust_helper_dma_fence_chain_alloc);
+
+void rust_helper_dma_fence_chain_free(struct dma_fence_chain *chain)
+{
+ dma_fence_chain_free(chain); /* counterpart of rust_helper_dma_fence_chain_alloc() */
+}
+EXPORT_SYMBOL_GPL(rust_helper_dma_fence_chain_free);
+
+void rust_helper_dma_fence_set_error(struct dma_fence *fence, int error)
+{
+ dma_fence_set_error(fence, error); /* error is forwarded unchecked; any validation happens in dma_fence_set_error() */
+}
+EXPORT_SYMBOL_GPL(rust_helper_dma_fence_set_error);
+
+#endif
+
+#ifdef CONFIG_DRM
+
+void rust_helper_drm_gem_object_get(struct drm_gem_object *obj)
+{
+ drm_gem_object_get(obj); /* pair with rust_helper_drm_gem_object_put(); built only under CONFIG_DRM */
+}
+EXPORT_SYMBOL_GPL(rust_helper_drm_gem_object_get);
+
+void rust_helper_drm_gem_object_put(struct drm_gem_object *obj)
+{
+ drm_gem_object_put(obj); /* counterpart of rust_helper_drm_gem_object_get() */
+}
+EXPORT_SYMBOL_GPL(rust_helper_drm_gem_object_put);
+
+__u64 rust_helper_drm_vma_node_offset_addr(struct drm_vma_offset_node *node)
+{
+ return drm_vma_node_offset_addr(node); /* 64-bit value is computed by drm_vma_node_offset_addr() */
+}
+EXPORT_SYMBOL_GPL(rust_helper_drm_vma_node_offset_addr);
+
+void rust_helper_drm_syncobj_get(struct drm_syncobj *obj)
+{
+ drm_syncobj_get(obj); /* pair with rust_helper_drm_syncobj_put() */
+}
+EXPORT_SYMBOL_GPL(rust_helper_drm_syncobj_get);
+
+void rust_helper_drm_syncobj_put(struct drm_syncobj *obj)
+{
+ drm_syncobj_put(obj); /* counterpart of rust_helper_drm_syncobj_get() */
+}
+EXPORT_SYMBOL_GPL(rust_helper_drm_syncobj_put);
+
+struct dma_fence *rust_helper_drm_syncobj_fence_get(struct drm_syncobj *syncobj)
+{
+ return drm_syncobj_fence_get(syncobj); /* NOTE(review): result presumably may be NULL and carries a reference — confirm */
+}
+EXPORT_SYMBOL_GPL(rust_helper_drm_syncobj_fence_get);
+
+#ifdef CONFIG_DRM_GEM_SHMEM_HELPER
+
+/*
+ * Out-of-line entry point that lets Rust call drm_gem_shmem_object_free().
+ *
+ * The callee is invoked as a plain statement: this helper returns void, and
+ * ISO C does not allow `return <expression>;` in a void function. This also
+ * matches the other void helpers in this file.
+ */
+void rust_helper_drm_gem_shmem_object_free(struct drm_gem_object *obj)
+{
+	drm_gem_shmem_object_free(obj);
+}
+EXPORT_SYMBOL_GPL(rust_helper_drm_gem_shmem_object_free);
+
+void rust_helper_drm_gem_shmem_object_print_info(struct drm_printer *p, unsigned int indent,
+ const struct drm_gem_object *obj)
+{
+ drm_gem_shmem_object_print_info(p, indent, obj); /* all output goes through the caller's drm_printer */
+}
+EXPORT_SYMBOL_GPL(rust_helper_drm_gem_shmem_object_print_info);
+
+int rust_helper_drm_gem_shmem_object_pin(struct drm_gem_object *obj)
+{
+ return drm_gem_shmem_object_pin(obj); /* status code is the callee's; undo with rust_helper_drm_gem_shmem_object_unpin() */
+}
+EXPORT_SYMBOL_GPL(rust_helper_drm_gem_shmem_object_pin);
+
+void rust_helper_drm_gem_shmem_object_unpin(struct drm_gem_object *obj)
+{
+ drm_gem_shmem_object_unpin(obj); /* counterpart of rust_helper_drm_gem_shmem_object_pin() */
+}
+EXPORT_SYMBOL_GPL(rust_helper_drm_gem_shmem_object_unpin);
+
+struct sg_table *rust_helper_drm_gem_shmem_object_get_sg_table(struct drm_gem_object *obj)
+{
+ return drm_gem_shmem_object_get_sg_table(obj); /* NOTE(review): failure is presumably reported as an ERR_PTR rather than NULL — confirm */
+}
+EXPORT_SYMBOL_GPL(rust_helper_drm_gem_shmem_object_get_sg_table);
+
+int rust_helper_drm_gem_shmem_object_vmap(struct drm_gem_object *obj,
+ struct iosys_map *map)
+{
+ return drm_gem_shmem_object_vmap(obj, map); /* *map is filled in by the callee; release with the matching vunmap helper */
+}
+EXPORT_SYMBOL_GPL(rust_helper_drm_gem_shmem_object_vmap);
+
+void rust_helper_drm_gem_shmem_object_vunmap(struct drm_gem_object *obj,
+ struct iosys_map *map)
+{
+ drm_gem_shmem_object_vunmap(obj, map); /* counterpart of rust_helper_drm_gem_shmem_object_vmap() */
+}
+EXPORT_SYMBOL_GPL(rust_helper_drm_gem_shmem_object_vunmap);
+
+int rust_helper_drm_gem_shmem_object_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+{
+ return drm_gem_shmem_object_mmap(obj, vma); /* status code is the callee's, returned unchanged */
+}
+EXPORT_SYMBOL_GPL(rust_helper_drm_gem_shmem_object_mmap);
+
+#endif
+#endif
+
+#ifdef mutex_lock
+/*
+ * Out-of-line entry point for mutex_lock(). It is only compiled when
+ * mutex_lock is a preprocessor macro (see the surrounding #ifdef).
+ *
+ * The lock call is a plain statement: `return <expression>;` is not allowed
+ * in a function returning void.
+ */
+void rust_helper_mutex_lock(struct mutex *lock)
+{
+	mutex_lock(lock);
+}
+EXPORT_SYMBOL_GPL(rust_helper_mutex_lock);
+#endif
+
+#ifdef mutex_unlock
+/*
+ * Out-of-line entry point for mutex_unlock(). It is only compiled when
+ * mutex_unlock is a preprocessor macro (see the surrounding #ifdef).
+ *
+ * The unlock call is a plain statement: `return <expression>;` is not
+ * allowed in a function returning void.
+ */
+void rust_helper_mutex_unlock(struct mutex *lock)
+{
+	mutex_unlock(lock);
+}
+EXPORT_SYMBOL_GPL(rust_helper_mutex_unlock);
+#endif
+
/*
* We use `bindgen`'s `--size_t-is-usize` option to bind the C `size_t` type
* as the Rust `usize` type, so we can use it in contexts where Rust
diff --git a/rust/kernel/build_assert.rs b/rust/kernel/build_assert.rs
index 659542393c09..970980827214 100644
--- a/rust/kernel/build_assert.rs
+++ b/rust/kernel/build_assert.rs
@@ -67,6 +67,7 @@ macro_rules! build_error {
/// assert!(n > 1); // Run-time check
/// }
/// ```
+#[allow(rustdoc::broken_intra_doc_links)]
#[macro_export]
macro_rules! build_assert {
($cond:expr $(,)?) => {{
diff --git a/rust/kernel/delay.rs b/rust/kernel/delay.rs
new file mode 100644
index 000000000000..1e987fa65941
--- /dev/null
+++ b/rust/kernel/delay.rs
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Delay functions for operations like sleeping.
+//!
+//! C header: [`include/linux/delay.h`](../../../../include/linux/delay.h)
+
+use crate::bindings;
+use core::{cmp::min, time::Duration};
+
+const MILLIS_PER_SEC: u64 = 1_000;
+
+/// Converts `duration` into the millisecond count that is handed to `msleep`.
+///
+/// The sub-second part is rounded *up* to the next whole millisecond, and the total saturates
+/// at [`c_uint::MAX`][core::ffi::c_uint::MAX] instead of wrapping.
+fn coarse_sleep_conversion(duration: Duration) -> core::ffi::c_uint {
+    // Nanoseconds per millisecond, obtained through the stable `Duration::from_millis` rather
+    // than the unstable `Duration::MILLISECOND` associated constant.
+    let milli_as_nanos = Duration::from_millis(1).subsec_nanos();
+
+    // Rounds the nanosecond component of `duration` up to the nearest millisecond.
+    // `subsec_nanos()` is always below 1_000_000_000, so this sum cannot overflow a `u32`.
+    let nanos_as_millis = (duration.subsec_nanos() + (milli_as_nanos - 1)) / milli_as_nanos;
+
+    // Saturates the second component of `duration` to `c_uint::MAX`. The `as` cast is lossless
+    // because the value was clamped to `c_uint::MAX` by `min` first.
+    let seconds_as_millis = min(
+        duration.as_secs().saturating_mul(MILLIS_PER_SEC),
+        u64::from(core::ffi::c_uint::MAX),
+    ) as core::ffi::c_uint;
+
+    seconds_as_millis.saturating_add(nanos_as_millis)
+}
+
+/// Puts the caller to sleep for at least `duration`, robust against waitqueue interruptions.
+///
+/// The request is handed to the C `msleep` function, so the granularity is one millisecond:
+/// a `duration` with a finer sub-millisecond part is rounded up to the next millisecond, and
+/// a [`Duration`] worth more than [`c_uint::MAX`][core::ffi::c_uint::MAX] milliseconds is
+/// saturated to that maximum.
+///
+/// # Examples
+///
+// Keep these in sync with `test_coarse_sleep_examples`.
+/// ```
+/// # use core::time::Duration;
+/// # use kernel::delay::coarse_sleep;
+/// coarse_sleep(Duration::ZERO);                   // Equivalent to `msleep(0)`.
+/// coarse_sleep(Duration::from_nanos(1));          // Equivalent to `msleep(1)`.
+///
+/// coarse_sleep(Duration::from_nanos(1_000_000));  // Equivalent to `msleep(1)`.
+/// coarse_sleep(Duration::from_nanos(1_000_001));  // Equivalent to `msleep(2)`.
+/// coarse_sleep(Duration::from_nanos(1_999_999));  // Equivalent to `msleep(2)`.
+///
+/// coarse_sleep(Duration::from_millis(1));         // Equivalent to `msleep(1)`.
+/// coarse_sleep(Duration::from_millis(2));         // Equivalent to `msleep(2)`.
+///
+/// coarse_sleep(Duration::from_secs(1));           // Equivalent to `msleep(1000)`.
+/// coarse_sleep(Duration::new(1, 1));              // Equivalent to `msleep(1001)`.
+/// coarse_sleep(Duration::new(1, 2));              // Equivalent to `msleep(1001)`.
+/// ```
+pub fn coarse_sleep(duration: Duration) {
+    let millis = coarse_sleep_conversion(duration);
+
+    // SAFETY: `msleep` is safe for all values of its argument.
+    unsafe { bindings::msleep(millis) }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{coarse_sleep_conversion, MILLIS_PER_SEC};
+    use core::time::Duration;
+
+    /// Checks every conversion quoted in `coarse_sleep`'s documentation.
+    #[test]
+    fn test_coarse_sleep_examples() {
+        // Keep these in sync with `coarse_sleep`'s `# Examples` section.
+        let cases = [
+            (Duration::ZERO, 0),
+            (Duration::from_nanos(1), 1),
+            (Duration::from_nanos(1_000_000), 1),
+            (Duration::from_nanos(1_000_001), 2),
+            (Duration::from_nanos(1_999_999), 2),
+            (Duration::from_millis(1), 1),
+            (Duration::from_millis(2), 2),
+            (Duration::from_secs(1), 1000),
+            (Duration::new(1, 1), 1001),
+            (Duration::new(1, 2), 1001),
+        ];
+
+        for (duration, expected_millis) in cases {
+            assert_eq!(coarse_sleep_conversion(duration), expected_millis);
+        }
+    }
+
+    /// Checks that results are clamped to `c_uint::MAX` rather than wrapping.
+    #[test]
+    fn test_coarse_sleep_saturation() {
+        // Largest whole-second count whose millisecond value still fits in a `c_uint`.
+        let max_whole_secs = core::ffi::c_uint::MAX as u64 / MILLIS_PER_SEC;
+
+        let just_below = coarse_sleep_conversion(Duration::new(max_whole_secs, 0));
+        assert!(just_below < core::ffi::c_uint::MAX);
+
+        let pushed_over = coarse_sleep_conversion(Duration::new(max_whole_secs, 999_999_999));
+        assert_eq!(pushed_over, core::ffi::c_uint::MAX);
+
+        let far_over = coarse_sleep_conversion(Duration::MAX);
+        assert_eq!(far_over, core::ffi::c_uint::MAX);
+    }
+}
diff --git a/rust/kernel/device.rs b/rust/kernel/device.rs
new file mode 100644
index 000000000000..c8b7d69b1426
--- /dev/null
+++ b/rust/kernel/device.rs
@@ -0,0 +1,536 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Generic devices that are part of the kernel's driver model.
+//!
+//! C header: [`include/linux/device.h`](../../../../include/linux/device.h)
+
+use crate::{
+ bindings,
+ error::Result,
+ of,
+ revocable::{Revocable, RevocableGuard},
+ str::CStr,
+ sync::{LockClassKey, NeedsLockClass, RevocableMutex, RevocableMutexGuard, UniqueArc},
+};
+use core::{
+ fmt,
+ ops::{Deref, DerefMut},
+ pin::Pin,
+};
+
+#[cfg(CONFIG_PRINTK)]
+use crate::c_str;
+
+/// A raw device.
+///
+/// # Safety
+///
+/// Implementers must ensure that the `*mut device` returned by [`RawDevice::raw_device`] is
+/// related to `self`, that is, actions on it will affect `self`. For example, if one calls
+/// `get_device`, then the refcount on the device represented by `self` will be incremented.
+///
+/// Additionally, implementers must ensure that the device is never renamed. Commit a5462516aa99
+/// ("driver-core: document restrictions on device_rename()") has details on why `device_rename`
+/// should not be used.
+pub unsafe trait RawDevice {
+    /// Returns the raw `struct device` related to `self`.
+    fn raw_device(&self) -> *mut bindings::device;
+
+    /// Returns the name of the device.
+    fn name(&self) -> &CStr {
+        let ptr = self.raw_device();
+
+        // SAFETY: `ptr` is valid because `self` keeps it alive.
+        let name = unsafe { bindings::dev_name(ptr) };
+
+        // SAFETY: The name of the device remains valid while it is alive (because the device is
+        // never renamed, per the safety requirement of this trait). This is guaranteed to be the
+        // case because the reference to `self` outlives the one of the returned `CStr` (enforced
+        // by the compiler because of their lifetimes).
+        unsafe { CStr::from_char_ptr(name) }
+    }
+
+    /// Gets the OpenFirmware node attached to this device, if there is one.
+    fn of_node(&self) -> Option<of::Node> {
+        let ptr = self.raw_device();
+
+        // SAFETY: `ptr` is valid because `self` keeps it alive, so reading its `of_node` field is
+        // sound. NOTE(review): this additionally relies on `of::Node::get_from_raw` accepting any
+        // value that field can hold (including null); confirm against that function's contract.
+        unsafe { of::Node::get_from_raw((*ptr).of_node) }
+    }
+
+    /// Prints an emergency-level message (level 0) prefixed with device information.
+    ///
+    /// More details are available from [`dev_emerg`].
+    ///
+    /// [`dev_emerg`]: crate::dev_emerg
+    fn pr_emerg(&self, args: fmt::Arguments<'_>) {
+        // SAFETY: `klevel` is null-terminated, uses one of the kernel constants.
+        unsafe { self.printk(bindings::KERN_EMERG, args) };
+    }
+
+    /// Prints an alert-level message (level 1) prefixed with device information.
+    ///
+    /// More details are available from [`dev_alert`].
+    ///
+    /// [`dev_alert`]: crate::dev_alert
+    fn pr_alert(&self, args: fmt::Arguments<'_>) {
+        // SAFETY: `klevel` is null-terminated, uses one of the kernel constants.
+        unsafe { self.printk(bindings::KERN_ALERT, args) };
+    }
+
+    /// Prints a critical-level message (level 2) prefixed with device information.
+    ///
+    /// More details are available from [`dev_crit`].
+    ///
+    /// [`dev_crit`]: crate::dev_crit
+    fn pr_crit(&self, args: fmt::Arguments<'_>) {
+        // SAFETY: `klevel` is null-terminated, uses one of the kernel constants.
+        unsafe { self.printk(bindings::KERN_CRIT, args) };
+    }
+
+    /// Prints an error-level message (level 3) prefixed with device information.
+    ///
+    /// More details are available from [`dev_err`].
+    ///
+    /// [`dev_err`]: crate::dev_err
+    fn pr_err(&self, args: fmt::Arguments<'_>) {
+        // SAFETY: `klevel` is null-terminated, uses one of the kernel constants.
+        unsafe { self.printk(bindings::KERN_ERR, args) };
+    }
+
+    /// Prints a warning-level message (level 4) prefixed with device information.
+    ///
+    /// More details are available from [`dev_warn`].
+    ///
+    /// [`dev_warn`]: crate::dev_warn
+    fn pr_warn(&self, args: fmt::Arguments<'_>) {
+        // SAFETY: `klevel` is null-terminated, uses one of the kernel constants.
+        unsafe { self.printk(bindings::KERN_WARNING, args) };
+    }
+
+    /// Prints a notice-level message (level 5) prefixed with device information.
+    ///
+    /// More details are available from [`dev_notice`].
+    ///
+    /// [`dev_notice`]: crate::dev_notice
+    fn pr_notice(&self, args: fmt::Arguments<'_>) {
+        // SAFETY: `klevel` is null-terminated, uses one of the kernel constants.
+        unsafe { self.printk(bindings::KERN_NOTICE, args) };
+    }
+
+    /// Prints an info-level message (level 6) prefixed with device information.
+    ///
+    /// More details are available from [`dev_info`].
+    ///
+    /// [`dev_info`]: crate::dev_info
+    fn pr_info(&self, args: fmt::Arguments<'_>) {
+        // SAFETY: `klevel` is null-terminated, uses one of the kernel constants.
+        unsafe { self.printk(bindings::KERN_INFO, args) };
+    }
+
+    /// Prints a debug-level message (level 7) prefixed with device information.
+    ///
+    /// Nothing is printed unless the crate is built with `debug_assertions` enabled.
+    ///
+    /// More details are available from [`dev_dbg`].
+    ///
+    /// [`dev_dbg`]: crate::dev_dbg
+    fn pr_dbg(&self, args: fmt::Arguments<'_>) {
+        if cfg!(debug_assertions) {
+            // SAFETY: `klevel` is null-terminated, uses one of the kernel constants.
+            unsafe { self.printk(bindings::KERN_DEBUG, args) };
+        }
+    }
+
+    /// Prints the provided message to the console.
+    ///
+    /// When `CONFIG_PRINTK` is disabled this compiles down to a no-op.
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure that `klevel` is null-terminated; in particular, one of the
+    /// `KERN_*`constants, for example, `KERN_CRIT`, `KERN_ALERT`, etc.
+    #[cfg_attr(not(CONFIG_PRINTK), allow(unused_variables))]
+    unsafe fn printk(&self, klevel: &[u8], msg: fmt::Arguments<'_>) {
+        // SAFETY: `klevel` is null-terminated and one of the kernel constants. `self.raw_device`
+        // is valid because `self` is valid. The "%pA" format string expects a pointer to
+        // `fmt::Arguments`, which is what we're passing as the last argument.
+        #[cfg(CONFIG_PRINTK)]
+        unsafe {
+            bindings::_dev_printk(
+                // Thin pointer to the first byte of the level string.
+                klevel.as_ptr().cast::<core::ffi::c_char>(),
+                self.raw_device(),
+                c_str!("%pA").as_char_ptr(),
+                (&msg as *const fmt::Arguments<'_>).cast::<core::ffi::c_void>(),
+            )
+        };
+    }
+}
+
+/// A ref-counted device.
+///
+/// # Invariants
+///
+/// `ptr` is valid, non-null, and has a non-zero reference count. One of the references is owned by
+/// `self`, and will be decremented when `self` is dropped.
+pub struct Device {
+ pub(crate) ptr: *mut bindings::device, // the owned reference is released with `put_device` in `Drop`
+}
+
+// SAFETY: `Device` only holds a pointer to a C device, which is safe to be used from any thread.
+unsafe impl Send for Device {}
+
+// SAFETY: `Device` only holds a pointer to a C device, references to which are safe to be used
+// from any thread.
+unsafe impl Sync for Device {}
+
+impl Device {
+    /// Wraps `ptr` in a new [`Device`], taking an additional reference on the C device.
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure that `ptr` is valid, non-null, and has a non-zero reference count.
+    pub unsafe fn new(ptr: *mut bindings::device) -> Self {
+        // SAFETY: By the safety requirements, `ptr` is valid, so its refcount can be incremented.
+        unsafe { bindings::get_device(ptr) };
+
+        // INVARIANT: The safety requirements satisfy all but one invariant, which is that `self`
+        // owns a reference. This is satisfied by the call to `get_device` above.
+        Self { ptr }
+    }
+
+    /// Builds an owning [`Device`] out of anything that implements [`RawDevice`].
+    pub fn from_dev(dev: &dyn RawDevice) -> Self {
+        let raw = dev.raw_device();
+
+        // SAFETY: The requirements are satisfied by the existence of `RawDevice` and its safety
+        // requirements.
+        unsafe { Self::new(raw) }
+    }
+}
+
+// SAFETY: The device returned by `raw_device` is the one for which we hold a reference.
+unsafe impl RawDevice for Device {
+ fn raw_device(&self) -> *mut bindings::device {
+ self.ptr // plain copy of the stored pointer; no extra reference is taken here
+ }
+}
+
+impl Drop for Device {
+ fn drop(&mut self) {
+ // SAFETY: By the type invariants, `self` owns exactly one reference to `ptr`; dropping `self`
+ // is the point at which that reference is handed back through `put_device`.
+ unsafe { bindings::put_device(self.ptr) };
+ }
+}
+
+impl Clone for Device {
+    /// Returns a second handle to the same C device, holding a reference of its own.
+    fn clone(&self) -> Self {
+        Self::from_dev(self)
+    }
+}
+
+/// Device data.
+///
+/// When a device is removed (for whatever reason, for example, because the device was unplugged or
+/// because the user decided to unbind the driver), the driver is given a chance to clean its state
+/// up, and all I/O resources should ideally not be used anymore.
+///
+/// However, the device data is reference-counted because other subsystems hold pointers to it. So
+/// some device state must be freed and not used anymore, while other state must remain accessible.
+///
+/// This struct separates the device data into three categories:
+/// 1. Registrations: are destroyed when the device is removed, but before the I/O resources
+/// become inaccessible.
+/// 2. I/O resources: are available until the device is removed.
+/// 3. General data: remains available as long as the ref count is nonzero.
+///
+/// This struct implements the `DeviceRemoval` trait so that it can clean resources up even if not
+/// explicitly called by the device drivers.
+pub struct Data<T, U, V> {
+ registrations: RevocableMutex<T>, // category 1: revoked first in `device_remove`
+ resources: Revocable<U>, // category 2: revoked second in `device_remove`
+ general: V, // category 3: never revoked; reachable through `Deref`/`DerefMut`
+}
+
+/// Safely creates a new reference-counted instance of [`Data`].
+///
+/// Takes the registrations, resources and general data (in that order) followed by a string
+/// literal used as the lock name, and supplies the two lock classes that
+/// [`Data::try_new`](crate::device::Data::try_new) needs.
+#[doc(hidden)]
+#[macro_export]
+macro_rules! new_device_data {
+    ($reg:expr, $res:expr, $gen:expr, $name:literal) => {{
+        // One pair of lock classes per expansion site.
+        static CLASS1: $crate::sync::LockClassKey = $crate::sync::LockClassKey::new();
+        static CLASS2: $crate::sync::LockClassKey = $crate::sync::LockClassKey::new();
+
+        // The argument expressions are evaluated once each, in declaration order.
+        let registrations = $reg;
+        let resources = $res;
+        let general = $gen;
+        let name = $crate::c_str!($name);
+
+        $crate::device::Data::try_new(registrations, resources, general, name, &CLASS1, &CLASS2)
+    }};
+}
+
+impl<T, U, V> Data<T, U, V> {
+    /// Allocates a pinned, uniquely-owned `Data` from its three parts.
+    ///
+    /// `name`, `key1` and `key2` are passed on to the initialisation of the registrations lock.
+    ///
+    /// It is recommended that the [`new_device_data`] macro be used as it automatically creates
+    /// the lock classes.
+    pub fn try_new(
+        registrations: T,
+        resources: U,
+        general: V,
+        name: &'static CStr,
+        key1: &'static LockClassKey,
+        key2: &'static LockClassKey,
+    ) -> Result<Pin<UniqueArc<Self>>> {
+        let allocation = UniqueArc::try_new(Self {
+            // SAFETY: We call `RevocableMutex::init` below.
+            registrations: unsafe { RevocableMutex::new(registrations) },
+            resources: Revocable::new(resources),
+            general,
+        })?;
+        let mut data = Pin::from(allocation);
+
+        // SAFETY: `Data::registrations` is pinned when `Data` is.
+        let pinned_registrations =
+            unsafe { data.as_mut().map_unchecked_mut(|d| &mut d.registrations) };
+        pinned_registrations.init(name, key1, key2);
+
+        Ok(data)
+    }
+
+    /// Grants access to the resources, or `None` once they are no longer available.
+    pub fn resources(&self) -> Option<RevocableGuard<'_, U>> {
+        self.resources.try_access()
+    }
+
+    /// Locks and returns the registrations, or `None` once they are no longer available.
+    pub fn registrations(&self) -> Option<RevocableMutexGuard<'_, T>> {
+        self.registrations.try_write()
+    }
+}
+
+impl<T, U, V> crate::driver::DeviceRemoval for Data<T, U, V> {
+ fn device_remove(&self) {
+ // Order matters: the registrations are revoked first so that the resources are still
+ // available to them while they unregister.
+ self.registrations.revoke();
+
+ // Only now are the resources revoked. General data is left untouched and stays accessible.
+ self.resources.revoke();
+ }
+}
+
+impl<T, U, V> Deref for Data<T, U, V> {
+    type Target = V;
+
+    /// Dereferences to the general data, which is never revoked.
+    fn deref(&self) -> &Self::Target {
+        &self.general
+    }
+}
+
+impl<T, U, V> DerefMut for Data<T, U, V> {
+    /// Mutably dereferences to the general data.
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.general
+    }
+}
+
+#[doc(hidden)]
+#[macro_export]
+macro_rules! dev_printk {
+    ($method:ident, $dev:expr, $($fmt_args:tt)*) => {{
+        // We have an explicit `use` statement here so that callers of this macro are not
+        // required to import the `RawDevice` trait themselves in order to use its functions.
+        use $crate::device::RawDevice;
+        ($dev).$method(core::format_args!($($fmt_args)*));
+    }};
+}
+
+/// Logs a device-prefixed message at the emergency level (level 0).
+///
+/// Reserve this level for situations in which the system is unusable.
+///
+/// This is the counterpart of the kernel's `dev_emerg` macro.
+///
+/// The first argument is the device; the remaining ones follow the [`std::print!`] syntax, which
+/// is described in [`core::fmt`] and [`alloc::format!`].
+///
+/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
+///
+/// # Examples
+///
+/// ```
+/// # use kernel::device::Device;
+///
+/// fn example(dev: &Device) {
+///     dev_emerg!(dev, "hello {}\n", "there");
+/// }
+/// ```
+#[macro_export]
+macro_rules! dev_emerg {
+    ($($args:tt)*) => {
+        $crate::dev_printk!(pr_emerg, $($args)*);
+    };
+}
+
+/// Logs a device-prefixed message at the alert level (level 1).
+///
+/// Use this level when action must be taken immediately.
+///
+/// This is the counterpart of the kernel's `dev_alert` macro.
+///
+/// The first argument is the device; the remaining ones follow the [`std::print!`] syntax, which
+/// is described in [`core::fmt`] and [`alloc::format!`].
+///
+/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
+///
+/// # Examples
+///
+/// ```
+/// # use kernel::device::Device;
+///
+/// fn example(dev: &Device) {
+///     dev_alert!(dev, "hello {}\n", "there");
+/// }
+/// ```
+#[macro_export]
+macro_rules! dev_alert {
+    ($($args:tt)*) => {
+        $crate::dev_printk!(pr_alert, $($args)*);
+    };
+}
+
+/// Logs a device-prefixed message at the critical level (level 2).
+///
+/// Use this level for critical conditions.
+///
+/// This is the counterpart of the kernel's `dev_crit` macro.
+///
+/// The first argument is the device; the remaining ones follow the [`std::print!`] syntax, which
+/// is described in [`core::fmt`] and [`alloc::format!`].
+///
+/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
+///
+/// # Examples
+///
+/// ```
+/// # use kernel::device::Device;
+///
+/// fn example(dev: &Device) {
+///     dev_crit!(dev, "hello {}\n", "there");
+/// }
+/// ```
+#[macro_export]
+macro_rules! dev_crit {
+    ($($args:tt)*) => {
+        $crate::dev_printk!(pr_crit, $($args)*);
+    };
+}
+
+/// Prints an error-level message (level 3) prefixed with device information.
+///
+/// This level should be used in error conditions.
+///
+/// Equivalent to the kernel's `dev_err` macro.
+///
+/// Mimics the interface of [`std::print!`]. More information about the syntax is available from
+/// [`core::fmt`] and [`alloc::format!`].
+///
+/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
+///
+/// # Examples
+///
+/// ```
+/// # use kernel::device::Device;
+///
+/// fn example(dev: &Device) {
+/// dev_err!(dev, "hello {}\n", "there");
+/// }
+/// ```
+#[macro_export]
+macro_rules! dev_err {
+ ($($f:tt)*) => { $crate::dev_printk!(pr_err, $($f)*); }
+}
+
+/// Logs a device-prefixed message at the warning level (level 4).
+///
+/// Use this level for warning conditions.
+///
+/// This is the counterpart of the kernel's `dev_warn` macro.
+///
+/// The first argument is the device; the remaining ones follow the [`std::print!`] syntax, which
+/// is described in [`core::fmt`] and [`alloc::format!`].
+///
+/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
+///
+/// # Examples
+///
+/// ```
+/// # use kernel::device::Device;
+///
+/// fn example(dev: &Device) {
+///     dev_warn!(dev, "hello {}\n", "there");
+/// }
+/// ```
+#[macro_export]
+macro_rules! dev_warn {
+    ($($args:tt)*) => {
+        $crate::dev_printk!(pr_warn, $($args)*);
+    };
+}
+
+/// Logs a device-prefixed message at the notice level (level 5).
+///
+/// Use this level for conditions that are normal but significant.
+///
+/// This is the counterpart of the kernel's `dev_notice` macro.
+///
+/// The first argument is the device; the remaining ones follow the [`std::print!`] syntax, which
+/// is described in [`core::fmt`] and [`alloc::format!`].
+///
+/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
+///
+/// # Examples
+///
+/// ```
+/// # use kernel::device::Device;
+///
+/// fn example(dev: &Device) {
+///     dev_notice!(dev, "hello {}\n", "there");
+/// }
+/// ```
+#[macro_export]
+macro_rules! dev_notice {
+    ($($args:tt)*) => {
+        $crate::dev_printk!(pr_notice, $($args)*);
+    };
+}
+
+/// Logs a device-prefixed message at the info level (level 6).
+///
+/// Use this level for informational messages.
+///
+/// This is the counterpart of the kernel's `dev_info` macro.
+///
+/// The first argument is the device; the remaining ones follow the [`std::print!`] syntax, which
+/// is described in [`core::fmt`] and [`alloc::format!`].
+///
+/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
+///
+/// # Examples
+///
+/// ```
+/// # use kernel::device::Device;
+///
+/// fn example(dev: &Device) {
+///     dev_info!(dev, "hello {}\n", "there");
+/// }
+/// ```
+#[macro_export]
+macro_rules! dev_info {
+    ($($args:tt)*) => {
+        $crate::dev_printk!(pr_info, $($args)*);
+    };
+}
+
+/// Logs a device-prefixed message at the debug level (level 7).
+///
+/// Use this level for debug messages.
+///
+/// This is the counterpart of the kernel's `dev_dbg` macro, except that it doesn't support
+/// dynamic debug yet.
+///
+/// The first argument is the device; the remaining ones follow the [`std::print!`] syntax, which
+/// is described in [`core::fmt`] and [`alloc::format!`].
+///
+/// [`std::print!`]: https://doc.rust-lang.org/std/macro.print.html
+///
+/// # Examples
+///
+/// ```
+/// # use kernel::device::Device;
+///
+/// fn example(dev: &Device) {
+///     dev_dbg!(dev, "hello {}\n", "there");
+/// }
+/// ```
+#[macro_export]
+macro_rules! dev_dbg {
+    ($($args:tt)*) => {
+        $crate::dev_printk!(pr_dbg, $($args)*);
+    };
+}
diff --git a/rust/kernel/dma_fence.rs b/rust/kernel/dma_fence.rs
new file mode 100644
index 000000000000..ca93380d9da2
--- /dev/null
+++ b/rust/kernel/dma_fence.rs
@@ -0,0 +1,532 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! DMA fence abstraction.
+//!
+//! C header: [`include/linux/dma_fence.h`](../../include/linux/dma_fence.h)
+
+use crate::{
+ bindings,
+ error::{to_result, Result},
+ prelude::*,
+ sync::LockClassKey,
+ types::Opaque,
+};
+use core::fmt::Write;
+use core::ops::{Deref, DerefMut};
+use core::ptr::addr_of_mut;
+use core::sync::atomic::{AtomicU64, Ordering};
+
+/// Common interface shared by every kind of DMA fence wrapper in this module.
+///
+/// # Invariants
+///
+/// `raw()` returns a valid pointer to a `dma_fence`, and the implementing object owns a reference
+/// to that fence.
+pub trait RawDmaFence: crate::private::Sealed {
+    /// Returns the raw `struct dma_fence` pointer without giving up ownership.
+    fn raw(&self) -> *mut bindings::dma_fence;
+
+    /// Consumes the object and returns the raw `struct dma_fence` pointer.
+    ///
+    /// The destructor of `self` is not run, so the reference it held now belongs to the caller,
+    /// who is responsible for dropping it.
+    fn into_raw(self) -> *mut bindings::dma_fence
+    where
+        Self: Sized,
+    {
+        // Wrapping in `ManuallyDrop` suppresses the destructor, keeping the reference alive.
+        let this = core::mem::ManuallyDrop::new(self);
+        this.raw()
+    }
+
+    /// Advances this fence to the chain node which will signal this sequence number.
+    /// If no sequence number is provided, this returns `self` again.
+    ///
+    /// Consumes `self`. On failure the reference is released and the error reported by
+    /// `dma_fence_chain_find_seqno` is returned; a NULL result is reported as `EINVAL`.
+    fn chain_find_seqno(self, seqno: u64) -> Result<Fence>
+    where
+        Self: Sized,
+    {
+        let mut raw = self.into_raw();
+
+        // SAFETY: This will safely fail if this DmaFence is not a chain.
+        // `raw` is valid per the type invariant.
+        let status = unsafe { bindings::dma_fence_chain_find_seqno(&mut raw, seqno) };
+
+        if status != 0 {
+            // SAFETY: This is either an owned reference or NULL, dma_fence_put can handle both.
+            unsafe { bindings::dma_fence_put(raw) };
+            return Err(Error::from_kernel_errno(status));
+        }
+
+        if raw.is_null() {
+            return Err(EINVAL); // When can this happen?
+        }
+
+        // SAFETY: `raw` is valid and non-NULL as checked above, and we own its reference.
+        Ok(unsafe { Fence::from_raw(raw) })
+    }
+
+    /// Signal completion of this fence
+    fn signal(&self) -> Result {
+        // SAFETY: `raw()` is a valid fence pointer per the trait invariant.
+        let status = unsafe { bindings::dma_fence_signal(self.raw()) };
+        to_result(status)
+    }
+
+    /// Set the error flag on this fence
+    fn set_error(&self, err: Error) {
+        let errno = err.to_kernel_errno();
+        // SAFETY: `raw()` is a valid fence pointer per the trait invariant.
+        unsafe { bindings::dma_fence_set_error(self.raw(), errno) };
+    }
+}
+
+/// A generic DMA Fence Object
+///
+/// # Invariants
+///
+/// `ptr` is a valid pointer to a `dma_fence` and this object owns one reference to it.
+pub struct Fence {
+    ptr: *mut bindings::dma_fence,
+}
+
+impl Fence {
+    /// Wraps a raw `dma_fence` pointer, taking over the caller's reference.
+    ///
+    /// # Safety
+    ///
+    /// The caller must own a reference to the dma_fence, which is transferred to the new object.
+    pub(crate) unsafe fn from_raw(ptr: *mut bindings::dma_fence) -> Fence {
+        Self { ptr }
+    }
+
+    /// Wraps a borrowed raw `dma_fence` pointer after taking an additional reference to it.
+    ///
+    /// # Safety
+    ///
+    /// Takes a borrowed reference to the dma_fence, and increments the reference count. `ptr`
+    /// must therefore point to a valid dma_fence for the duration of the call.
+    pub(crate) unsafe fn get_raw(ptr: *mut bindings::dma_fence) -> Fence {
+        // SAFETY: Pointer is valid per the safety contract
+        unsafe { bindings::dma_fence_get(ptr) };
+
+        // SAFETY: The reference taken just above is handed over to the new object.
+        unsafe { Self::from_raw(ptr) }
+    }
+
+    /// Create a new Fence object from a RawDmaFence.
+    ///
+    /// The source keeps its own reference; the returned `Fence` holds a new one.
+    pub fn from_fence(fence: &dyn RawDmaFence) -> Fence {
+        let raw = fence.raw();
+        // SAFETY: Pointer is valid per the RawDmaFence contract
+        unsafe { Self::get_raw(raw) }
+    }
+}
+
+impl crate::private::Sealed for Fence {}
+
+impl RawDmaFence for Fence {
+    fn raw(&self) -> *mut bindings::dma_fence {
+        self.ptr
+    }
+}
+
+impl Drop for Fence {
+    fn drop(&mut self) {
+        // SAFETY: We own a reference to this dma_fence, per the type invariant.
+        unsafe { bindings::dma_fence_put(self.ptr) };
+    }
+}
+
+impl Clone for Fence {
+    fn clone(&self) -> Self {
+        // SAFETY: `ptr` is valid per the type invariant and we own a reference to it, so it is
+        // sound to take another reference for the clone.
+        unsafe { Self::get_raw(self.ptr) }
+    }
+}
+
+// NOTE(review): no justification is given in this file for these impls; presumably they rely on
+// the C dma_fence API being usable from any thread. Confirm.
+unsafe impl Sync for Fence {}
+unsafe impl Send for Fence {}
+
+/// Trait which must be implemented by driver-specific fence objects.
+///
+/// Every method receives the enclosing [`FenceObject`] as `self`, which dereferences to the
+/// driver's own type. The two name callbacks are mandatory; the remaining methods are optional
+/// and are only installed in the C `dma_fence_ops` table when the implementation overrides them
+/// (see `FenceObject::VTABLE`).
+#[vtable]
+pub trait FenceOps: Sized + Send + Sync {
+ /// True if this dma_fence implementation uses 64bit seqno, false otherwise.
+ const USE_64BIT_SEQNO: bool;
+
+ /// Returns the driver name. This is a callback to allow drivers to compute the name at
+ /// runtime, without having to store it permanently for each fence, or build a cache of
+ /// some sort.
+ fn get_driver_name<'a>(self: &'a FenceObject<Self>) -> &'a CStr;
+
+ /// Returns the name of the context this fence belongs to. This is a callback to allow drivers
+ /// to compute the name at runtime, without having to store it permanently for each fence, or
+ /// build a cache of some sort.
+ fn get_timeline_name<'a>(self: &'a FenceObject<Self>) -> &'a CStr;
+
+ /// Enable software signaling of fence.
+ ///
+ /// The default implementation returns `false`.
+ fn enable_signaling(self: &FenceObject<Self>) -> bool {
+ false
+ }
+
+ /// Peek whether the fence is signaled, as a fastpath optimization for e.g. dma_fence_wait() or
+ /// dma_fence_add_callback().
+ ///
+ /// The default implementation returns `false`.
+ fn signaled(self: &FenceObject<Self>) -> bool {
+ false
+ }
+
+ /// Callback to fill in free-form debug info specific to this fence, like the sequence number.
+ ///
+ /// The text is written to `_output`; the default implementation writes nothing.
+ fn fence_value_str(self: &FenceObject<Self>, _output: &mut dyn Write) {}
+
+ /// Fills in the current value of the timeline as a string, like the sequence number. Note that
+ /// the specific fence passed to this function should not matter, drivers should only use it to
+ /// look up the corresponding timeline structures.
+ ///
+ /// The text is written to `_output`; the default implementation writes nothing.
+ fn timeline_value_str(self: &FenceObject<Self>, _output: &mut dyn Write) {}
+}
+
+/// `dma_fence_ops::get_driver_name` trampoline that forwards to [`FenceOps::get_driver_name`].
+///
+/// # Safety
+///
+/// `fence` must point to the `fence` field of a live `FenceObject<T>`.
+unsafe extern "C" fn get_driver_name_cb<T: FenceOps>(
+    fence: *mut bindings::dma_fence,
+) -> *const core::ffi::c_char {
+    // SAFETY: All of our fences are FenceObject<T>.
+    let p = crate::container_of!(fence, FenceObject<T>, fence) as *mut FenceObject<T>;
+
+    // SAFETY: The caller is responsible for passing a valid dma_fence subtype. Only a shared
+    // reference is created: the object may be reachable through other handles at the same time,
+    // so materializing a `&mut` here would violate Rust's aliasing rules.
+    T::get_driver_name(unsafe { &*p }).as_char_ptr()
+}
+
+/// `dma_fence_ops::get_timeline_name` trampoline that forwards to
+/// [`FenceOps::get_timeline_name`].
+///
+/// # Safety
+///
+/// `fence` must point to the `fence` field of a live `FenceObject<T>`.
+unsafe extern "C" fn get_timeline_name_cb<T: FenceOps>(
+    fence: *mut bindings::dma_fence,
+) -> *const core::ffi::c_char {
+    // SAFETY: All of our fences are FenceObject<T>.
+    let p = crate::container_of!(fence, FenceObject<T>, fence) as *mut FenceObject<T>;
+
+    // SAFETY: The caller is responsible for passing a valid dma_fence subtype. Only a shared
+    // reference is created: the object may be reachable through other handles at the same time,
+    // so materializing a `&mut` here would violate Rust's aliasing rules.
+    T::get_timeline_name(unsafe { &*p }).as_char_ptr()
+}
+
+/// `dma_fence_ops::enable_signaling` trampoline that forwards to
+/// [`FenceOps::enable_signaling`].
+///
+/// # Safety
+///
+/// `fence` must point to the `fence` field of a live `FenceObject<T>`.
+unsafe extern "C" fn enable_signaling_cb<T: FenceOps>(fence: *mut bindings::dma_fence) -> bool {
+    // SAFETY: All of our fences are FenceObject<T>.
+    let p = crate::container_of!(fence, FenceObject<T>, fence) as *mut FenceObject<T>;
+
+    // SAFETY: The caller is responsible for passing a valid dma_fence subtype. Only a shared
+    // reference is created: the object may be reachable through other handles at the same time,
+    // so materializing a `&mut` here would violate Rust's aliasing rules.
+    T::enable_signaling(unsafe { &*p })
+}
+
+/// `dma_fence_ops::signaled` trampoline that forwards to [`FenceOps::signaled`].
+///
+/// # Safety
+///
+/// `fence` must point to the `fence` field of a live `FenceObject<T>`.
+unsafe extern "C" fn signaled_cb<T: FenceOps>(fence: *mut bindings::dma_fence) -> bool {
+    // SAFETY: All of our fences are FenceObject<T>.
+    let p = crate::container_of!(fence, FenceObject<T>, fence) as *mut FenceObject<T>;
+
+    // SAFETY: The caller is responsible for passing a valid dma_fence subtype. Only a shared
+    // reference is created: the object may be reachable through other handles at the same time,
+    // so materializing a `&mut` here would violate Rust's aliasing rules.
+    T::signaled(unsafe { &*p })
+}
+
+/// `dma_fence_ops::release` trampoline: destroys the driver payload, then frees the fence memory.
+///
+/// # Safety
+///
+/// `fence` must point to the `fence` field of a `FenceObject<T>` that is being released and is
+/// not accessed again afterwards.
+unsafe extern "C" fn release_cb<T: FenceOps>(fence: *mut bindings::dma_fence) {
+    // SAFETY: All of our fences are FenceObject<T>.
+    let obj = crate::container_of!(fence, FenceObject<T>, fence) as *mut FenceObject<T>;
+
+    // Run the destructor of the driver-specific part in place; the allocation itself is released
+    // separately below.
+    // SAFETY: `obj` is never used after this.
+    unsafe { core::ptr::drop_in_place(addr_of_mut!((*obj).inner)) };
+
+    // SAFETY: All of our fences are allocated using kmalloc, so this is safe.
+    unsafe { bindings::dma_fence_free(fence) };
+}
+
+/// `dma_fence_ops::fence_value_str` trampoline that forwards to [`FenceOps::fence_value_str`].
+///
+/// The driver's output is written into `string`, which is always left NUL-terminated. Nothing
+/// is written when `size` is zero or negative.
+///
+/// # Safety
+///
+/// `fence` must point to the `fence` field of a live `FenceObject<T>`, and `string` must be
+/// valid for writes of `size` bytes.
+unsafe extern "C" fn fence_value_str_cb<T: FenceOps>(
+    fence: *mut bindings::dma_fence,
+    string: *mut core::ffi::c_char,
+    size: core::ffi::c_int,
+) {
+    // A negative size is treated like an empty buffer.
+    let size: usize = size.try_into().unwrap_or(0);
+
+    if size == 0 {
+        return;
+    }
+
+    // SAFETY: All of our fences are FenceObject<T>.
+    let p = crate::container_of!(fence, FenceObject<T>, fence) as *mut FenceObject<T>;
+
+    // SAFETY: The caller is responsible for the validity of string/size
+    let mut f = unsafe { crate::str::Formatter::from_buffer(string as *mut _, size) };
+
+    // SAFETY: The caller is responsible for passing a valid dma_fence subtype. Only a shared
+    // reference is created: the object may be reachable through other handles at the same time,
+    // so materializing a `&mut` here would violate Rust's aliasing rules.
+    T::fence_value_str(unsafe { &*p }, &mut f);
+    let _ = f.write_str("\0");
+
+    // The terminator written above may have been dropped if the buffer was already full, so
+    // force one into the last byte.
+    // SAFETY: `size` is at least 1 per the check above
+    unsafe { *string.add(size - 1) = 0 };
+}
+
+/// `dma_fence_ops::timeline_value_str` trampoline that forwards to
+/// [`FenceOps::timeline_value_str`].
+///
+/// The driver's output is written into `string`, which is always left NUL-terminated. Nothing
+/// is written when `size` is zero or negative.
+///
+/// # Safety
+///
+/// `fence` must point to the `fence` field of a live `FenceObject<T>`, and `string` must be
+/// valid for writes of `size` bytes.
+unsafe extern "C" fn timeline_value_str_cb<T: FenceOps>(
+    fence: *mut bindings::dma_fence,
+    string: *mut core::ffi::c_char,
+    size: core::ffi::c_int,
+) {
+    // A negative size is treated like an empty buffer.
+    let size: usize = size.try_into().unwrap_or(0);
+
+    if size == 0 {
+        return;
+    }
+
+    // SAFETY: All of our fences are FenceObject<T>.
+    let p = crate::container_of!(fence, FenceObject<T>, fence) as *mut FenceObject<T>;
+
+    // SAFETY: The caller is responsible for the validity of string/size
+    let mut f = unsafe { crate::str::Formatter::from_buffer(string as *mut _, size) };
+
+    // SAFETY: The caller is responsible for passing a valid dma_fence subtype. Only a shared
+    // reference is created: the object may be reachable through other handles at the same time,
+    // so materializing a `&mut` here would violate Rust's aliasing rules.
+    T::timeline_value_str(unsafe { &*p }, &mut f);
+    let _ = f.write_str("\0");
+
+    // The terminator written above may have been dropped if the buffer was already full, so
+    // force one into the last byte.
+    // SAFETY: `size` is at least 1 per the check above
+    unsafe { *string.add(size - 1) = 0 };
+}
+
+// Lets `FenceObject<Self>` appear as the `self` type of `FenceOps` methods, for ergonomics.
+impl<T: FenceOps> core::ops::Receiver for FenceObject<T> {}
+
+/// A driver-specific DMA Fence Object
+///
+/// The layout is `#[repr(C)]`: the C `dma_fence` comes first, followed by the spinlock that is
+/// passed to `dma_fence_init` and finally the driver payload `T`.
+///
+/// # Invariants
+///
+/// Objects of this type are created by [`FenceContexts::new_fence`], which initializes `fence`
+/// with [`Self::VTABLE`], initializes `lock`, and moves the driver value into `inner`.
+#[repr(C)]
+pub struct FenceObject<T: FenceOps> {
+    fence: bindings::dma_fence,
+    lock: Opaque<bindings::spinlock>,
+    inner: T,
+}
+
+impl<T: FenceOps> FenceObject<T> {
+    /// Number of bytes to allocate for one fence object.
+    const SIZE: usize = core::mem::size_of::<Self>();
+
+    /// The C operations table for fences of this driver type.
+    ///
+    /// The optional callbacks are only installed when `T` overrides the corresponding
+    /// [`FenceOps`] method; otherwise the slot is left empty.
+    const VTABLE: bindings::dma_fence_ops = bindings::dma_fence_ops {
+        use_64bit_seqno: T::USE_64BIT_SEQNO,
+        get_driver_name: Some(get_driver_name_cb::<T>),
+        get_timeline_name: Some(get_timeline_name_cb::<T>),
+        enable_signaling: if T::HAS_ENABLE_SIGNALING {
+            Some(enable_signaling_cb::<T>)
+        } else {
+            None
+        },
+        signaled: if T::HAS_SIGNALED {
+            Some(signaled_cb::<T>)
+        } else {
+            None
+        },
+        wait: None, // Deprecated
+        release: Some(release_cb::<T>),
+        fence_value_str: if T::HAS_FENCE_VALUE_STR {
+            Some(fence_value_str_cb::<T>)
+        } else {
+            None
+        },
+        timeline_value_str: if T::HAS_TIMELINE_VALUE_STR {
+            Some(timeline_value_str_cb::<T>)
+        } else {
+            None
+        },
+    };
+}
+
+impl<T: FenceOps> Deref for FenceObject<T> {
+    type Target = T;
+
+    /// Gives shared access to the driver payload.
+    fn deref(&self) -> &T {
+        let Self { inner, .. } = self;
+        inner
+    }
+}
+
+impl<T: FenceOps> DerefMut for FenceObject<T> {
+    /// Gives exclusive access to the driver payload.
+    fn deref_mut(&mut self) -> &mut T {
+        let Self { inner, .. } = self;
+        inner
+    }
+}
+
+impl<T: FenceOps> crate::private::Sealed for FenceObject<T> {}
+impl<T: FenceOps> RawDmaFence for FenceObject<T> {
+    fn raw(&self) -> *mut bindings::dma_fence {
+        // The C API wants a mutable pointer even though we only hold `&self`.
+        core::ptr::addr_of!(self.fence) as *mut bindings::dma_fence
+    }
+}
+
+/// A unique reference to a driver-specific fence object
+///
+/// Owns one reference to the underlying `dma_fence`, which is released on drop. It can be turned
+/// into a shareable [`UserFence`] with `into()`.
+pub struct UniqueFence<T: FenceOps>(*mut FenceObject<T>);
+
+impl<T: FenceOps> Deref for UniqueFence<T> {
+    type Target = FenceObject<T>;
+
+    fn deref(&self) -> &FenceObject<T> {
+        let obj: *const FenceObject<T> = self.0;
+        // SAFETY: NOTE(review): relies on the wrapped pointer staying valid for as long as this
+        // handle holds its reference. Confirm.
+        unsafe { &*obj }
+    }
+}
+
+impl<T: FenceOps> DerefMut for UniqueFence<T> {
+    fn deref_mut(&mut self) -> &mut FenceObject<T> {
+        let obj: *mut FenceObject<T> = self.0;
+        // SAFETY: NOTE(review): relies on this handle being the only one through which the object
+        // is accessed from Rust. Confirm.
+        unsafe { &mut *obj }
+    }
+}
+
+impl<T: FenceOps> crate::private::Sealed for UniqueFence<T> {}
+impl<T: FenceOps> RawDmaFence for UniqueFence<T> {
+    fn raw(&self) -> *mut bindings::dma_fence {
+        let obj = self.0;
+        // Only the field address is computed; no reference to the object is created.
+        unsafe { addr_of_mut!((*obj).fence) }
+    }
+}
+
+impl<T: FenceOps> From<UniqueFence<T>> for UserFence<T> {
+    /// Converts the unique handle into a shared one, transferring its reference.
+    fn from(value: UniqueFence<T>) -> Self {
+        // Suppress the destructor so the reference is moved rather than released.
+        let unique = core::mem::ManuallyDrop::new(value);
+
+        UserFence(unique.0)
+    }
+}
+
+impl<T: FenceOps> Drop for UniqueFence<T> {
+    fn drop(&mut self) {
+        let raw = self.raw();
+        // SAFETY: We own a reference to this fence.
+        unsafe { bindings::dma_fence_put(raw) };
+    }
+}
+
+unsafe impl<T: FenceOps> Sync for UniqueFence<T> {}
+unsafe impl<T: FenceOps> Send for UniqueFence<T> {}
+
+/// A shared reference to a driver-specific fence object
+///
+/// Each handle owns one reference to the underlying `dma_fence`; cloning takes another one and
+/// dropping releases it. Only shared access to the [`FenceObject`] is offered.
+pub struct UserFence<T: FenceOps>(*mut FenceObject<T>);
+
+impl<T: FenceOps> Deref for UserFence<T> {
+    type Target = FenceObject<T>;
+
+    fn deref(&self) -> &FenceObject<T> {
+        let obj: *const FenceObject<T> = self.0;
+        // SAFETY: NOTE(review): relies on the wrapped pointer staying valid for as long as this
+        // handle holds its reference. Confirm.
+        unsafe { &*obj }
+    }
+}
+
+impl<T: FenceOps> Clone for UserFence<T> {
+    fn clone(&self) -> Self {
+        // SAFETY: `ptr` is valid per the type invariant and we own a reference to it.
+        unsafe { bindings::dma_fence_get(self.raw()) };
+
+        // The reference just taken now belongs to the new handle.
+        Self(self.0)
+    }
+}
+
+impl<T: FenceOps> crate::private::Sealed for UserFence<T> {}
+impl<T: FenceOps> RawDmaFence for UserFence<T> {
+    fn raw(&self) -> *mut bindings::dma_fence {
+        let obj = self.0;
+        // Only the field address is computed; no reference to the object is created.
+        unsafe { addr_of_mut!((*obj).fence) }
+    }
+}
+
+impl<T: FenceOps> Drop for UserFence<T> {
+    fn drop(&mut self) {
+        let raw = self.raw();
+        // SAFETY: We own a reference to this fence.
+        unsafe { bindings::dma_fence_put(raw) };
+    }
+}
+
+unsafe impl<T: FenceOps> Sync for UserFence<T> {}
+unsafe impl<T: FenceOps> Send for UserFence<T> {}
+
+/// An array of fence contexts, out of which fences can be created.
+///
+/// Context index `i` (with `i < count`) corresponds to the kernel context number `start + i` and
+/// has its own sequence counter in `seqnos[i]`.
+pub struct FenceContexts {
+    start: u64,
+    count: u32,
+    seqnos: Vec<AtomicU64>,
+    lock_name: &'static CStr,
+    lock_key: &'static LockClassKey,
+}
+
+impl FenceContexts {
+    /// Create a new set of fence contexts.
+    ///
+    /// Allocates `count` context numbers via `dma_fence_context_alloc` and one sequence counter
+    /// per context. `name` and `key` are used to initialize the spinlock of every fence created
+    /// from this set.
+    ///
+    /// # Errors
+    ///
+    /// Fails if the memory for the sequence counters cannot be allocated.
+    pub fn new(
+        count: u32,
+        name: &'static CStr,
+        key: &'static LockClassKey,
+    ) -> Result<FenceContexts> {
+        let mut seqnos: Vec<AtomicU64> = Vec::new();
+
+        seqnos.try_reserve(count as usize)?;
+
+        for _ in 0..count {
+            seqnos.try_push(Default::default())?;
+        }
+
+        // The context numbers are reserved last so that an allocation failure above does not
+        // consume any.
+        // SAFETY: NOTE(review): the call takes only an integer count; presumably it has no
+        // further preconditions. Confirm.
+        let start = unsafe { bindings::dma_fence_context_alloc(count as core::ffi::c_uint) };
+
+        Ok(FenceContexts {
+            start,
+            count,
+            seqnos,
+            lock_name: name,
+            lock_key: key,
+        })
+    }
+
+    /// Create a new fence in a given context index.
+    ///
+    /// The fence takes the next sequence number of that context and takes ownership of `inner`.
+    ///
+    /// # Errors
+    ///
+    /// - `EINVAL` if `context` is not smaller than the number of contexts in this set.
+    /// - `ENOMEM` if the fence object cannot be allocated.
+    pub fn new_fence<T: FenceOps>(&self, context: u32, inner: T) -> Result<UniqueFence<T>> {
+        // Valid indices are `0..count`. `context == count` must be rejected as well: it has no
+        // sequence counter and its context number lies outside the allocated range.
+        if context >= self.count {
+            return Err(EINVAL);
+        }
+
+        // SAFETY: NOTE(review): a NULL source pointer presumably makes this a plain allocation;
+        // the result is checked for NULL below. `__GFP_ZERO` gives zero-filled memory.
+        let p = unsafe {
+            bindings::krealloc(
+                core::ptr::null_mut(),
+                FenceObject::<T>::SIZE,
+                bindings::GFP_KERNEL | bindings::__GFP_ZERO,
+            ) as *mut FenceObject<T>
+        };
+
+        if p.is_null() {
+            return Err(ENOMEM);
+        }
+
+        // Taken only after the allocation succeeded, so a failed allocation does not consume a
+        // sequence number.
+        let seqno = self.seqnos[context as usize].fetch_add(1, Ordering::Relaxed);
+
+        // SAFETY: The pointer is valid, so pointers to members are too.
+        // After this, all fields are initialized.
+        unsafe {
+            addr_of_mut!((*p).inner).write(inner);
+            bindings::__spin_lock_init(
+                addr_of_mut!((*p).lock) as *mut _,
+                self.lock_name.as_char_ptr(),
+                self.lock_key.get(),
+            );
+            bindings::dma_fence_init(
+                addr_of_mut!((*p).fence),
+                &FenceObject::<T>::VTABLE,
+                addr_of_mut!((*p).lock) as *mut _,
+                self.start + context as u64,
+                seqno,
+            );
+        };
+
+        Ok(UniqueFence(p))
+    }
+}
+
+/// A DMA Fence Chain Object
+///
+/// # Invariants
+///
+/// `ptr` is a valid pointer to a `dma_fence_chain` which we own.
+pub struct FenceChain {
+    ptr: *mut bindings::dma_fence_chain,
+}
+
+impl FenceChain {
+    /// Create a new DmaFenceChain object.
+    ///
+    /// Returns `ENOMEM` if the allocation fails.
+    pub fn new() -> Result<Self> {
+        // SAFETY: This function is safe to call and takes no arguments.
+        let ptr = unsafe { bindings::dma_fence_chain_alloc() };
+
+        if ptr.is_null() {
+            return Err(ENOMEM);
+        }
+
+        Ok(Self { ptr })
+    }
+
+    /// Convert the DmaFenceChain into the underlying raw pointer.
+    ///
+    /// This assumes the caller will take ownership of the object; the destructor is not run.
+    pub(crate) fn into_raw(self) -> *mut bindings::dma_fence_chain {
+        // Suppress the destructor so the chain is not freed.
+        let this = core::mem::ManuallyDrop::new(self);
+        this.ptr
+    }
+}
+
+impl Drop for FenceChain {
+    fn drop(&mut self) {
+        // SAFETY: We own this dma_fence_chain.
+        unsafe { bindings::dma_fence_chain_free(self.ptr) };
+    }
+}
diff --git a/rust/kernel/driver.rs b/rust/kernel/driver.rs
new file mode 100644
index 000000000000..aa1441ae809b
--- /dev/null
+++ b/rust/kernel/driver.rs
@@ -0,0 +1,475 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Generic support for drivers of different buses (e.g., PCI, Platform, Amba, etc.).
+//!
+//! Each bus/subsystem is expected to implement [`DriverOps`], which allows drivers to register
+//! using the [`Registration`] class.
+
+use crate::{error::code::*, error::Result, str::CStr, sync::Arc, ThisModule};
+use alloc::boxed::Box;
+use core::{cell::UnsafeCell, marker::PhantomData, ops::Deref, pin::Pin};
+
+/// A subsystem (e.g., PCI, Platform, Amba, etc.) that allows drivers to be written for it.
+///
+/// Implementations are driven by [`Registration`]: it calls [`DriverOps::register`] on its
+/// embedded [`DriverOps::RegType`] and, if that succeeded, calls [`DriverOps::unregister`] when
+/// it is dropped.
+pub trait DriverOps {
+ /// The type that holds information about the registration. This is typically a struct defined
+ /// by the C portion of the kernel.
+ ///
+ /// It must implement [`Default`] because [`Registration::new`] creates it that way.
+ type RegType: Default;
+
+ /// Registers a driver.
+ ///
+ /// # Safety
+ ///
+ /// `reg` must point to valid, initialised, and writable memory. It may be modified by this
+ /// function to hold registration state.
+ ///
+ /// On success, `reg` must remain pinned and valid until the matching call to
+ /// [`DriverOps::unregister`].
+ unsafe fn register(
+ reg: *mut Self::RegType,
+ name: &'static CStr,
+ module: &'static ThisModule,
+ ) -> Result;
+
+ /// Unregisters a driver previously registered with [`DriverOps::register`].
+ ///
+ /// # Safety
+ ///
+ /// `reg` must point to valid writable memory, initialised by a previous successful call to
+ /// [`DriverOps::register`].
+ unsafe fn unregister(reg: *mut Self::RegType);
+}
+
+/// The registration of a driver.
+///
+/// Holds the subsystem-specific registration data together with a flag recording whether
+/// [`DriverOps::register`] has completed successfully, so that the driver is unregistered on
+/// drop only in that case.
+pub struct Registration<T: DriverOps> {
+    is_registered: bool,
+    concrete_reg: UnsafeCell<T::RegType>,
+}
+
+// SAFETY: `Registration` has no fields or methods accessible via `&Registration`, so it is safe to
+// share references to it with multiple threads as nothing can be done.
+unsafe impl<T: DriverOps> Sync for Registration<T> {}
+
+impl<T: DriverOps> Registration<T> {
+    /// Creates a new, not yet registered, instance of the registration object.
+    pub fn new() -> Self {
+        let concrete_reg = UnsafeCell::new(T::RegType::default());
+        Self {
+            is_registered: false,
+            concrete_reg,
+        }
+    }
+
+    /// Allocates a pinned registration object and registers it.
+    ///
+    /// Returns a pinned heap-allocated representation of the registration, or the error from the
+    /// allocation or from the registration.
+    pub fn new_pinned(name: &'static CStr, module: &'static ThisModule) -> Result<Pin<Box<Self>>> {
+        let boxed = Box::try_new(Self::new())?;
+        let mut pinned = Pin::from(boxed);
+        pinned.as_mut().register(name, module)?;
+        Ok(pinned)
+    }
+
+    /// Registers a driver with its subsystem.
+    ///
+    /// It must be pinned because the memory block that represents the registration is potentially
+    /// self-referential.
+    ///
+    /// Returns `EINVAL` if this object is already registered.
+    pub fn register(
+        self: Pin<&mut Self>,
+        name: &'static CStr,
+        module: &'static ThisModule,
+    ) -> Result {
+        // SAFETY: We never move out of `this`.
+        let this = unsafe { self.get_unchecked_mut() };
+        if this.is_registered {
+            // Already registered.
+            return Err(EINVAL);
+        }
+
+        let reg_ptr = this.concrete_reg.get();
+
+        // SAFETY: `concrete_reg` was initialised via its default constructor. It is only freed
+        // after `Self::drop` is called, which first calls `T::unregister`.
+        unsafe { T::register(reg_ptr, name, module) }?;
+
+        // Only reached on success, so `drop` unregisters exactly the successful registrations.
+        this.is_registered = true;
+        Ok(())
+    }
+}
+
+impl<T: DriverOps> Default for Registration<T> {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl<T: DriverOps> Drop for Registration<T> {
+    fn drop(&mut self) {
+        if !self.is_registered {
+            return;
+        }
+
+        // SAFETY: This path only runs if a previous call to `T::register` completed
+        // successfully.
+        unsafe { T::unregister(self.concrete_reg.get()) };
+    }
+}
+
+/// Conversion from a device id to a raw device id.
+///
+/// This is meant to be implemented by buses/subsystems so that they can use [`IdTable`] to
+/// guarantee (at compile-time) zero-termination of device id tables provided by drivers.
+///
+/// The trait is marked `#[const_trait]` so that [`RawDeviceId::to_rawid`] can be called from the
+/// `const fn` [`IdArray::new`].
+///
+/// # Safety
+///
+/// Implementers must ensure that:
+/// - [`RawDeviceId::ZERO`] is actually a zeroed-out version of the raw device id.
+/// - [`RawDeviceId::to_rawid`] stores `offset` in the context/data field of the raw device id so
+/// that buses can recover the pointer to the data.
+#[const_trait]
+pub unsafe trait RawDeviceId {
+ /// The raw type that holds the device id.
+ ///
+ /// Id tables created from [`Self`] are going to hold this type in its zero-terminated array.
+ type RawType: Copy;
+
+ /// A zeroed-out representation of the raw device id.
+ ///
+ /// Id tables created from [`Self`] use [`Self::ZERO`] as the sentinel to indicate the end of
+ /// the table.
+ const ZERO: Self::RawType;
+
+ /// Converts an id into a raw id.
+ ///
+ /// `offset` is the offset from the memory location where the raw device id is stored to the
+ /// location where its associated context information is stored. Implementations must store
+ /// this in the appropriate context/data field of the raw type.
+ fn to_rawid(&self, offset: isize) -> Self::RawType;
+}
+
+/// A zero-terminated device id array.
+///
+/// The layout is `#[repr(C)]`: `N` raw ids immediately followed by one sentinel entry.
+#[derive(Copy, Clone)]
+#[repr(C)]
+pub struct IdArrayIds<T: RawDeviceId, const N: usize> {
+ // The raw ids, in the order given to `IdArray::new`.
+ ids: [T::RawType; N],
+ // The terminating entry; `IdArray::new` sets it to `T::ZERO`.
+ sentinel: T::RawType,
+}
+
+// NOTE(review): no SAFETY justification is given for this impl; presumably raw id types are plain
+// data that is never mutated once the table is built. Confirm.
+unsafe impl<T: RawDeviceId, const N: usize> Sync for IdArrayIds<T, N> {}
+
+/// A zero-terminated device id array, followed by context data.
+///
+/// The layout is `#[repr(C)]`: first the zero-terminated raw ids, then one `Option<U>` of context
+/// data per id. Each raw id records the byte offset from itself to its context entry.
+#[repr(C)]
+pub struct IdArray<T: RawDeviceId, U, const N: usize> {
+ ids: IdArrayIds<T, N>,
+ id_infos: [Option<U>; N],
+}
+
+impl<T: RawDeviceId, U, const N: usize> IdArray<T, U, N> {
+ /// Creates a new instance of the array.
+ ///
+ /// The contents are derived from the given identifiers and context information: `ids[i]` is
+ /// converted with [`RawDeviceId::to_rawid`] and paired with `infos[i]`.
+ pub const fn new(ids: [T; N], infos: [Option<U>; N]) -> Self
+ where
+ T: ~const RawDeviceId + Copy,
+ T::RawType: Copy + Clone,
+ {
+ // Start with every id (and the sentinel) zeroed; the real ids are filled in below.
+ let mut array = Self {
+ ids: IdArrayIds {
+ ids: [T::ZERO; N],
+ sentinel: T::ZERO,
+ },
+ id_infos: infos,
+ };
+ // A `while` loop with a manual index is used because this is a `const fn`.
+ let mut i = 0usize;
+ while i < N {
+ // SAFETY: Both pointers are within `array` (or one byte beyond), consequently they are
+ // derived from the same allocated object. We are using a `u8` pointer, whose size is 1,
+ // so the pointers are necessarily 1-byte aligned.
+ let offset = unsafe {
+ (&array.id_infos[i] as *const _ as *const u8)
+ .offset_from(&array.ids.ids[i] as *const _ as _)
+ };
+ // The offset is measured in bytes, from raw id `i` to context entry `i`.
+ array.ids.ids[i] = ids[i].to_rawid(offset);
+ i += 1;
+ }
+ array
+ }
+
+ /// Returns an `IdTable` backed by `self`.
+ ///
+ /// This is used to essentially erase the array size.
+ ///
+ /// NOTE(review): this indexes element 0 of the id array, which is out of bounds when
+ /// `N == 0`; confirm that empty arrays are never converted into tables.
+ pub const fn as_table(&self) -> IdTable<'_, T, U> {
+ IdTable {
+ first: &self.ids.ids[0],
+ _p: PhantomData,
+ }
+ }
+
+ /// Returns the number of items in the ID table, not counting the sentinel.
+ pub const fn count(&self) -> usize {
+ self.ids.ids.len()
+ }
+
+ /// Returns the inner IdArrayIds array, without the context data.
+ ///
+ /// The result is a copy of the ids, including the sentinel.
+ pub const fn as_ids(&self) -> IdArrayIds<T, N>
+ where
+ T: ~const RawDeviceId + Copy,
+ {
+ self.ids
+ }
+}
+
+/// A device id table.
+///
+/// The table is guaranteed to be zero-terminated and to be followed by an array of context data of
+/// type `Option<U>`.
+///
+/// Only a reference to the first raw id is stored; the array length is not part of the type.
+pub struct IdTable<'a, T: RawDeviceId, U> {
+ // First raw id of the backing `IdArray`.
+ first: &'a T::RawType,
+ // Ties the table to the context data type `U` without storing a value of it.
+ _p: PhantomData<&'a U>,
+}
+
+// Exposes the first raw id of the table.
+impl<T: RawDeviceId, U> const AsRef<T::RawType> for IdTable<'_, T, U> {
+ fn as_ref(&self) -> &T::RawType {
+ self.first
+ }
+}
+
+/// Counts the number of parenthesis-delimited, comma-separated items.
+///
+/// A trailing comma after the last item is accepted. The result is a constant expression of the
+/// form `1 + 1 + ... + 0`.
+///
+/// # Examples
+///
+/// ```
+/// # use kernel::count_paren_items;
+///
+/// assert_eq!(0, count_paren_items!());
+/// assert_eq!(1, count_paren_items!((A)));
+/// assert_eq!(1, count_paren_items!((A),));
+/// assert_eq!(2, count_paren_items!((A), (B)));
+/// assert_eq!(2, count_paren_items!((A), (B),));
+/// assert_eq!(3, count_paren_items!((A), (B), (C)));
+/// assert_eq!(3, count_paren_items!((A), (B), (C),));
+/// ```
+#[macro_export]
+macro_rules! count_paren_items {
+ // An item followed by a comma: count it and recurse on whatever follows (possibly nothing).
+ (($($item:tt)*), $($remaining:tt)*) => { 1 + $crate::count_paren_items!($($remaining)*) };
+ // A final item without a trailing comma.
+ (($($item:tt)*)) => { 1 };
+ // Nothing left to count.
+ () => { 0 };
+}
+
+/// Converts a comma-separated list of pairs into an array with the first element. That is, it
+/// discards the second element of the pair.
+///
+/// Additionally, it automatically introduces a type if the first element is wrapped in curly
+/// braces, for example, if it's `{v: 10}`, it becomes `X { v: 10 }`; this is to avoid repeating
+/// the type.
+///
+/// # Examples
+///
+/// ```
+/// # use kernel::first_item;
+///
+/// #[derive(PartialEq, Debug)]
+/// struct X {
+/// v: u32,
+/// }
+///
+/// assert_eq!([] as [X; 0], first_item!(X, ));
+/// assert_eq!([X { v: 10 }], first_item!(X, ({ v: 10 }, Y)));
+/// assert_eq!([X { v: 10 }], first_item!(X, ({ v: 10 }, Y),));
+/// assert_eq!([X { v: 10 }], first_item!(X, (X { v: 10 }, Y)));
+/// assert_eq!([X { v: 10 }], first_item!(X, (X { v: 10 }, Y),));
+/// assert_eq!([X { v: 10 }, X { v: 20 }], first_item!(X, ({ v: 10 }, Y), ({ v: 20 }, Y)));
+/// assert_eq!([X { v: 10 }, X { v: 20 }], first_item!(X, ({ v: 10 }, Y), ({ v: 20 }, Y),));
+/// assert_eq!([X { v: 10 }, X { v: 20 }], first_item!(X, (X { v: 10 }, Y), (X { v: 20 }, Y)));
+/// assert_eq!([X { v: 10 }, X { v: 20 }], first_item!(X, (X { v: 10 }, Y), (X { v: 20 }, Y),));
+/// assert_eq!([X { v: 10 }, X { v: 20 }, X { v: 30 }],
+/// first_item!(X, ({ v: 10 }, Y), ({ v: 20 }, Y), ({v: 30}, Y)));
+/// assert_eq!([X { v: 10 }, X { v: 20 }, X { v: 30 }],
+/// first_item!(X, ({ v: 10 }, Y), ({ v: 20 }, Y), ({v: 30}, Y),));
+/// assert_eq!([X { v: 10 }, X { v: 20 }, X { v: 30 }],
+/// first_item!(X, (X { v: 10 }, Y), (X { v: 20 }, Y), (X {v: 30}, Y)));
+/// assert_eq!([X { v: 10 }, X { v: 20 }, X { v: 30 }],
+/// first_item!(X, (X { v: 10 }, Y), (X { v: 20 }, Y), (X {v: 30}, Y),));
+/// ```
+#[macro_export]
+macro_rules! first_item {
+ // Brace form: every first element is `{ fields }`, and the id type is supplied through a local
+ // alias. This arm is listed before the general expression arm.
+ ($id_type:ty, $(({$($first:tt)*}, $second:expr)),* $(,)?) => {
+ {
+ type IdType = $id_type;
+ [$(IdType{$($first)*},)*]
+ }
+ };
+ // General form: every first element is already a complete expression.
+ ($id_type:ty, $(($first:expr, $second:expr)),* $(,)?) => { [$($first,)*] };
+}
+
+/// Converts a comma-separated list of pairs into an array with the second element. That is, it
+/// discards the first element of the pair.
+///
+/// The first element may be either an expression or a brace-delimited token group, mirroring
+/// the two forms accepted by `first_item!`.
+///
+/// # Examples
+///
+/// ```
+/// # use kernel::second_item;
+///
+/// assert_eq!([] as [u32; 0], second_item!());
+/// assert_eq!([10u32], second_item!((X, 10u32)));
+/// assert_eq!([10u32], second_item!((X, 10u32),));
+/// assert_eq!([10u32], second_item!(({ X }, 10u32)));
+/// assert_eq!([10u32], second_item!(({ X }, 10u32),));
+/// assert_eq!([10u32, 20], second_item!((X, 10u32), (X, 20)));
+/// assert_eq!([10u32, 20], second_item!((X, 10u32), (X, 20),));
+/// assert_eq!([10u32, 20], second_item!(({ X }, 10u32), ({ X }, 20)));
+/// assert_eq!([10u32, 20], second_item!(({ X }, 10u32), ({ X }, 20),));
+/// assert_eq!([10u32, 20, 30], second_item!((X, 10u32), (X, 20), (X, 30)));
+/// assert_eq!([10u32, 20, 30], second_item!((X, 10u32), (X, 20), (X, 30),));
+/// assert_eq!([10u32, 20, 30], second_item!(({ X }, 10u32), ({ X }, 20), ({ X }, 30)));
+/// assert_eq!([10u32, 20, 30], second_item!(({ X }, 10u32), ({ X }, 20), ({ X }, 30),));
+/// ```
+#[macro_export]
+macro_rules! second_item {
+ // Brace form: the first element is an arbitrary brace-delimited token group and is discarded.
+ ($(({$($first:tt)*}, $second:expr)),* $(,)?) => { [$($second,)*] };
+ // General form: the first element is an expression and is discarded.
+ ($(($first:expr, $second:expr)),* $(,)?) => { [$($second,)*] };
+}
+
+/// Defines a new constant [`IdArray`] with a concise syntax.
+///
+/// It is meant to be used by buses and subsystems to create a similar macro with their device id
+/// type already specified, i.e., with fewer parameters to the end user.
+///
+/// The entries are given as a bracketed list of `(id, context)` pairs. The array length is
+/// computed with `count_paren_items!`, the ids are extracted with `first_item!` and the context
+/// values with `second_item!`.
+///
+/// # Examples
+///
+// TODO: Exported but not usable by kernel modules (requires `const_trait_impl`).
+/// ```ignore
+/// #![feature(const_trait_impl)]
+/// # use kernel::{define_id_array, driver::RawDeviceId};
+///
+/// #[derive(Copy, Clone)]
+/// struct Id(u32);
+///
+/// // SAFETY: `ZERO` is all zeroes and `to_rawid` stores `offset` as the second element of the raw
+/// // device id pair.
+/// unsafe impl const RawDeviceId for Id {
+/// type RawType = (u64, isize);
+/// const ZERO: Self::RawType = (0, 0);
+/// fn to_rawid(&self, offset: isize) -> Self::RawType {
+/// (self.0 as u64 + 1, offset)
+/// }
+/// }
+///
+/// define_id_array!(A1, Id, (), []);
+/// define_id_array!(A2, Id, &'static [u8], [(Id(10), None)]);
+/// define_id_array!(A3, Id, &'static [u8], [(Id(10), Some(b"id1")), ]);
+/// define_id_array!(A4, Id, &'static [u8], [(Id(10), Some(b"id1")), (Id(20), Some(b"id2"))]);
+/// define_id_array!(A5, Id, &'static [u8], [(Id(10), Some(b"id1")), (Id(20), Some(b"id2")), ]);
+/// define_id_array!(A6, Id, &'static [u8], [(Id(10), None), (Id(20), Some(b"id2")), ]);
+/// define_id_array!(A7, Id, &'static [u8], [(Id(10), Some(b"id1")), (Id(20), None), ]);
+/// define_id_array!(A8, Id, &'static [u8], [(Id(10), None), (Id(20), None), ]);
+///
+/// // Within a bus driver:
+/// driver_id_table!(BUS_ID_TABLE, Id, &'static [u8], A1);
+/// // At the top level:
+/// module_id_table!(MODULE_ID_TABLE, "mybus", Id, A1);
+/// ```
+#[macro_export]
+macro_rules! define_id_array {
+ // `$t` is the raw token list of `(id, context)` pairs; it is expanded three times below.
+ ($table_name:ident, $id_type:ty, $data_type:ty, [ $($t:tt)* ]) => {
+ const $table_name:
+ $crate::driver::IdArray<$id_type, $data_type, { $crate::count_paren_items!($($t)*) }> =
+ $crate::driver::IdArray::new(
+ $crate::first_item!($id_type, $($t)*), $crate::second_item!($($t)*));
+ };
+}
+
+/// Declares an [`IdArray`] as an [`IdTable`] for a bus driver with a concise syntax.
+///
+/// It is meant to be used by buses and subsystems to create a similar macro with their device id
+/// type already specified, i.e., with fewer parameters to the end user.
+///
+/// The generated constant has type `Option<IdTable<'static, _, _>>` and is always `Some`, built
+/// from `$target.as_table()`.
+///
+/// # Examples
+///
+// TODO: Exported but not usable by kernel modules (requires `const_trait_impl`).
+/// ```ignore
+/// #![feature(const_trait_impl)]
+/// # use kernel::{driver_id_table};
+///
+/// driver_id_table!(BUS_ID_TABLE, Id, &'static [u8], MY_ID_ARRAY);
+/// ```
+#[macro_export]
+macro_rules! driver_id_table {
+ ($table_name:ident, $id_type:ty, $data_type:ty, $target:expr) => {
+ const $table_name: Option<$crate::driver::IdTable<'static, $id_type, $data_type>> =
+ Some($target.as_table());
+ };
+}
+
+/// Declares an [`IdArray`] as a module-level ID table with a concise syntax.
+///
+/// It is meant to be used by buses and subsystems to create a similar macro with their device id
+/// type already specified, i.e., with fewer parameters to the end user.
+///
+/// The generated static holds only the raw ids (`$table_name.as_ids()`) and is exported under
+/// the symbol name `__mod_<table_type>__<table_name>_device_table`.
+///
+/// # Examples
+///
+// TODO: Exported but not usable by kernel modules (requires `const_trait_impl`).
+/// ```ignore
+/// #![feature(const_trait_impl)]
+/// # use kernel::{module_id_table};
+///
+/// module_id_table!(MODULE_ID_TABLE, "mybus", Id, MY_ID_ARRAY);
+/// ```
+#[macro_export]
+macro_rules! module_id_table {
+ ($item_name:ident, $table_type:literal, $id_type:ty, $table_name:ident) => {
+ #[export_name = concat!("__mod_", $table_type, "__", stringify!($table_name), "_device_table")]
+ static $item_name: $crate::driver::IdArrayIds<$id_type, { $table_name.count() }> =
+ $table_name.as_ids();
+ };
+}
+
+/// Hook for running custom code when a device is removed.
+pub trait DeviceRemoval {
+    /// Cleans resources up when the device is removed.
+    ///
+    /// Called on device removal so that implementers get a chance to run code that cleans state
+    /// up.
+    fn device_remove(&self);
+}
+
+/// The unit type has nothing to clean up.
+impl DeviceRemoval for () {
+    fn device_remove(&self) {}
+}
+
+/// Forwards to the value behind the `Arc`.
+impl<T: DeviceRemoval> DeviceRemoval for Arc<T> {
+    fn device_remove(&self) {
+        let inner: &T = Deref::deref(self);
+        inner.device_remove();
+    }
+}
+
+/// Forwards to the value inside the `Box`.
+impl<T: DeviceRemoval> DeviceRemoval for Box<T> {
+    fn device_remove(&self) {
+        let inner: &T = Deref::deref(self);
+        inner.device_remove();
+    }
+}
+
+/// A kernel module that only registers the given driver on init.
+///
+/// Helper for single-functionality modules, here modules that offer exactly one driver. The
+/// registration is kept in the module object, so it lives for as long as the module object does.
+pub struct Module<T: DriverOps> {
+    _driver: Pin<Box<Registration<T>>>,
+}
+
+impl<T: DriverOps> crate::Module for Module<T> {
+    /// Registers the driver and stores the pinned registration in the new module object.
+    fn init(name: &'static CStr, module: &'static ThisModule) -> Result<Self> {
+        Registration::new_pinned(name, module).map(|_driver| Self { _driver })
+    }
+}
+
+/// Declares a kernel module that exposes a single driver.
+///
+/// It is meant to be used as a helper by other subsystems so they can more easily expose their own
+/// macros.
+///
+/// The invocation has the shape `<G>, OpsType, { type: DriverType, ...module fields... }`, where
+/// `OpsType` is written in terms of the generic parameter `G`. The macro defines a generic alias
+/// `Ops<G>` for `OpsType`, instantiates it with `DriverType` inside [`Module`], and passes the
+/// resulting module type together with the remaining fields on to `module!`.
+#[macro_export]
+macro_rules! module_driver {
+ (<$gen_type:ident>, $driver_ops:ty, { type: $type:ty, $($f:tt)* }) => {
+ // Generic alias so that `$driver_ops` can be instantiated with the driver type below.
+ type Ops<$gen_type> = $driver_ops;
+ type ModuleType = $crate::driver::Module<Ops<$type>>;
+ $crate::prelude::module! {
+ type: ModuleType,
+ $($f)*
+ }
+ }
+}
diff --git a/rust/kernel/drm/device.rs b/rust/kernel/drm/device.rs
new file mode 100644
index 000000000000..6007f941137a
--- /dev/null
+++ b/rust/kernel/drm/device.rs
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+
+//! DRM device.
+//!
+//! C header: [`include/linux/drm/drm_device.h`](../../../../include/linux/drm/drm_device.h)
+
+use crate::{bindings, device, drm, types::ForeignOwnable};
+use core::marker::PhantomData;
+
+ /// Represents a reference to a DRM device. The device is reference-counted and is guaranteed to
+ /// not be dropped while this object is alive.
+ ///
+ /// Dropping a `Device` calls `drm_dev_put()` and cloning it calls `drm_dev_get()`.
+ pub struct Device<T: drm::drv::Driver> {
+ // Type invariant: ptr must be a valid and initialized drm_device,
+ // and this value must either own a reference to it or the caller
+ // must ensure that it is never dropped if the reference is borrowed.
+ pub(super) ptr: *mut bindings::drm_device,
+ // Ties the handle to the driver type `T` without storing a `T`.
+ _p: PhantomData<T>,
+ }
+
+ impl<T: drm::drv::Driver> Device<T> {
+ /// Wraps a raw `drm_device` pointer without touching its reference count.
+ ///
+ /// # Safety
+ ///
+ /// `raw` must uphold the type invariant of [`Device`]: it must point to a valid, initialized
+ /// `drm_device`, and the returned value must either own a reference to it or the caller must
+ /// make sure the value is never dropped (dropping calls `drm_dev_put()`).
+ // Not intended to be called externally, except via declare_drm_ioctls!()
+ #[doc(hidden)]
+ pub unsafe fn from_raw(raw: *mut bindings::drm_device) -> Device<T> {
+ Device {
+ ptr: raw,
+ _p: PhantomData,
+ }
+ }
+
+ /// Returns the wrapped pointer as a `*const` pointer.
+ #[allow(dead_code)]
+ pub(crate) fn raw(&self) -> *const bindings::drm_device {
+ self.ptr
+ }
+
+ /// Returns the wrapped pointer as a `*mut` pointer.
+ pub(crate) fn raw_mut(&mut self) -> *mut bindings::drm_device {
+ self.ptr
+ }
+
+ /// Returns a borrowed reference to the user data associated with this Device.
+ pub fn data(&self) -> <T::Data as ForeignOwnable>::Borrowed<'_> {
+ // SAFETY: `ptr` is valid per the type invariant.
+ // NOTE(review): this also assumes `dev_private` currently holds a pointer produced by
+ // `T::Data::into_foreign()` (as stored by `drv::Registration::register()`) that has not
+ // been reclaimed yet - confirm no caller can reach this before registration.
+ unsafe { T::Data::borrow((*self.ptr).dev_private) }
+ }
+ }
+
+ impl<T: drm::drv::Driver> Drop for Device<T> {
+     /// Gives back the reference this handle holds on the underlying `drm_device`.
+     fn drop(&mut self) {
+         let raw = self.ptr;
+         // SAFETY: The type invariants say this value owns a reference, so releasing it here is
+         // sound.
+         unsafe { bindings::drm_dev_put(raw) };
+     }
+ }
+
+ impl<T: drm::drv::Driver> Clone for Device<T> {
+     /// Takes an additional reference on the device and wraps it in a new handle.
+     fn clone(&self) -> Self {
+         let raw = self.ptr;
+         // SAFETY: `raw` is valid per the type invariant, so another reference may be taken.
+         unsafe { bindings::drm_dev_get(raw) };
+         // SAFETY: The reference acquired just above is owned by the new `Device`.
+         unsafe { Self::from_raw(raw) }
+     }
+ }
+
+ // SAFETY: `Device` only holds a pointer to a C device, which is safe to be used from any thread.
+ // NOTE(review): `data()` also hands out borrows of `T::Data`; the `Driver` trait requires
+ // `T::Data: Send + Sync`, which is what makes that sharing acceptable.
+ unsafe impl<T: drm::drv::Driver> Send for Device<T> {}
+
+ // SAFETY: `Device` only holds a pointer to a C device, references to which are safe to be used
+ // from any thread.
+ unsafe impl<T: drm::drv::Driver> Sync for Device<T> {}
+
+ // Make drm::Device work for dev_info!() and friends
+ unsafe impl<T: drm::drv::Driver> device::RawDevice for Device<T> {
+     /// Returns the `struct device` pointer stored in the `dev` member of the `drm_device`.
+     fn raw_device(&self) -> *mut bindings::device {
+         let drm_dev = self.ptr;
+         // SAFETY: ptr must be valid per the type invariant
+         unsafe { (*drm_dev).dev }
+     }
+ }
diff --git a/rust/kernel/drm/drv.rs b/rust/kernel/drm/drv.rs
new file mode 100644
index 000000000000..c138352cb489
--- /dev/null
+++ b/rust/kernel/drm/drv.rs
@@ -0,0 +1,342 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+
+//! DRM driver core.
+//!
+//! C header: [`include/linux/drm/drm_drv.h`](../../../../include/linux/drm/drm_drv.h)
+
+use crate::{
+ bindings, device, drm,
+ error::code::*,
+ error::from_kernel_err_ptr,
+ error::{Error, Result},
+ prelude::*,
+ private::Sealed,
+ str::CStr,
+ types::ForeignOwnable,
+ ThisModule,
+};
+use core::{
+ marker::{PhantomData, PhantomPinned},
+ pin::Pin,
+};
+use macros::vtable;
+
+ /// Driver uses the GEM memory manager. This should be set for all modern drivers.
+ pub const FEAT_GEM: u32 = bindings::drm_driver_feature_DRIVER_GEM;
+ /// Driver supports mode setting interfaces (KMS).
+ pub const FEAT_MODESET: u32 = bindings::drm_driver_feature_DRIVER_MODESET;
+ /// Driver supports dedicated render nodes.
+ pub const FEAT_RENDER: u32 = bindings::drm_driver_feature_DRIVER_RENDER;
+ /// Driver supports the full atomic modesetting userspace API.
+ ///
+ /// Drivers which only use atomic internally, but do not support the full userspace API (e.g. not
+ /// all properties converted to atomic, or multi-plane updates are not guaranteed to be tear-free)
+ /// should not set this flag.
+ pub const FEAT_ATOMIC: u32 = bindings::drm_driver_feature_DRIVER_ATOMIC;
+ /// Driver supports DRM sync objects for explicit synchronization of command submission.
+ pub const FEAT_SYNCOBJ: u32 = bindings::drm_driver_feature_DRIVER_SYNCOBJ;
+ /// Driver supports the timeline flavor of DRM sync objects for explicit synchronization of command
+ /// submission.
+ pub const FEAT_SYNCOBJ_TIMELINE: u32 = bindings::drm_driver_feature_DRIVER_SYNCOBJ_TIMELINE;
+
+ /// Information data for a DRM Driver.
+ ///
+ /// These values are copied into the identically named members of the C `drm_driver` structure
+ /// when the driver vtable is built.
+ pub struct DriverInfo {
+ /// Driver major version.
+ pub major: i32,
+ /// Driver minor version.
+ pub minor: i32,
+ /// Driver patchlevel version.
+ pub patchlevel: i32,
+ /// Driver name.
+ pub name: &'static CStr,
+ /// Driver description.
+ pub desc: &'static CStr,
+ /// Driver date.
+ pub date: &'static CStr,
+ }
+
+ /// Internal memory management operation set, normally created by memory managers (e.g. GEM).
+ ///
+ /// See `kernel::drm::gem` and `kernel::drm::gem::shmem`.
+ ///
+ /// Every field is an optional C callback that `Registration` copies verbatim into the
+ /// `drm_driver` member of the same name; `None` leaves that member unset.
+ pub struct AllocOps {
+ // Callback for allocating the GEM object structure itself.
+ pub(crate) gem_create_object: Option<
+ unsafe extern "C" fn(
+ dev: *mut bindings::drm_device,
+ size: usize,
+ ) -> *mut bindings::drm_gem_object,
+ >,
+ // PRIME export: turns a GEM handle into a file descriptor written to `prime_fd`.
+ pub(crate) prime_handle_to_fd: Option<
+ unsafe extern "C" fn(
+ dev: *mut bindings::drm_device,
+ file_priv: *mut bindings::drm_file,
+ handle: u32,
+ flags: u32,
+ prime_fd: *mut core::ffi::c_int,
+ ) -> core::ffi::c_int,
+ >,
+ // PRIME import: turns a file descriptor into a GEM handle written to `handle`.
+ pub(crate) prime_fd_to_handle: Option<
+ unsafe extern "C" fn(
+ dev: *mut bindings::drm_device,
+ file_priv: *mut bindings::drm_file,
+ prime_fd: core::ffi::c_int,
+ handle: *mut u32,
+ ) -> core::ffi::c_int,
+ >,
+ // Import hook producing a GEM object from a `dma_buf`.
+ pub(crate) gem_prime_import: Option<
+ unsafe extern "C" fn(
+ dev: *mut bindings::drm_device,
+ dma_buf: *mut bindings::dma_buf,
+ ) -> *mut bindings::drm_gem_object,
+ >,
+ // Import hook producing a GEM object from a dma-buf attachment and its `sg_table`.
+ pub(crate) gem_prime_import_sg_table: Option<
+ unsafe extern "C" fn(
+ dev: *mut bindings::drm_device,
+ attach: *mut bindings::dma_buf_attachment,
+ sgt: *mut bindings::sg_table,
+ ) -> *mut bindings::drm_gem_object,
+ >,
+ // mmap hook for PRIME-shared objects.
+ pub(crate) gem_prime_mmap: Option<
+ unsafe extern "C" fn(
+ obj: *mut bindings::drm_gem_object,
+ vma: *mut bindings::vm_area_struct,
+ ) -> core::ffi::c_int,
+ >,
+ // Dumb-buffer creation hook.
+ pub(crate) dumb_create: Option<
+ unsafe extern "C" fn(
+ file_priv: *mut bindings::drm_file,
+ dev: *mut bindings::drm_device,
+ args: *mut bindings::drm_mode_create_dumb,
+ ) -> core::ffi::c_int,
+ >,
+ // Dumb-buffer hook returning the mmap offset for `handle` through `offset`.
+ pub(crate) dumb_map_offset: Option<
+ unsafe extern "C" fn(
+ file_priv: *mut bindings::drm_file,
+ dev: *mut bindings::drm_device,
+ handle: u32,
+ offset: *mut u64,
+ ) -> core::ffi::c_int,
+ >,
+ // Dumb-buffer destruction hook.
+ pub(crate) dumb_destroy: Option<
+ unsafe extern "C" fn(
+ file_priv: *mut bindings::drm_file,
+ dev: *mut bindings::drm_device,
+ handle: u32,
+ ) -> core::ffi::c_int,
+ >,
+ }
+
+ /// Trait for memory manager implementations. Implemented internally.
+ ///
+ /// The `Sealed` supertrait keeps code outside this crate from implementing it.
+ pub trait AllocImpl: Sealed + drm::gem::IntoGEMObject {
+ /// The C callback operations for this memory manager.
+ const ALLOC_OPS: AllocOps;
+ }
+
+ /// A DRM driver implementation.
+ ///
+ /// The associated types and constants declared here are what `Registration` uses to build the C
+ /// `drm_driver` structure for the driver.
+ #[vtable]
+ pub trait Driver {
+ /// Context data associated with the DRM driver
+ ///
+ /// Determines the type of the context data passed to each of the methods of the trait.
+ /// It is stored in the device's `dev_private` pointer at registration time.
+ type Data: ForeignOwnable + Sync + Send;
+
+ /// The type used to manage memory for this driver.
+ ///
+ /// Should be either `drm::gem::Object<T>` or `drm::gem::shmem::Object<T>`.
+ type Object: AllocImpl;
+
+ /// The type used to represent a DRM File (client)
+ type File: drm::file::DriverFile;
+
+ /// Driver metadata
+ const INFO: DriverInfo;
+
+ /// Feature flags
+ ///
+ /// Presumably a bitwise OR of the `FEAT_*` constants of this module; the value is passed
+ /// through unchanged as `drm_driver.driver_features`.
+ const FEATURES: u32;
+
+ /// IOCTL list. See `kernel::drm::ioctl::declare_drm_ioctls!{}`.
+ const IOCTLS: &'static [drm::ioctl::DrmIoctlDescriptor];
+ }
+
+ /// A registration of a DRM device
+ ///
+ /// # Invariants:
+ ///
+ /// drm is always a valid pointer to an allocated drm_device
+ pub struct Registration<T: Driver> {
+ // Owns the reference obtained from `drm_dev_alloc()`.
+ drm: drm::device::Device<T>,
+ // Set once `drm_dev_register()` has succeeded; checked by `register()` and `Drop`.
+ registered: bool,
+ // File operations table; `register()` stores its address in `vtable.fops`, which is why the
+ // registration must not move once registered.
+ fops: bindings::file_operations,
+ // Heap-allocated `drm_driver` whose address is handed to `drm_dev_alloc()`.
+ vtable: Pin<Box<bindings::drm_driver>>,
+ _p: PhantomData<T>,
+ // Opts out of `Unpin` so that `register()` can require a pinned receiver.
+ _pin: PhantomPinned,
+ }
+
+ // Builds a `bindings::drm_driver` literal from the given `field: value` list and appends empty
+ // initialisers for the legacy-only members listed below.
+ // NOTE(review): presumably those members only exist in the bindings when CONFIG_DRM_LEGACY is
+ // enabled, which is why two variants of this macro are needed - confirm against drm_drv.h.
+ #[cfg(CONFIG_DRM_LEGACY)]
+ macro_rules! drm_legacy_fields {
+ ( $($field:ident: $val:expr),* $(,)? ) => {
+ bindings::drm_driver {
+ $( $field: $val ),*,
+ firstopen: None,
+ preclose: None,
+ dma_ioctl: None,
+ dma_quiescent: None,
+ context_dtor: None,
+ irq_handler: None,
+ irq_preinstall: None,
+ irq_postinstall: None,
+ irq_uninstall: None,
+ get_vblank_counter: None,
+ enable_vblank: None,
+ disable_vblank: None,
+ dev_priv_size: 0,
+ }
+ }
+ }
+
+ // Variant used without CONFIG_DRM_LEGACY: the `bindings::drm_driver` literal consists of
+ // exactly the `field: value` pairs passed in, with nothing appended.
+ #[cfg(not(CONFIG_DRM_LEGACY))]
+ macro_rules! drm_legacy_fields {
+ ( $($field:ident: $val:expr),* $(,)? ) => {
+ bindings::drm_driver {
+ $( $field: $val ),*
+ }
+ }
+ }
+
+ /// Registers a DRM device with the rest of the kernel.
+ ///
+ /// It automatically picks up THIS_MODULE.
+ ///
+ /// Expands to a call to `Registration::register($reg, $data, $flags, &crate::THIS_MODULE)`,
+ /// so `$reg` must be a pinned mutable reference to the registration.
+ // `crate` (not `$crate`) is intentional here: `THIS_MODULE` has to resolve in the crate that
+ // invokes the macro, hence the clippy allow.
+ #[allow(clippy::crate_in_macro_def)]
+ #[macro_export]
+ macro_rules! drm_device_register {
+ ($reg:expr, $data:expr, $flags:expr $(,)?) => {{
+ $crate::drm::drv::Registration::register($reg, $data, $flags, &crate::THIS_MODULE)
+ }};
+ }
+
+ impl<T: Driver> Registration<T> {
+     // Template for the C `drm_driver` of this driver. `fops` starts out null and is filled in by
+     // `register()` once the address of `self.fops` is stable.
+     const VTABLE: bindings::drm_driver = drm_legacy_fields! {
+         load: None,
+         open: Some(drm::file::open_callback::<T::File>),
+         postclose: Some(drm::file::postclose_callback::<T::File>),
+         lastclose: None,
+         unload: None,
+         release: None,
+         master_set: None,
+         master_drop: None,
+         debugfs_init: None,
+         gem_create_object: T::Object::ALLOC_OPS.gem_create_object,
+         prime_handle_to_fd: T::Object::ALLOC_OPS.prime_handle_to_fd,
+         prime_fd_to_handle: T::Object::ALLOC_OPS.prime_fd_to_handle,
+         gem_prime_import: T::Object::ALLOC_OPS.gem_prime_import,
+         gem_prime_import_sg_table: T::Object::ALLOC_OPS.gem_prime_import_sg_table,
+         gem_prime_mmap: T::Object::ALLOC_OPS.gem_prime_mmap,
+         dumb_create: T::Object::ALLOC_OPS.dumb_create,
+         dumb_map_offset: T::Object::ALLOC_OPS.dumb_map_offset,
+         dumb_destroy: T::Object::ALLOC_OPS.dumb_destroy,
+
+         major: T::INFO.major,
+         minor: T::INFO.minor,
+         patchlevel: T::INFO.patchlevel,
+         name: T::INFO.name.as_char_ptr() as *mut _,
+         desc: T::INFO.desc.as_char_ptr() as *mut _,
+         date: T::INFO.date.as_char_ptr() as *mut _,
+
+         driver_features: T::FEATURES,
+         ioctls: T::IOCTLS.as_ptr(),
+         num_ioctls: T::IOCTLS.len() as i32,
+         fops: core::ptr::null_mut(),
+     };
+
+     /// Creates a new [`Registration`] but does not register it yet.
+     ///
+     /// It is allowed to move.
+     ///
+     /// Allocates the `drm_driver` vtable on the heap and a `drm_device` parented to `parent`.
+     /// Fails if either allocation fails.
+     pub fn new(parent: &dyn device::RawDevice) -> Result<Self> {
+         let vtable = Pin::new(Box::try_new(Self::VTABLE)?);
+         // SAFETY: `vtable` is heap-allocated and stored in the returned `Registration`, so the
+         // pointer handed to the C side stays valid for as long as the registration exists.
+         // NOTE(review): assumes `RawDevice::raw_device()` yields a valid `struct device`.
+         let raw_drm = unsafe { bindings::drm_dev_alloc(&*vtable, parent.raw_device()) };
+         let raw_drm = from_kernel_err_ptr(raw_drm)?;
+
+         // The reference count is one, and now we take ownership of that reference as a
+         // drm::device::Device.
+         // SAFETY: `raw_drm` was just returned by a successful `drm_dev_alloc()`.
+         let drm = unsafe { drm::device::Device::from_raw(raw_drm) };
+
+         Ok(Self {
+             drm,
+             registered: false,
+             vtable,
+             fops: drm::gem::create_fops(),
+             _pin: PhantomPinned,
+             _p: PhantomData,
+         })
+     }
+
+     /// Registers a DRM device with the rest of the kernel.
+     ///
+     /// Users are encouraged to use the [`drm_device_register!()`] macro because it automatically
+     /// picks up the current module.
+     ///
+     /// `data` is converted with `into_foreign()` and stored in the device's `dev_private`. If
+     /// `drm_dev_register()` fails, `dev_private` is reset to null and `data` is dropped, so no
+     /// dangling pointer is left behind.
+     ///
+     /// Returns `EINVAL` if this registration has already been registered, or the error reported
+     /// by `drm_dev_register()`.
+     pub fn register(
+         self: Pin<&mut Self>,
+         data: T::Data,
+         flags: usize,
+         module: &'static ThisModule,
+     ) -> Result {
+         if self.registered {
+             // Already registered.
+             return Err(EINVAL);
+         }
+
+         // SAFETY: We never move out of `this`.
+         let this = unsafe { self.get_unchecked_mut() };
+         let data_pointer = <T::Data as ForeignOwnable>::into_foreign(data);
+         // SAFETY: `drm` is valid per the type invariant
+         unsafe {
+             (*this.drm.raw_mut()).dev_private = data_pointer as *mut _;
+         }
+
+         // `this` is pinned, so the address of `this.fops` stays valid for the C side.
+         this.fops.owner = module.0;
+         this.vtable.fops = &this.fops;
+
+         // SAFETY: The device is now initialized and ready to be registered.
+         let ret = unsafe { bindings::drm_dev_register(this.drm.raw_mut(), flags as u64) };
+         if ret < 0 {
+             // Clear the pointer first so that nobody can observe `dev_private` pointing at the
+             // data we are about to free.
+             // SAFETY: `drm` is valid per the type invariant
+             unsafe {
+                 (*this.drm.raw_mut()).dev_private = core::ptr::null_mut();
+             }
+             // SAFETY: `data_pointer` was returned by `into_foreign` above.
+             drop(unsafe { T::Data::from_foreign(data_pointer) });
+             return Err(Error::from_kernel_errno(ret));
+         }
+
+         this.registered = true;
+         Ok(())
+     }
+
+     /// Returns a reference to the `Device` instance for this registration.
+     pub fn device(&self) -> &drm::device::Device<T> {
+         &self.drm
+     }
+ }
+
+ // SAFETY: `Registration` doesn't offer any methods or access to fields when shared between threads
+ // or CPUs, so it is safe to share it.
+ // NOTE(review): `device(&self)` is in fact callable through a shared reference; that is still
+ // fine because `drm::device::Device<T>` is itself `Sync`, but the justification above should
+ // be updated to say so.
+ unsafe impl<T: Driver> Sync for Registration<T> {}
+
+ // SAFETY: Registration with and unregistration from the drm subsystem can happen from any thread.
+ // Additionally, `T::Data` (which is dropped during unregistration) is `Send`, so it is ok to move
+ // `Registration` to different threads.
+ #[allow(clippy::non_send_fields_in_send_ty)]
+ unsafe impl<T: Driver> Send for Registration<T> {}
+
+ impl<T: Driver> Drop for Registration<T> {
+     /// Removes the registration from the kernel if it has completed successfully before.
+     fn drop(&mut self) {
+         // Nothing was handed to the DRM core, and no driver data was stored.
+         if !self.registered {
+             return;
+         }
+
+         let raw_drm = self.drm.raw_mut();
+
+         // Grab the driver data pointer before the device is torn down.
+         // SAFETY: `drm` is valid per the type invariant
+         let data_pointer = unsafe { (*raw_drm).dev_private };
+
+         // SAFETY: Since `registered` is true, `self.drm` is both valid and registered.
+         unsafe { bindings::drm_dev_unregister(raw_drm) };
+
+         // Reclaim and drop the driver data.
+         // SAFETY: `data_pointer` was returned by `into_foreign` during registration.
+         drop(unsafe { <T::Data as ForeignOwnable>::from_foreign(data_pointer) });
+     }
+ }
diff --git a/rust/kernel/drm/file.rs b/rust/kernel/drm/file.rs
new file mode 100644
index 000000000000..48751e93c38a
--- /dev/null
+++ b/rust/kernel/drm/file.rs
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+
+//! DRM File objects.
+//!
+//! C header: [`include/linux/drm/drm_file.h`](../../../../include/linux/drm/drm_file.h)
+
+use crate::{bindings, drm, error::Result};
+use alloc::boxed::Box;
+use core::marker::PhantomData;
+use core::ops::Deref;
+
+ /// Trait that must be implemented by DRM drivers to represent a DRM File (a client instance).
+ pub trait DriverFile {
+ /// The parent `Driver` implementation for this `DriverFile`.
+ type Driver: drm::drv::Driver;
+
+ /// Open a new file (called when a client opens the DRM device).
+ ///
+ /// The returned box is stored in the `driver_priv` field of the C `drm_file` and dropped again
+ /// from the `postclose` callback. Returning an error makes the open fail with that error.
+ fn open(device: &drm::device::Device<Self::Driver>) -> Result<Box<Self>>;
+ }
+
+ /// An open DRM File.
+ ///
+ /// Dereferences to the driver's `DriverFile` implementation stored in `driver_priv`.
+ ///
+ /// # Invariants
+ /// `raw` is a valid pointer to a `drm_file` struct.
+ #[repr(transparent)]
+ pub struct File<T: DriverFile> {
+ raw: *mut bindings::drm_file,
+ // Records the driver file type that `driver_priv` points to.
+ _p: PhantomData<T>,
+ }
+
+ /// `drm_driver.open` callback: asks the driver to create its per-client state and stores it in
+ /// the `driver_priv` field of the C `drm_file`.
+ ///
+ /// Returns 0 on success or the kernel errno produced by `T::open`.
+ pub(super) unsafe extern "C" fn open_callback<T: DriverFile>(
+     raw_dev: *mut bindings::drm_device,
+     raw_file: *mut bindings::drm_file,
+ ) -> core::ffi::c_int {
+     // `ManuallyDrop` keeps `Device::drop` (and with it `drm_dev_put()`) from running: the device
+     // is only borrowed for the duration of this call.
+     let drm = core::mem::ManuallyDrop::new(unsafe { drm::device::Device::from_raw(raw_dev) });
+
+     match T::open(&drm) {
+         Ok(driver_file) => {
+             // SAFETY: The write goes through `raw_file` directly and nothing derived from it
+             // escapes this function.
+             unsafe { (*raw_file).driver_priv = Box::into_raw(driver_file) as *mut _ };
+             0
+         }
+         Err(err) => err.to_kernel_errno(),
+     }
+ }
+
+ /// `drm_driver.postclose` callback: reclaims and drops the `DriverFile` box that the open
+ /// callback stored in `driver_priv`.
+ pub(super) unsafe extern "C" fn postclose_callback<T: DriverFile>(
+     _dev: *mut bindings::drm_device,
+     raw_file: *mut bindings::drm_file,
+ ) {
+     // SAFETY: Only a field read; nothing derived from `raw_file` escapes this function.
+     let driver_priv = unsafe { (*raw_file).driver_priv } as *mut T;
+
+     // Drop the DriverFile
+     drop(unsafe { Box::from_raw(driver_priv) });
+ }
+
+ impl<T: DriverFile> File<T> {
+ /// Wraps a raw `drm_file` pointer.
+ ///
+ /// # Safety
+ ///
+ /// `raw_file` must be a valid pointer to a `drm_file` (the type invariant of [`File`]).
+ /// NOTE(review): `Deref` additionally assumes `driver_priv` points to a live `T`, as set up by
+ /// the open callback - confirm callers only pass files opened through this driver.
+ // Not intended to be called externally, except via declare_drm_ioctls!()
+ #[doc(hidden)]
+ pub unsafe fn from_raw(raw_file: *mut bindings::drm_file) -> File<T> {
+ File {
+ raw: raw_file,
+ _p: PhantomData,
+ }
+ }
+
+ #[allow(dead_code)]
+ /// Return the raw pointer to the underlying `drm_file`.
+ pub(super) fn raw(&self) -> *const bindings::drm_file {
+ self.raw
+ }
+
+ /// Return an immutable reference to the raw `drm_file` structure.
+ pub(super) fn file(&self) -> &bindings::drm_file {
+ // SAFETY: `raw` is a valid pointer to a `drm_file` per the type invariant.
+ unsafe { &*self.raw }
+ }
+ }
+
+ impl<T: DriverFile> Deref for File<T> {
+     type Target = T;
+
+     /// Gives access to the driver's per-file state stored in `driver_priv`.
+     fn deref(&self) -> &T {
+         let driver_priv = self.file().driver_priv as *const T;
+         // NOTE(review): relies on `driver_priv` pointing at the `T` boxed by the open callback.
+         unsafe { &*driver_priv }
+     }
+ }
+
+ // Sealing `File<T>` lets it implement `GenericFile` while keeping other types out.
+ impl<T: DriverFile> crate::private::Sealed for File<T> {}
+
+ /// Generic trait to allow users that don't care about driver specifics to accept any `File<T>`.
+ ///
+ /// # Safety
+ /// Must only be implemented for `File<T>` and return the pointer, following the normal invariants
+ /// of that type.
+ pub unsafe trait GenericFile: crate::private::Sealed {
+ /// Returns the raw const pointer to the `struct drm_file`
+ fn raw(&self) -> *const bindings::drm_file;
+ /// Returns the raw mut pointer to the `struct drm_file`
+ fn raw_mut(&mut self) -> *mut bindings::drm_file;
+ }
+
+ // SAFETY: Implemented for `File<T>` only, and both methods return the wrapped `drm_file`
+ // pointer unchanged.
+ unsafe impl<T: DriverFile> GenericFile for File<T> {
+ fn raw(&self) -> *const bindings::drm_file {
+ self.raw
+ }
+ fn raw_mut(&mut self) -> *mut bindings::drm_file {
+ self.raw
+ }
+ }
diff --git a/rust/kernel/drm/gem/mod.rs b/rust/kernel/drm/gem/mod.rs
new file mode 100644
index 000000000000..c4a42bb2f718
--- /dev/null
+++ b/rust/kernel/drm/gem/mod.rs
@@ -0,0 +1,387 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+
+//! DRM GEM API
+//!
+//! C header: [`include/linux/drm/drm_gem.h`](../../../../include/linux/drm/drm_gem.h)
+
+#[cfg(CONFIG_DRM_GEM_SHMEM_HELPER = "y")]
+pub mod shmem;
+
+use alloc::boxed::Box;
+
+use crate::{
+ bindings,
+ drm::{device, drv, file},
+ error::{to_result, Result},
+ prelude::*,
+};
+use core::{mem, mem::ManuallyDrop, ops::Deref, ops::DerefMut};
+
+ /// GEM object functions, which must be implemented by drivers.
+ ///
+ /// `open` and `close` have default implementations that do nothing, so a driver only has to
+ /// provide `new`.
+ pub trait BaseDriverObject<T: BaseObject>: Sync + Send + Sized {
+ /// Create a new driver data object for a GEM object of a given size.
+ fn new(dev: &device::Device<T::Driver>, size: usize) -> Result<Self>;
+
+ /// Open a new handle to an existing object, associated with a File.
+ ///
+ /// The default implementation accepts every open.
+ fn open(
+ _obj: &<<T as IntoGEMObject>::Driver as drv::Driver>::Object,
+ _file: &file::File<<<T as IntoGEMObject>::Driver as drv::Driver>::File>,
+ ) -> Result {
+ Ok(())
+ }
+
+ /// Close a handle to an existing object, associated with a File.
+ ///
+ /// The default implementation does nothing.
+ fn close(
+ _obj: &<<T as IntoGEMObject>::Driver as drv::Driver>::Object,
+ _file: &file::File<<<T as IntoGEMObject>::Driver as drv::Driver>::File>,
+ ) {
+ }
+ }
+
+ /// Trait that represents a GEM object subtype
+ pub trait IntoGEMObject: Sized + crate::private::Sealed {
+ /// Owning driver for this type
+ type Driver: drv::Driver;
+
+ /// Returns a reference to the raw `drm_gem_object` structure, which must be valid as long as
+ /// this owning object is valid.
+ fn gem_obj(&self) -> &bindings::drm_gem_object;
+
+ /// Returns a mutable reference to the raw `drm_gem_object` structure, which must be valid as
+ /// long as this owning object is valid.
+ fn mut_gem_obj(&mut self) -> &mut bindings::drm_gem_object;
+
+ /// Converts a pointer to a `drm_gem_object` into a pointer to this type.
+ ///
+ /// The result is only meaningful if `obj` really is the GEM object embedded in a value of
+ /// this type.
+ fn from_gem_obj(obj: *mut bindings::drm_gem_object) -> *mut Self;
+ }
+
+ /// Trait which must be implemented by drivers using base GEM objects.
+ ///
+ /// The implementing type is the driver-private payload stored inside an [`Object`].
+ pub trait DriverObject: BaseDriverObject<Object<Self>> {
+ /// Parent `Driver` for this object.
+ type Driver: drv::Driver;
+ }
+
+ /// `drm_gem_object_funcs.free` callback: releases the GEM core state and then frees the
+ /// enclosing boxed `Object<T>`.
+ unsafe extern "C" fn free_callback<T: DriverObject>(obj: *mut bindings::drm_gem_object) {
+     // SAFETY: All of our objects are Object<T>.
+     let object = crate::container_of!(obj, Object<T>, obj) as *mut Object<T>;
+
+     // SAFETY: The pointer we got has to be valid
+     unsafe { bindings::drm_gem_object_release(obj) };
+
+     // SAFETY: All of our objects are allocated via Box<>, and we're in the
+     // free callback which guarantees this object has zero remaining references,
+     // so we can drop it
+     drop(unsafe { Box::from_raw(object) });
+ }
+
+ /// `drm_gem_object_funcs.open` callback: forwards to the driver's `BaseDriverObject::open` and
+ /// turns its result into a C return code (0 or a kernel errno).
+ unsafe extern "C" fn open_callback<T: BaseDriverObject<U>, U: BaseObject>(
+     raw_obj: *mut bindings::drm_gem_object,
+     raw_file: *mut bindings::drm_file,
+ ) -> core::ffi::c_int {
+     let obj =
+         <<<U as IntoGEMObject>::Driver as drv::Driver>::Object as IntoGEMObject>::from_gem_obj(
+             raw_obj,
+         );
+     // SAFETY: The pointer we got has to be valid.
+     let file = unsafe {
+         file::File::<<<U as IntoGEMObject>::Driver as drv::Driver>::File>::from_raw(raw_file)
+     };
+
+     // SAFETY: from_gem_obj() returns a valid pointer as long as the type is
+     // correct and the raw_obj we got is valid.
+     if let Err(err) = T::open(unsafe { &*obj }, &file) {
+         return err.to_kernel_errno();
+     }
+     0
+ }
+
+ /// `drm_gem_object_funcs.close` callback: forwards to the driver's `BaseDriverObject::close`.
+ unsafe extern "C" fn close_callback<T: BaseDriverObject<U>, U: BaseObject>(
+     raw_obj: *mut bindings::drm_gem_object,
+     raw_file: *mut bindings::drm_file,
+ ) {
+     let obj =
+         <<<U as IntoGEMObject>::Driver as drv::Driver>::Object as IntoGEMObject>::from_gem_obj(
+             raw_obj,
+         );
+     // SAFETY: from_gem_obj() returns a valid pointer as long as the type is
+     // correct and the raw_obj we got is valid.
+     let obj_ref = unsafe { &*obj };
+
+     // SAFETY: The pointer we got has to be valid.
+     let file = unsafe {
+         file::File::<<<U as IntoGEMObject>::Driver as drv::Driver>::File>::from_raw(raw_file)
+     };
+
+     T::close(obj_ref, &file);
+ }
+
+ impl<T: DriverObject> IntoGEMObject for Object<T> {
+     type Driver = T::Driver;
+
+     /// Borrows the embedded `drm_gem_object`.
+     fn gem_obj(&self) -> &bindings::drm_gem_object {
+         let Self { obj, .. } = self;
+         obj
+     }
+
+     /// Mutably borrows the embedded `drm_gem_object`.
+     fn mut_gem_obj(&mut self) -> &mut bindings::drm_gem_object {
+         let Self { obj, .. } = self;
+         obj
+     }
+
+     /// Recovers the enclosing `Object<T>` from a pointer to its `obj` field.
+     fn from_gem_obj(obj: *mut bindings::drm_gem_object) -> *mut Object<T> {
+         let container = crate::container_of!(obj, Object<T>, obj);
+         container as *mut Object<T>
+     }
+ }
+
+ /// Base operations shared by all GEM object classes
+ ///
+ /// Every method is provided; the trait is blanket-implemented for all [`IntoGEMObject`] types.
+ pub trait BaseObject: IntoGEMObject {
+     /// Returns the size of the object in bytes.
+     fn size(&self) -> usize {
+         self.gem_obj().size
+     }
+
+     /// Sets the exportable flag, which controls whether the object can be exported via PRIME.
+     fn set_exportable(&mut self, exportable: bool) {
+         self.mut_gem_obj().exportable = exportable;
+     }
+
+     /// Creates a new reference to the object.
+     ///
+     /// Takes an additional GEM reference that is released again when the returned
+     /// [`ObjectRef`] is dropped.
+     fn reference(&self) -> ObjectRef<Self> {
+         // SAFETY: Having a reference to an Object implies holding a GEM reference
+         unsafe {
+             bindings::drm_gem_object_get(self.gem_obj() as *const _ as *mut _);
+         }
+         ObjectRef {
+             ptr: self as *const _,
+         }
+     }
+
+     /// Creates a new handle for the object associated with a given `File`
+     /// (or returns an existing one).
+     ///
+     /// Fails with the error reported by `drm_gem_handle_create()`.
+     fn create_handle(
+         &self,
+         file: &file::File<<<Self as IntoGEMObject>::Driver as drv::Driver>::File>,
+     ) -> Result<u32> {
+         let mut handle: u32 = 0;
+         // SAFETY: The arguments are all valid per the type invariants.
+         to_result(unsafe {
+             bindings::drm_gem_handle_create(
+                 file.raw() as *mut _,
+                 self.gem_obj() as *const _ as *mut _,
+                 &mut handle,
+             )
+         })?;
+         Ok(handle)
+     }
+
+     /// Looks up an object by its handle for a given `File`.
+     ///
+     /// Returns `ENOENT` if the handle does not name an object. On success the returned
+     /// [`ObjectRef`] owns the reference taken by `drm_gem_object_lookup()`.
+     fn lookup_handle(
+         file: &file::File<<<Self as IntoGEMObject>::Driver as drv::Driver>::File>,
+         handle: u32,
+     ) -> Result<ObjectRef<Self>> {
+         // SAFETY: The arguments are all valid per the type invariants.
+         let ptr = unsafe { bindings::drm_gem_object_lookup(file.raw() as *mut _, handle) };
+
+         if ptr.is_null() {
+             Err(ENOENT)
+         } else {
+             // Go through `from_gem_obj()` instead of casting the pointer directly, so that this
+             // does not silently depend on the `drm_gem_object` living at offset 0 of `Self`.
+             Ok(ObjectRef {
+                 ptr: Self::from_gem_obj(ptr) as *const _,
+             })
+         }
+     }
+
+     /// Creates an mmap offset to map the object from userspace.
+     ///
+     /// Fails with the error reported by `drm_gem_create_mmap_offset()`.
+     fn create_mmap_offset(&self) -> Result<u64> {
+         // SAFETY: The arguments are valid per the type invariant.
+         to_result(unsafe {
+             // TODO: is this threadsafe?
+             bindings::drm_gem_create_mmap_offset(self.gem_obj() as *const _ as *mut _)
+         })?;
+         // SAFETY: `vma_node` is embedded in the GEM object, which is valid for as long as
+         // `self` is borrowed.
+         Ok(unsafe {
+             bindings::drm_vma_node_offset_addr(&self.gem_obj().vma_node as *const _ as *mut _)
+         })
+     }
+ }
+
+ // Every GEM object subtype gets the provided `BaseObject` methods for free.
+ impl<T: IntoGEMObject> BaseObject for T {}
+
+ /// A base GEM object.
+ ///
+ /// Dereferences to the driver-private payload `T`.
+ #[repr(C)]
+ pub struct Object<T: DriverObject> {
+ // The embedded C GEM object; `container_of!` is used to get back from it to `Object<T>`.
+ obj: bindings::drm_gem_object,
+ // The DRM core ensures the Device exists as long as its objects exist, so we don't need to
+ // manage the reference count here.
+ dev: ManuallyDrop<device::Device<T::Driver>>,
+ // Driver-private data, created by `T::new()`.
+ inner: T,
+ }
+
+ impl<T: DriverObject> Object<T> {
+ /// The size of this object's structure.
+ pub const SIZE: usize = mem::size_of::<Self>();
+
+ // C callback table installed on every object created by `new()`. Only `free`, `open` and
+ // `close` are provided; all other hooks are left unset.
+ const OBJECT_FUNCS: bindings::drm_gem_object_funcs = bindings::drm_gem_object_funcs {
+ free: Some(free_callback::<T>),
+ open: Some(open_callback::<T, Object<T>>),
+ close: Some(close_callback::<T, Object<T>>),
+ print_info: None,
+ export: None,
+ pin: None,
+ unpin: None,
+ get_sg_table: None,
+ vmap: None,
+ vunmap: None,
+ mmap: None,
+ vm_ops: core::ptr::null_mut(),
+ };
+
+ /// Create a new GEM object.
+ ///
+ /// Builds the driver payload with `T::new(dev, size)`, boxes the object and initializes it
+ /// with `drm_gem_object_init()`. An error from any of these steps is returned to the caller.
+ pub fn new(dev: &device::Device<T::Driver>, size: usize) -> Result<UniqueObjectRef<Self>> {
+ let mut obj: Box<Self> = Box::try_new(Self {
+ // SAFETY: This struct is expected to be zero-initialized
+ obj: unsafe { mem::zeroed() },
+ // SAFETY: The drm subsystem guarantees that the drm_device will live as long as
+ // the GEM object lives, so we can conjure a reference out of thin air.
+ dev: ManuallyDrop::new(unsafe { device::Device::from_raw(dev.ptr) }),
+ inner: T::new(dev, size)?,
+ })?;
+
+ obj.obj.funcs = &Self::OBJECT_FUNCS;
+ // SAFETY: `dev` wraps a valid `drm_device` per its type invariant, and `obj.obj` lives
+ // inside the box allocated above.
+ to_result(unsafe {
+ bindings::drm_gem_object_init(dev.raw() as *mut _, &mut obj.obj, size)
+ })?;
+
+ // From here on the allocation is owned through the GEM reference count; the `free`
+ // callback rebuilds the `Box` and drops it.
+ let obj_ref = UniqueObjectRef {
+ ptr: Box::leak(obj),
+ };
+
+ Ok(obj_ref)
+ }
+
+ /// Returns the `Device` that owns this GEM object.
+ pub fn dev(&self) -> &device::Device<T::Driver> {
+ &self.dev
+ }
+ }
+
+ // Required by the `Sealed` supertrait bound of `IntoGEMObject`.
+ impl<T: DriverObject> crate::private::Sealed for Object<T> {}
+
+ impl<T: DriverObject> Deref for Object<T> {
+     type Target = T;
+
+     /// Borrows the driver-private payload.
+     fn deref(&self) -> &Self::Target {
+         let Self { inner, .. } = self;
+         inner
+     }
+ }
+
+ impl<T: DriverObject> DerefMut for Object<T> {
+     /// Mutably borrows the driver-private payload.
+     fn deref_mut(&mut self) -> &mut Self::Target {
+         let Self { inner, .. } = self;
+         inner
+     }
+ }
+
+ impl<T: DriverObject> drv::AllocImpl for Object<T> {
+ // Base GEM objects only wire up the generic PRIME helpers; every other hook is left unset.
+ const ALLOC_OPS: drv::AllocOps = drv::AllocOps {
+ gem_create_object: None,
+ prime_handle_to_fd: Some(bindings::drm_gem_prime_handle_to_fd),
+ prime_fd_to_handle: Some(bindings::drm_gem_prime_fd_to_handle),
+ gem_prime_import: None,
+ gem_prime_import_sg_table: None,
+ gem_prime_mmap: Some(bindings::drm_gem_prime_mmap),
+ dumb_create: None,
+ dumb_map_offset: None,
+ dumb_destroy: None,
+ };
+ }
+
+ /// A reference-counted shared reference to a base GEM object.
+ ///
+ /// Cloning takes another GEM reference; dropping releases the one this value owns.
+ pub struct ObjectRef<T: IntoGEMObject> {
+ // Invariant: the pointer is valid and initialized, and this ObjectRef owns a reference to it.
+ ptr: *const T,
+ }
+
+ // SAFETY: GEM object references are safe to share between threads.
+ unsafe impl<T: IntoGEMObject> Send for ObjectRef<T> {}
+ // SAFETY: Same reasoning as for `Send` above.
+ // NOTE(review): both impls hand out `&T` across threads without a `T: Sync` bound - confirm
+ // every `IntoGEMObject` implementor is itself thread-safe.
+ unsafe impl<T: IntoGEMObject> Sync for ObjectRef<T> {}
+
+ impl<T: IntoGEMObject> Clone for ObjectRef<T> {
+     /// Takes another GEM reference on the same object.
+     fn clone(&self) -> Self {
+         (**self).reference()
+     }
+ }
+
+ impl<T: IntoGEMObject> Drop for ObjectRef<T> {
+     /// Releases the GEM reference owned by this value.
+     fn drop(&mut self) {
+         // SAFETY: The pointer is valid per the invariant.
+         let gem_obj: *const bindings::drm_gem_object = unsafe { (*self.ptr).gem_obj() };
+
+         // SAFETY: Having an ObjectRef implies holding a GEM reference.
+         // The free callback will take care of deallocation.
+         unsafe { bindings::drm_gem_object_put(gem_obj as *mut _) };
+     }
+ }
+
+ impl<T: IntoGEMObject> Deref for ObjectRef<T> {
+     type Target = T;
+
+     /// Borrows the referenced GEM object.
+     fn deref(&self) -> &Self::Target {
+         let object = self.ptr;
+         // SAFETY: The pointer is valid per the invariant
+         unsafe { &*object }
+     }
+ }
+
+ /// A unique reference to a base GEM object.
+ ///
+ /// Being the only reference is what allows this type to implement `DerefMut`.
+ pub struct UniqueObjectRef<T: IntoGEMObject> {
+ // Invariant: the pointer is valid and initialized, and this UniqueObjectRef owns the only
+ // reference to it.
+ ptr: *mut T,
+ }
+
+ impl<T: IntoGEMObject> UniqueObjectRef<T> {
+     /// Downgrade this reference to a shared reference.
+     ///
+     /// The GEM reference owned by `self` is handed over to the returned [`ObjectRef`]; the
+     /// reference count is not touched.
+     pub fn into_ref(self) -> ObjectRef<T> {
+         // Suppress our own `Drop` so the reference is transferred rather than released.
+         let this = ManuallyDrop::new(self);
+
+         ObjectRef {
+             ptr: this.ptr as *const _,
+         }
+     }
+ }
+
+ impl<T: IntoGEMObject> Drop for UniqueObjectRef<T> {
+     /// Releases the GEM reference owned by this value.
+     fn drop(&mut self) {
+         // SAFETY: The pointer is valid per the invariant.
+         let gem_obj: *const bindings::drm_gem_object = unsafe { (*self.ptr).gem_obj() };
+
+         // SAFETY: Having a UniqueObjectRef implies holding a GEM
+         // reference. The free callback will take care of deallocation.
+         unsafe { bindings::drm_gem_object_put(gem_obj as *mut _) };
+     }
+ }
+
+ impl<T: IntoGEMObject> Deref for UniqueObjectRef<T> {
+     type Target = T;
+
+     /// Borrows the referenced GEM object.
+     fn deref(&self) -> &Self::Target {
+         let object: *const T = self.ptr;
+         // SAFETY: The pointer is valid per the invariant
+         unsafe { &*object }
+     }
+ }
+
+ impl<T: IntoGEMObject> DerefMut for UniqueObjectRef<T> {
+     /// Mutably borrows the referenced GEM object.
+     fn deref_mut(&mut self) -> &mut Self::Target {
+         let object = self.ptr;
+         // SAFETY: The pointer is valid per the invariant
+         unsafe { &mut *object }
+     }
+ }
+
+ /// Builds the `file_operations` table used for DRM device nodes.
+ ///
+ /// All entries point at the generic DRM helpers. `owner` is left null here and is filled in
+ /// by `drv::Registration::register()` with the registering module.
+ pub(super) fn create_fops() -> bindings::file_operations {
+ bindings::file_operations {
+ owner: core::ptr::null_mut(),
+ open: Some(bindings::drm_open),
+ release: Some(bindings::drm_release),
+ unlocked_ioctl: Some(bindings::drm_ioctl),
+ // The compat ioctl entry point is only wired up on kernels built with CONFIG_COMPAT.
+ #[cfg(CONFIG_COMPAT)]
+ compat_ioctl: Some(bindings::drm_compat_ioctl),
+ #[cfg(not(CONFIG_COMPAT))]
+ compat_ioctl: None,
+ poll: Some(bindings::drm_poll),
+ read: Some(bindings::drm_read),
+ llseek: Some(bindings::noop_llseek),
+ mmap: Some(bindings::drm_gem_mmap),
+ // Every remaining member takes its `Default` value.
+ ..Default::default()
+ }
+ }
diff --git a/rust/kernel/drm/gem/shmem.rs b/rust/kernel/drm/gem/shmem.rs
new file mode 100644
index 000000000000..8f17eba0be99
--- /dev/null
+++ b/rust/kernel/drm/gem/shmem.rs
@@ -0,0 +1,385 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! DRM GEM shmem helper objects
+//!
+//! C header: [`include/linux/drm/drm_gem_shmem_helper.h`](../../../../include/linux/drm/drm_gem_shmem_helper.h)
+
+use crate::drm::{device, drv, gem};
+use crate::{
+ error::{from_kernel_err_ptr, to_result},
+ prelude::*,
+};
+use core::{
+ marker::PhantomData,
+ mem,
+ mem::{ManuallyDrop, MaybeUninit},
+ ops::{Deref, DerefMut},
+ ptr::addr_of_mut,
+ slice,
+};
+
+use gem::BaseObject;
+
+/// Trait which must be implemented by the driver-private data type embedded in shmem-backed GEM objects.
+pub trait DriverObject: gem::BaseDriverObject<Object<Self>> {
+ /// The `drv::Driver` implementation that objects of this type belong to.
+ type Driver: drv::Driver;
+}
+
+// FIXME: `set_policy`/`get_policy` are only initialized when CONFIG_NUMA is set, so the struct literal is built by a cfg-selected macro. This is terrible and I don't know how to avoid it.
+#[cfg(CONFIG_NUMA)]
+macro_rules! vm_numa_fields {
+ ( $($field:ident: $val:expr),* $(,)? ) => {
+ bindings::vm_operations_struct {
+ $( $field: $val ),*,
+ set_policy: None,
+ get_policy: None,
+ }
+ }
+}
+
+#[cfg(not(CONFIG_NUMA))]
+macro_rules! vm_numa_fields {
+ ( $($field:ident: $val:expr),* $(,)? ) => {
+ bindings::vm_operations_struct {
+ $( $field: $val ),*
+ }
+ }
+}
+
+const SHMEM_VM_OPS: bindings::vm_operations_struct = vm_numa_fields! {
+ open: Some(bindings::drm_gem_shmem_vm_open),
+ close: Some(bindings::drm_gem_shmem_vm_close),
+ may_split: None,
+ mremap: None,
+ mprotect: None,
+ fault: Some(bindings::drm_gem_shmem_fault),
+ huge_fault: None,
+ map_pages: None,
+ pagesize: None,
+ page_mkwrite: None,
+ pfn_mkwrite: None,
+ access: None,
+ name: None,
+ find_special_page: None,
+};
+
+/// A shmem-backed GEM object: the C `drm_gem_shmem_object`, a handle to the owning device, and the driver's private data `T`.
+#[repr(C)]
+pub struct Object<T: DriverObject> {
+ obj: bindings::drm_gem_shmem_object,
+ // The DRM core ensures the Device exists as long as its objects exist, so we don't need to
+ // manage the reference count here (hence `ManuallyDrop`: the handle is never dropped).
+ dev: ManuallyDrop<device::Device<T::Driver>>,
+ inner: T,
+}
+
+/// `gem_create_object` driver callback: constructs the driver data via `T::new`, allocates a
+/// zeroed `Object<T>`, fills in its Rust fields and vtable, and returns the embedded
+/// `drm_gem_object`. On failure an error pointer is returned instead.
+unsafe extern "C" fn gem_create_object<T: DriverObject>(
+    raw_dev: *mut bindings::drm_device,
+    size: usize,
+) -> *mut bindings::drm_gem_object {
+    // SAFETY: GEM ensures the device lives as long as its objects live,
+    // so we can conjure up a reference from thin air and never drop it.
+    let device = ManuallyDrop::new(unsafe { device::Device::from_raw(raw_dev) });
+
+    // Build the driver-private part first; bail out with an error pointer if that fails.
+    let driver_data = match T::new(&*device, size) {
+        Err(err) => return err.to_ptr(),
+        Ok(data) => data,
+    };
+
+    // SAFETY: Requesting a fresh, zeroed allocation (null source pointer) of the object size.
+    let raw = unsafe {
+        bindings::krealloc(
+            core::ptr::null(),
+            Object::<T>::SIZE,
+            bindings::GFP_KERNEL | bindings::__GFP_ZERO,
+        )
+    };
+    let this = raw as *mut Object<T>;
+
+    if this.is_null() {
+        return ENOMEM.to_ptr();
+    }
+
+    // SAFETY: `this` is valid since the allocation succeeded. drm_gem_shmem_object is safe to
+    // zero-init, and the remaining fields of Object are written here before anything reads them.
+    unsafe {
+        addr_of_mut!((*this).dev).write(device);
+        addr_of_mut!((*this).inner).write(driver_data);
+        (*this).obj.base.funcs = &Object::<T>::VTABLE;
+        addr_of_mut!((*this).obj.base)
+    }
+}
+
+/// `free` vtable callback: drops the driver-private data, then hands the object to
+/// `drm_gem_shmem_free`. The `dev` field is intentionally left alone (it is `ManuallyDrop`).
+unsafe extern "C" fn free_callback<T: DriverObject>(obj: *mut bindings::drm_gem_object) {
+    // SAFETY: All of our objects are Object<T>.
+    let this = crate::container_of!(obj, Object<T>, obj) as *mut Object<T>;
+
+    // SAFETY: `this` is valid and is never used again after these two calls; the driver data is
+    // dropped exactly once, before the shmem helper releases the object.
+    unsafe {
+        core::ptr::drop_in_place(addr_of_mut!((*this).inner));
+        bindings::drm_gem_shmem_free(addr_of_mut!((*this).obj));
+    }
+}
+
+impl<T: DriverObject> Object<T> {
+    /// The size of this object's structure.
+    const SIZE: usize = mem::size_of::<Self>();
+
+    /// `drm_gem_object_funcs` vtable suitable for GEM shmem objects.
+    const VTABLE: bindings::drm_gem_object_funcs = bindings::drm_gem_object_funcs {
+        free: Some(free_callback::<T>),
+        open: Some(super::open_callback::<T, Object<T>>),
+        close: Some(super::close_callback::<T, Object<T>>),
+        print_info: Some(bindings::drm_gem_shmem_object_print_info),
+        export: None,
+        pin: Some(bindings::drm_gem_shmem_object_pin),
+        unpin: Some(bindings::drm_gem_shmem_object_unpin),
+        get_sg_table: Some(bindings::drm_gem_shmem_object_get_sg_table),
+        vmap: Some(bindings::drm_gem_shmem_object_vmap),
+        vunmap: Some(bindings::drm_gem_shmem_object_vunmap),
+        mmap: Some(bindings::drm_gem_shmem_object_mmap),
+        vm_ops: &SHMEM_VM_OPS,
+    };
+
+    /// Returns a mutable raw pointer to the embedded `drm_gem_shmem_object`.
+    ///
+    /// # Safety
+    ///
+    /// Must only be used with DRM functions that are thread-safe
+    unsafe fn mut_shmem(&self) -> *mut bindings::drm_gem_shmem_object {
+        &self.obj as *const _ as *mut _
+    }
+
+    /// Create a new shmem-backed DRM object of the given size.
+    ///
+    /// Returns an error if the underlying `drm_gem_shmem_create` call fails (for example when
+    /// the `gem_create_object` callback reports an allocation or driver-object error).
+    pub fn new(dev: &device::Device<T::Driver>, size: usize) -> Result<gem::UniqueObjectRef<Self>> {
+        // SAFETY: This function can be called as long as the ALLOC_OPS are set properly
+        // for this driver, and the gem_create_object is called.
+        //
+        // The result is an error pointer on failure, so it must be checked before it is
+        // turned into an object pointer; otherwise the returned reference would wrap an
+        // invalid pointer that is later dereferenced (at the latest on drop).
+        let shmem = from_kernel_err_ptr(unsafe {
+            bindings::drm_gem_shmem_create(dev.raw() as *mut _, size)
+        })?;
+        let p = crate::container_of!(shmem, Object<T>, obj) as *mut _;
+
+        // SAFETY: The gem_create_object callback ensures this is a valid Object<T>,
+        // so we can take a unique reference to it.
+        let obj_ref = gem::UniqueObjectRef { ptr: p };
+
+        Ok(obj_ref)
+    }
+
+    /// Returns the `Device` that owns this GEM object.
+    pub fn dev(&self) -> &device::Device<T::Driver> {
+        &self.dev
+    }
+
+    /// Creates (if necessary) and returns a scatter-gather table of DMA pages for this object.
+    ///
+    /// This will pin the object in memory. The returned table holds a reference to this
+    /// object for as long as it exists.
+    pub fn sg_table(&self) -> Result<SGTable<T>> {
+        // SAFETY: drm_gem_shmem_get_pages_sgt is thread-safe.
+        let sgt = from_kernel_err_ptr(unsafe {
+            bindings::drm_gem_shmem_get_pages_sgt(self.mut_shmem())
+        })?;
+
+        Ok(SGTable {
+            sgt,
+            _owner: self.reference(),
+        })
+    }
+
+    /// Creates and returns a virtual kernel memory mapping for this object.
+    ///
+    /// The returned mapping holds a reference to this object and is unmapped when dropped.
+    pub fn vmap(&self) -> Result<VMap<T>> {
+        let mut map: MaybeUninit<bindings::iosys_map> = MaybeUninit::uninit();
+
+        // SAFETY: drm_gem_shmem_vmap is thread-safe
+        to_result(unsafe { bindings::drm_gem_shmem_vmap(self.mut_shmem(), map.as_mut_ptr()) })?;
+
+        // SAFETY: if drm_gem_shmem_vmap did not fail, map is initialized now
+        let map = unsafe { map.assume_init() };
+
+        Ok(VMap {
+            map,
+            owner: self.reference(),
+        })
+    }
+
+    /// Set the write-combine flag for this object.
+    ///
+    /// Should be called before any mappings are made.
+    pub fn set_wc(&mut self, map_wc: bool) {
+        // SAFETY: The pointer refers to a field of `self`, to which `&mut self` gives us
+        // exclusive access.
+        unsafe { (*self.mut_shmem()).map_wc = map_wc };
+    }
+}
+
+impl<T: DriverObject> Deref for Object<T> {
+    type Target = T;
+
+    /// Borrows the driver-private data stored in this object.
+    fn deref(&self) -> &Self::Target {
+        &self.inner
+    }
+}
+
+impl<T: DriverObject> DerefMut for Object<T> {
+    /// Mutably borrows the driver-private data stored in this object.
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.inner
+    }
+}
+
+impl<T: DriverObject> crate::private::Sealed for Object<T> {}
+
+impl<T: DriverObject> gem::IntoGEMObject for Object<T> {
+    type Driver = T::Driver;
+
+    /// Borrows the base `drm_gem_object` embedded in the shmem object.
+    fn gem_obj(&self) -> &bindings::drm_gem_object {
+        let shmem = &self.obj;
+        &shmem.base
+    }
+
+    /// Mutably borrows the base `drm_gem_object` embedded in the shmem object.
+    fn mut_gem_obj(&mut self) -> &mut bindings::drm_gem_object {
+        let shmem = &mut self.obj;
+        &mut shmem.base
+    }
+
+    /// Recovers the containing `Object<T>` pointer from a pointer to its base GEM object.
+    fn from_gem_obj(obj: *mut bindings::drm_gem_object) -> *mut Object<T> {
+        let container = crate::container_of!(obj, Object<T>, obj);
+        container as *mut Object<T>
+    }
+}
+
+impl<T: DriverObject> drv::AllocImpl for Object<T> {
+ const ALLOC_OPS: drv::AllocOps = drv::AllocOps {
+ gem_create_object: Some(gem_create_object::<T>), // Rust callback that allocates the full `Object<T>`
+ prime_handle_to_fd: Some(bindings::drm_gem_prime_handle_to_fd),
+ prime_fd_to_handle: Some(bindings::drm_gem_prime_fd_to_handle),
+ gem_prime_import: None,
+ gem_prime_import_sg_table: Some(bindings::drm_gem_shmem_prime_import_sg_table),
+ gem_prime_mmap: Some(bindings::drm_gem_prime_mmap),
+ dumb_create: Some(bindings::drm_gem_shmem_dumb_create),
+ dumb_map_offset: None,
+ dumb_destroy: None,
+ };
+}
+
+/// A kernel-address-space mapping of a shmem-backed GEM object.
+///
+/// The mapping keeps a reference to the mapped object for as long as it exists.
+pub struct VMap<T: DriverObject> {
+    map: bindings::iosys_map,
+    owner: gem::ObjectRef<Object<T>>,
+}
+
+impl<T: DriverObject> VMap<T> {
+    /// Returns a const raw pointer to the start of the mapping.
+    pub fn as_ptr(&self) -> *const core::ffi::c_void {
+        // SAFETY: The shmem helpers always return non-iomem maps
+        let vaddr = unsafe { self.map.__bindgen_anon_1.vaddr };
+        vaddr
+    }
+
+    /// Returns a mutable raw pointer to the start of the mapping.
+    pub fn as_mut_ptr(&mut self) -> *mut core::ffi::c_void {
+        // SAFETY: The shmem helpers always return non-iomem maps
+        let vaddr = unsafe { self.map.__bindgen_anon_1.vaddr };
+        vaddr
+    }
+
+    /// Returns a byte slice view of the mapping, spanning the owner object's size.
+    pub fn as_slice(&self) -> &[u8] {
+        let base = self.as_ptr() as *const u8;
+        let len = self.owner.size();
+        // SAFETY: The vmap maps valid memory up to the owner size
+        unsafe { slice::from_raw_parts(base, len) }
+    }
+
+    /// Returns a mutable byte slice view of the mapping, spanning the owner object's size.
+    pub fn as_mut_slice(&mut self) -> &mut [u8] {
+        let base = self.as_mut_ptr() as *mut u8;
+        let len = self.owner.size();
+        // SAFETY: The vmap maps valid memory up to the owner size
+        unsafe { slice::from_raw_parts_mut(base, len) }
+    }
+
+    /// Borrows a reference to the object that owns this virtual mapping.
+    pub fn owner(&self) -> &gem::ObjectRef<Object<T>> {
+        &self.owner
+    }
+}
+
+impl<T: DriverObject> Drop for VMap<T> {
+    /// Unmaps the mapping from the owning object.
+    fn drop(&mut self) {
+        let map: *mut bindings::iosys_map = &mut self.map;
+
+        // SAFETY: This function is thread-safe
+        unsafe { bindings::drm_gem_shmem_vunmap(self.owner.mut_shmem(), map) };
+    }
+}
+
+// SAFETY: `iosys_map` objects are safe to send across threads.
+unsafe impl<T: DriverObject> Send for VMap<T> {}
+unsafe impl<T: DriverObject> Sync for VMap<T> {}
+
+/// One entry of a scatter-gather table: a span of pages in the device's DMA address space.
+///
+/// For devices not behind a standalone IOMMU, this corresponds to physical addresses.
+#[repr(transparent)]
+pub struct SGEntry(bindings::scatterlist);
+
+impl SGEntry {
+    /// Returns the starting DMA address of this span
+    pub fn dma_address(&self) -> usize {
+        // SAFETY: `self.0` is a valid scatterlist entry borrowed for the duration of the call.
+        let addr = unsafe { bindings::sg_dma_address(&self.0) };
+        addr as usize
+    }
+
+    /// Returns the length of this span in bytes
+    pub fn dma_len(&self) -> usize {
+        // SAFETY: `self.0` is a valid scatterlist entry borrowed for the duration of the call.
+        let len = unsafe { bindings::sg_dma_len(&self.0) };
+        len as usize
+    }
+}
+
+/// A scatter-gather table of DMA address spans for a GEM shmem object.
+///
+/// # Invariants
+/// `sgt` must be a valid pointer to the `sg_table`, which must correspond to the owned
+/// object in `_owner` (which ensures it remains valid).
+pub struct SGTable<T: DriverObject> {
+    sgt: *const bindings::sg_table,
+    _owner: gem::ObjectRef<Object<T>>,
+}
+
+impl<T: DriverObject> SGTable<T> {
+    /// Returns an iterator over the table's entries, starting at its first scatterlist entry
+    /// and yielding `nents` entries.
+    pub fn iter(&'_ self) -> SGTableIter<'_> {
+        // SAFETY: `sgt` is valid per the type invariant.
+        let (count, first) = unsafe { ((*self.sgt).nents, (*self.sgt).sgl) };
+
+        SGTableIter {
+            sg: first,
+            left: count as usize,
+            _p: PhantomData,
+        }
+    }
+}
+
+impl<'a, T: DriverObject> IntoIterator for &'a SGTable<T> {
+    type Item = &'a SGEntry;
+    type IntoIter = SGTableIter<'a>;
+
+    /// Iterates over the table's entries; equivalent to calling [`SGTable::iter`].
+    fn into_iter(self) -> Self::IntoIter {
+        SGTable::iter(self)
+    }
+}
+
+// SAFETY: `sg_table` objects are safe to send across threads.
+unsafe impl<T: DriverObject> Send for SGTable<T> {}
+unsafe impl<T: DriverObject> Sync for SGTable<T> {}
+
+/// An iterator through `SGTable` entries.
+///
+/// # Invariants
+/// `sg` must be a valid pointer to the scatterlist, which must outlive our lifetime.
+pub struct SGTableIter<'a> {
+    sg: *mut bindings::scatterlist,
+    left: usize,
+    _p: PhantomData<&'a ()>,
+}
+
+impl<'a> Iterator for SGTableIter<'a> {
+    type Item = &'a SGEntry;
+
+    /// Yields the current entry and advances to the next one until `left` entries were returned.
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.left == 0 {
+            return None;
+        }
+
+        // `SGEntry` is a transparent wrapper around `scatterlist`, so the pointer cast is valid.
+        let current = self.sg as *const SGEntry;
+
+        // SAFETY: `sg` is valid per the type invariant.
+        self.sg = unsafe { bindings::sg_next(self.sg) };
+        self.left -= 1;
+
+        // SAFETY: `current` was valid before advancing and outlives `'a` per the invariant.
+        Some(unsafe { &*current })
+    }
+}
diff --git a/rust/kernel/drm/ioctl.rs b/rust/kernel/drm/ioctl.rs
new file mode 100644
index 000000000000..10304efbd5f1
--- /dev/null
+++ b/rust/kernel/drm/ioctl.rs
@@ -0,0 +1,147 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+#![allow(non_snake_case)]
+
+//! DRM IOCTL definitions.
+//!
+//! C header: [`include/linux/drm/drm_ioctl.h`](../../../../include/linux/drm/drm_ioctl.h)
+
+use crate::ioctl;
+
+const BASE: u32 = bindings::DRM_IOCTL_BASE as u32;
+
+/// Construct a DRM ioctl number (using `DRM_IOCTL_BASE`) for command `nr`, with no argument.
+pub const fn IO(nr: u32) -> u32 {
+ ioctl::_IO(BASE, nr)
+}
+
+/// Construct a DRM ioctl number (using `DRM_IOCTL_BASE`) for command `nr`, with a read-only argument of type `T`.
+pub const fn IOR<T>(nr: u32) -> u32 {
+ ioctl::_IOR::<T>(BASE, nr)
+}
+
+/// Construct a DRM ioctl number (using `DRM_IOCTL_BASE`) for command `nr`, with a write-only argument of type `T`.
+pub const fn IOW<T>(nr: u32) -> u32 {
+ ioctl::_IOW::<T>(BASE, nr)
+}
+
+/// Construct a DRM ioctl number (using `DRM_IOCTL_BASE`) for command `nr`, with a read-write argument of type `T`.
+pub const fn IOWR<T>(nr: u32) -> u32 {
+ ioctl::_IOWR::<T>(BASE, nr)
+}
+
+/// Descriptor type for DRM ioctls. Use the `declare_drm_ioctls!{}` macro to construct them.
+pub type DrmIoctlDescriptor = bindings::drm_ioctl_desc;
+
+/// This is for ioctls which are used for rendering, and require that the file descriptor is either
+/// for a render node, or if it's a legacy/primary node, then it must be authenticated.
+pub const AUTH: u32 = bindings::drm_ioctl_flags_DRM_AUTH;
+
+/// This must be set for any ioctl which can change the modeset or display state. Userspace must
+/// call the ioctl through a primary node, while it is the active master.
+///
+/// Note that read-only modeset ioctls can also be called by unauthenticated clients, or when a
+/// master is not the currently active one.
+pub const MASTER: u32 = bindings::drm_ioctl_flags_DRM_MASTER;
+
+/// Anything that could potentially wreck a master file descriptor needs to have this flag set.
+///
+/// Currently that's only for the SETMASTER and DROPMASTER ioctls, which e.g. logind can call to force
+/// a non-behaving master (display compositor) into compliance.
+///
+/// This is equivalent to callers with the SYSADMIN capability.
+pub const ROOT_ONLY: u32 = bindings::drm_ioctl_flags_DRM_ROOT_ONLY;
+
+/// Whether drm_ioctl_desc.func should be called with the DRM BKL held or not. Enforced as the
+/// default for all modern drivers, hence there should never be a need to set this flag.
+///
+/// Do not use anywhere else than for the VBLANK_WAIT IOCTL, which is the only legacy IOCTL which
+/// needs this.
+pub const UNLOCKED: u32 = bindings::drm_ioctl_flags_DRM_UNLOCKED;
+
+/// This is used for all ioctls needed for rendering only, for drivers which support render nodes.
+/// This should be all new render drivers, and hence it should be always set for any ioctl with
+/// `AUTH` set. Note though that read-only query ioctls might have this set, but have not set
+/// `AUTH` because they do not require authentication.
+pub const RENDER_ALLOW: u32 = bindings::drm_ioctl_flags_DRM_RENDER_ALLOW;
+
+/// Declare the DRM ioctls for a driver.
+///
+/// Each entry in the list should have the form:
+///
+/// `(ioctl_number, argument_type, flags, user_callback),`
+///
+/// `argument_type` is the type name within the `bindings` crate.
+/// `user_callback` should have the following prototype:
+///
+/// ```ignore
+/// fn foo(device: &kernel::drm::device::Device<Self>,
+///        data: &mut bindings::argument_type,
+///        file: &kernel::drm::file::File<Self::File>,
+/// ) -> Result<u32>
+/// ```
+/// where `Self` is the drm::drv::Driver implementation these ioctls are being declared within.
+///
+/// The `Ok` value may be any integer type convertible to a C `int`; it becomes the return value
+/// of the ioctl. If the conversion fails, `ERANGE` is returned instead. An `Err` value is
+/// returned as its kernel errno.
+///
+/// The ioctls must be listed in command-number order starting at `DRM_COMMAND_BASE` with no
+/// gaps, and the size of each `argument_type` must match the size encoded in the ioctl number;
+/// both are checked at compile time.
+///
+/// # Examples
+///
+/// ```ignore
+/// kernel::declare_drm_ioctls! {
+///     (FOO_GET_PARAM, drm_foo_get_param, ioctl::RENDER_ALLOW, my_get_param_handler),
+/// }
+/// ```
+///
+#[macro_export]
+macro_rules! declare_drm_ioctls {
+    ( $(($cmd:ident, $struct:ident, $flags:expr, $func:expr)),* $(,)? ) => {
+        const IOCTLS: &'static [$crate::drm::ioctl::DrmIoctlDescriptor] = {
+            const _:() = {
+                let i: u32 = $crate::bindings::DRM_COMMAND_BASE;
+                // Assert that all the IOCTLs are in the right order and there are no gaps,
+                // and that the sizeof of the specified type is correct.
+                $(
+                    let cmd: u32 = $crate::macros::concat_idents!($crate::bindings::DRM_IOCTL_, $cmd);
+                    ::core::assert!(i == $crate::ioctl::_IOC_NR(cmd));
+                    ::core::assert!(::core::mem::size_of::<$crate::bindings::$struct>() == $crate::ioctl::_IOC_SIZE(cmd));
+                    let i: u32 = i + 1;
+                )*
+            };
+
+            let ioctls = &[$(
+                $crate::bindings::drm_ioctl_desc {
+                    cmd: $crate::macros::concat_idents!($crate::bindings::DRM_IOCTL_, $cmd) as u32,
+                    func: {
+                        #[allow(non_snake_case)]
+                        unsafe extern "C" fn $cmd(
+                                raw_dev: *mut $crate::bindings::drm_device,
+                                raw_data: *mut ::core::ffi::c_void,
+                                raw_file_priv: *mut $crate::bindings::drm_file,
+                        ) -> ::core::ffi::c_int {
+                            // SAFETY: We never drop this, and the DRM core ensures the device lives
+                            // while callbacks are being called.
+                            //
+                            // FIXME: Currently there is nothing enforcing that the types of the
+                            // dev/file match the current driver these ioctls are being declared
+                            // for, and it's not clear how to enforce this within the type system.
+                            let dev = ::core::mem::ManuallyDrop::new(unsafe {
+                                $crate::drm::device::Device::from_raw(raw_dev)
+                            });
+                            // SAFETY: This is just the ioctl argument, which hopefully has the right type
+                            // (we've done our best checking the size).
+                            let data = unsafe { &mut *(raw_data as *mut $crate::bindings::$struct) };
+                            // SAFETY: This is just the DRM file structure
+                            let file = unsafe { $crate::drm::file::File::from_raw(raw_file_priv) };
+
+                            match $func(&*dev, data, &file) {
+                                Err(e) => e.to_kernel_errno(),
+                                // Name the error code through `$crate` so the expansion does not
+                                // depend on the caller having `ERANGE` (the prelude) in scope.
+                                Ok(i) => i.try_into().unwrap_or($crate::error::code::ERANGE.to_kernel_errno()),
+                            }
+                        }
+                        Some($cmd)
+                    },
+                    flags: $flags,
+                    name: $crate::c_str!(::core::stringify!($cmd)).as_char_ptr(),
+                }
+            ),*];
+            ioctls
+        };
+    };
+}
diff --git a/rust/kernel/drm/mm.rs b/rust/kernel/drm/mm.rs
new file mode 100644
index 000000000000..83e27a7dcc7e
--- /dev/null
+++ b/rust/kernel/drm/mm.rs
@@ -0,0 +1,309 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+
+//! DRM MM range allocator
+//!
+//! C header: [`include/linux/drm/drm_mm.h`](../../../../include/linux/drm/drm_mm.h)
+
+use crate::{
+ bindings,
+ error::{to_result, Result},
+ str::CStr,
+ sync::{Arc, LockClassKey, LockIniter, Mutex, UniqueArc},
+ types::Opaque,
+};
+
+use alloc::boxed::Box;
+
+use core::{
+ marker::{PhantomData, PhantomPinned},
+ ops::Deref,
+ pin::Pin,
+};
+
+/// Type alias representing a DRM MM node.
+pub type Node<A, T> = Pin<Box<NodeData<A, T>>>;
+
+/// Trait which must be implemented by the inner allocator state type provided by the user.
+pub trait AllocInner<T> {
+ /// Notification that a node is being dropped; called with the allocator lock held, just before the node is removed. Does nothing by default.
+ fn drop_object(&mut self, _start: u64, _size: u64, _color: usize, _object: &mut T) {}
+}
+
+impl<T> AllocInner<T> for () {}
+
+/// Wrapper type for a `struct drm_mm` plus user AllocInner object.
+///
+/// # Invariants
+/// The `drm_mm` struct is valid and initialized.
+struct MmInner<A: AllocInner<T>, T>(Opaque<bindings::drm_mm>, A, PhantomData<T>);
+
+/// Represents a single allocated node in the MM allocator, along with the user data `T` attached to it.
+pub struct NodeData<A: AllocInner<T>, T> {
+ node: bindings::drm_mm_node,
+ mm: Arc<Mutex<MmInner<A, T>>>, // keeps the allocator alive; its lock is taken when the node is removed
+ valid: bool, // set only after a successful insertion; `Drop` removes the node only if this is true
+ /// A drm_mm_node needs to be pinned because nodes reference each other in a linked list.
+ _pin: PhantomPinned,
+ inner: T,
+}
+
+// SAFETY: Allocator ops take the mutex, and there are no mutable actions on the node.
+unsafe impl<A: Send + AllocInner<T>, T: Send> Send for NodeData<A, T> {}
+unsafe impl<A: Send + AllocInner<T>, T: Sync> Sync for NodeData<A, T> {}
+
+/// Available MM node insertion modes (mirroring the C `drm_mm_insert_mode` values).
+#[repr(u32)]
+pub enum InsertMode {
+ /// Search for the smallest hole (within the search range) that fits the desired node.
+ ///
+ /// Allocates the node from the bottom of the found hole.
+ Best = bindings::drm_mm_insert_mode_DRM_MM_INSERT_BEST,
+
+ /// Search for the lowest hole (address closest to 0, within the search range) that fits the
+ /// desired node.
+ ///
+ /// Allocates the node from the bottom of the found hole.
+ Low = bindings::drm_mm_insert_mode_DRM_MM_INSERT_LOW,
+
+ /// Search for the highest hole (address closest to U64_MAX, within the search range) that fits
+ /// the desired node.
+ ///
+ /// Allocates the node from the top of the found hole. The specified alignment for the node is
+ /// applied to the base of the node (`NodeData::start()`).
+ High = bindings::drm_mm_insert_mode_DRM_MM_INSERT_HIGH,
+
+ /// Search for the most recently evicted hole (within the search range) that fits the desired
+ /// node. This is appropriate for use immediately after performing an eviction scan and removing
+ /// the selected nodes to form a hole.
+ ///
+ /// Allocates the node from the bottom of the found hole.
+ Evict = bindings::drm_mm_insert_mode_DRM_MM_INSERT_EVICT,
+}
+
+/// A clonable handle to the allocator state, guarded by the allocator lock.
+///
+/// This makes it possible to act on the user-supplied `AllocInner<T>` value starting from just a
+/// node, deferring the locking until [`InnerRef::with`] is actually called.
+#[derive(Clone)]
+pub struct InnerRef<A: AllocInner<T>, T>(Arc<Mutex<MmInner<A, T>>>);
+
+impl<A: AllocInner<T>, T> InnerRef<A, T> {
+    /// Takes the allocator lock and runs `cb` on the user `AllocInner<T>` implementation,
+    /// returning whatever `cb` returns.
+    pub fn with<RetVal>(&self, cb: impl FnOnce(&mut A) -> RetVal) -> RetVal {
+        let mut state = self.0.lock();
+        let user_inner = &mut state.1;
+        cb(user_inner)
+    }
+}
+
+impl<A: AllocInner<T>, T> NodeData<A, T> {
+    /// Returns the color of the node (an opaque value)
+    pub fn color(&self) -> usize {
+        let raw_color = self.node.color;
+        raw_color as usize
+    }
+
+    /// Returns the start address of the node
+    pub fn start(&self) -> u64 {
+        let node = &self.node;
+        node.start
+    }
+
+    /// Returns the size of the node in bytes
+    pub fn size(&self) -> u64 {
+        let node = &self.node;
+        node.size
+    }
+
+    /// Takes the allocator lock and runs `cb` on the user `AllocInner<T>` implementation of the
+    /// allocator this node belongs to.
+    pub fn with_inner<RetVal>(&self, cb: impl FnOnce(&mut A) -> RetVal) -> RetVal {
+        let mut state = self.mm.lock();
+        let user_inner = &mut state.1;
+        cb(user_inner)
+    }
+
+    /// Return a clonable, detached reference to the allocator inner data.
+    pub fn alloc_ref(&self) -> InnerRef<A, T> {
+        let allocator = self.mm.clone();
+        InnerRef(allocator)
+    }
+
+    /// Return a mutable reference to the inner data.
+    pub fn inner_mut(self: Pin<&mut Self>) -> &mut T {
+        // SAFETY: This is okay because inner is not structural
+        let this = unsafe { self.get_unchecked_mut() };
+        &mut this.inner
+    }
+}
+
+impl<A: AllocInner<T>, T> Deref for NodeData<A, T> {
+    type Target = T;
+
+    /// Borrows the user data stored in this node.
+    fn deref(&self) -> &Self::Target {
+        &self.inner
+    }
+}
+
+impl<A: AllocInner<T>, T> Drop for NodeData<A, T> {
+    /// Removes the node from the allocator, if it was ever successfully inserted.
+    fn drop(&mut self) {
+        // Nodes whose insertion failed were never linked into the allocator.
+        if !self.valid {
+            return;
+        }
+
+        let mut state = self.mm.lock();
+        let (start, size, color) = (self.start(), self.size(), self.color());
+
+        // Inform the user allocator that a node is being dropped.
+        state.1.drop_object(start, size, color, &mut self.inner);
+
+        // SAFETY: The MM lock is still taken, so we can safely remove the node.
+        unsafe { bindings::drm_mm_remove_node(&mut self.node) };
+    }
+}
+
+/// An instance of a DRM MM range allocator.
+pub struct Allocator<A: AllocInner<T>, T> {
+    mm: Arc<Mutex<MmInner<A, T>>>,
+    _p: PhantomData<T>,
+}
+
+impl<A: AllocInner<T>, T> Allocator<A, T> {
+    /// Create a new range allocator for the given start and size range of addresses.
+    ///
+    /// The user may optionally provide an inner object representing allocator state, which will
+    /// be protected by the same lock. If not required, `()` can be used.
+    pub fn new(
+        start: u64,
+        size: u64,
+        inner: A,
+        name: &'static CStr,
+        lock_key: &'static LockClassKey,
+    ) -> Result<Allocator<A, T>> {
+        // SAFETY: We call `Mutex::init_lock` below.
+        let unpinned = UniqueArc::try_new(unsafe {
+            Mutex::new(MmInner(Opaque::uninit(), inner, PhantomData))
+        })?;
+        let mut mm: Pin<UniqueArc<Mutex<MmInner<A, T>>>> = unpinned.into();
+
+        mm.as_mut().init_lock(name, lock_key);
+
+        // SAFETY: The Opaque instance provides a valid pointer, and it is initialized after
+        // this call.
+        unsafe { bindings::drm_mm_init(mm.lock().0.get(), start, size) };
+
+        Ok(Allocator {
+            mm: mm.into(),
+            _p: PhantomData,
+        })
+    }
+
+    /// Allocates a boxed node that carries `inner` and references this allocator, but is not
+    /// yet inserted (`valid` is false, so dropping it does not touch the allocator).
+    fn new_node_data(&self, inner: T) -> Result<Box<NodeData<A, T>>> {
+        let data = Box::try_new(NodeData {
+            // SAFETY: This C struct should be zero-initialized.
+            node: unsafe { core::mem::zeroed() },
+            valid: false,
+            inner,
+            mm: self.mm.clone(),
+            _pin: PhantomPinned,
+        })?;
+
+        Ok(data)
+    }
+
+    /// Insert a new node into the allocator of a given size.
+    ///
+    /// `node` is the user `T` type data to store into the node.
+    pub fn insert_node(&mut self, node: T, size: u64) -> Result<Node<A, T>> {
+        // No alignment constraint, color 0, best-fit search.
+        self.insert_node_generic(node, size, 0, 0, InsertMode::Best)
+    }
+
+    /// Insert a new node into the allocator of a given size, with configurable alignment,
+    /// color, and insertion mode.
+    ///
+    /// `node` is the user `T` type data to store into the node.
+    pub fn insert_node_generic(
+        &mut self,
+        node: T,
+        size: u64,
+        alignment: u64,
+        color: usize,
+        mode: InsertMode,
+    ) -> Result<Node<A, T>> {
+        // Search the full address range.
+        self.insert_node_in_range(node, size, alignment, color, 0, u64::MAX, mode)
+    }
+
+    /// Insert a new node into the allocator of a given size, with configurable alignment,
+    /// color, insertion mode, and sub-range to allocate from.
+    ///
+    /// `node` is the user `T` type data to store into the node.
+    #[allow(clippy::too_many_arguments)]
+    pub fn insert_node_in_range(
+        &mut self,
+        node: T,
+        size: u64,
+        alignment: u64,
+        color: usize,
+        start: u64,
+        end: u64,
+        mode: InsertMode,
+    ) -> Result<Node<A, T>> {
+        // The node is created before the lock is taken so that, on failure, the lock is
+        // released before the node (and the user data in it) is dropped.
+        let mut mm_node = self.new_node_data(node)?;
+
+        let state = self.mm.lock();
+        // SAFETY: We hold the lock and all pointers are valid.
+        let ret = unsafe {
+            bindings::drm_mm_insert_node_in_range(
+                state.0.get(),
+                &mut mm_node.node,
+                size,
+                alignment,
+                color as core::ffi::c_ulong,
+                start,
+                end,
+                mode as u32,
+            )
+        };
+        to_result(ret)?;
+
+        mm_node.valid = true;
+
+        Ok(Pin::from(mm_node))
+    }
+
+    /// Insert a node into the allocator at a fixed start address.
+    ///
+    /// `node` is the user `T` type data to store into the node.
+    pub fn reserve_node(
+        &mut self,
+        node: T,
+        start: u64,
+        size: u64,
+        color: usize,
+    ) -> Result<Node<A, T>> {
+        // The node is created before the lock is taken so that, on failure, the lock is
+        // released before the node (and the user data in it) is dropped.
+        let mut mm_node = self.new_node_data(node)?;
+
+        // Describe the exact placement requested by the caller.
+        mm_node.node.start = start;
+        mm_node.node.size = size;
+        mm_node.node.color = color as core::ffi::c_ulong;
+
+        let state = self.mm.lock();
+        // SAFETY: We hold the lock and all pointers are valid.
+        let ret = unsafe { bindings::drm_mm_reserve_node(state.0.get(), &mut mm_node.node) };
+        to_result(ret)?;
+
+        mm_node.valid = true;
+
+        Ok(Pin::from(mm_node))
+    }
+
+    /// Operate on the inner user type `A`, taking the allocator lock
+    pub fn with_inner<RetVal>(&self, cb: impl FnOnce(&mut A) -> RetVal) -> RetVal {
+        let mut state = self.mm.lock();
+        let user_inner = &mut state.1;
+        cb(user_inner)
+    }
+}
+
+impl<A: AllocInner<T>, T> Drop for MmInner<A, T> {
+    /// Tears down the underlying `drm_mm`.
+    fn drop(&mut self) {
+        let raw_mm = self.0.get();
+
+        // SAFETY: If the MmInner is dropped then all nodes are gone (since they hold references),
+        // so it is safe to tear down the allocator.
+        unsafe { bindings::drm_mm_takedown(raw_mm) };
+    }
+}
+
+// MmInner is safely Send if the AllocInner user type is Send.
+unsafe impl<A: Send + AllocInner<T>, T> Send for MmInner<A, T> {}
diff --git a/rust/kernel/drm/mod.rs b/rust/kernel/drm/mod.rs
new file mode 100644
index 000000000000..b1f182453ec1
--- /dev/null
+++ b/rust/kernel/drm/mod.rs
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+
+//! DRM subsystem abstractions.
+
+pub mod device;
+pub mod drv;
+pub mod file;
+pub mod gem;
+pub mod ioctl;
+pub mod mm;
+pub mod sched;
+pub mod syncobj;
diff --git a/rust/kernel/drm/sched.rs b/rust/kernel/drm/sched.rs
new file mode 100644
index 000000000000..a5275cc16179
--- /dev/null
+++ b/rust/kernel/drm/sched.rs
@@ -0,0 +1,358 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+
+//! DRM Scheduler
+//!
+//! C header: [`include/linux/drm/gpu_scheduler.h`](../../../../include/linux/drm/gpu_scheduler.h)
+
+use crate::{
+ bindings, device,
+ dma_fence::*,
+ error::{to_result, Result},
+ prelude::*,
+ sync::{Arc, UniqueArc},
+};
+use alloc::boxed::Box;
+use core::marker::PhantomData;
+use core::mem::MaybeUninit;
+use core::ops::{Deref, DerefMut};
+use core::ptr::addr_of_mut;
+
+/// Scheduler status after timeout recovery
+///
+/// Returned by [`JobImpl::timed_out`]; the discriminants are the C `drm_gpu_sched_stat`
+/// values taken from the bindings.
+#[repr(u32)]
+pub enum Status {
+ /// Device recovered from the timeout and can execute jobs again
+ Nominal = bindings::drm_gpu_sched_stat_DRM_GPU_SCHED_STAT_NOMINAL,
+ /// Device is no longer available
+ NoDevice = bindings::drm_gpu_sched_stat_DRM_GPU_SCHED_STAT_ENODEV,
+}
+
+/// Scheduler priorities
+///
+/// Passed to [`Entity::new`]; the discriminants are the C `drm_sched_priority` values taken
+/// from the bindings.
+#[repr(i32)]
+pub enum Priority {
+ /// Low userspace priority
+ Min = bindings::drm_sched_priority_DRM_SCHED_PRIORITY_MIN,
+ /// Normal userspace priority
+ Normal = bindings::drm_sched_priority_DRM_SCHED_PRIORITY_NORMAL,
+ /// High userspace priority
+ High = bindings::drm_sched_priority_DRM_SCHED_PRIORITY_HIGH,
+ /// Kernel priority (highest)
+ Kernel = bindings::drm_sched_priority_DRM_SCHED_PRIORITY_KERNEL,
+}
+
+/// Trait to be implemented by driver job objects.
+///
+/// The scheduler callbacks registered by [`Scheduler`] forward to these functions, passing the
+/// [`Job`] wrapper around the implementing type.
+pub trait JobImpl: Sized {
+ /// Called when the scheduler is considering scheduling this job next, to get another Fence
+ /// for this job to block on. Once it returns None, run() may be called.
+ ///
+ /// The default implementation returns `None`.
+ fn prepare(_job: &mut Job<Self>) -> Option<Fence> {
+ None // Equivalent to NULL function pointer
+ }
+
+ /// Called before job execution to check whether the hardware is free enough to run the job.
+ /// This can be used to implement more complex hardware resource policies than the hw_submission
+ /// limit.
+ ///
+ /// The default implementation always returns `true`.
+ fn can_run(_job: &mut Job<Self>) -> bool {
+ true
+ }
+
+ /// Called to execute the job once all of the dependencies have been resolved. This may be
+ /// called multiple times, if timed_out() has happened and drm_sched_job_recovery() decides
+ /// to try it again.
+ ///
+ /// An `Err` is handed back to the scheduler encoded as an error pointer.
+ fn run(job: &mut Job<Self>) -> Result<Option<Fence>>;
+
+ /// Called when a job has taken too long to execute, to trigger GPU recovery.
+ ///
+ /// This method is called in a workqueue context.
+ fn timed_out(job: &mut Job<Self>) -> Status;
+}
+
+/// C `prepare_job` callback: forwards to [`JobImpl::prepare`].
+///
+/// Returns the raw fence pointer to wait on, or NULL when `prepare` returns `None`.
+unsafe extern "C" fn prepare_job_cb<T: JobImpl>(
+ sched_job: *mut bindings::drm_sched_job,
+ _s_entity: *mut bindings::drm_sched_entity,
+) -> *mut bindings::dma_fence {
+ // SAFETY: All of our jobs are Job<T>.
+ let job_ptr = crate::container_of!(sched_job, Job<T>, job) as *mut Job<T>;
+ let job = unsafe { &mut *job_ptr };
+
+ T::prepare(job).map_or(core::ptr::null_mut(), |fence| fence.into_raw())
+}
+
+/// C `run_job` callback: forwards to [`JobImpl::run`].
+///
+/// Returns the raw fence pointer on success, NULL when there is no fence, or the error encoded
+/// as a pointer.
+unsafe extern "C" fn run_job_cb<T: JobImpl>(
+ sched_job: *mut bindings::drm_sched_job,
+) -> *mut bindings::dma_fence {
+ // SAFETY: All of our jobs are Job<T>.
+ let job_ptr = crate::container_of!(sched_job, Job<T>, job) as *mut Job<T>;
+ let job = unsafe { &mut *job_ptr };
+
+ let outcome = T::run(job);
+ match outcome {
+ Ok(Some(fence)) => fence.into_raw(),
+ Ok(None) => core::ptr::null_mut(),
+ Err(e) => e.to_ptr(),
+ }
+}
+
+/// C `can_run_job` callback: forwards to [`JobImpl::can_run`].
+unsafe extern "C" fn can_run_job_cb<T: JobImpl>(sched_job: *mut bindings::drm_sched_job) -> bool {
+ // SAFETY: All of our jobs are Job<T>.
+ let job_ptr = crate::container_of!(sched_job, Job<T>, job) as *mut Job<T>;
+ let job = unsafe { &mut *job_ptr };
+
+ T::can_run(job)
+}
+
+/// C `timedout_job` callback: forwards to [`JobImpl::timed_out`] and converts the resulting
+/// [`Status`] to the C enum type.
+unsafe extern "C" fn timedout_job_cb<T: JobImpl>(
+ sched_job: *mut bindings::drm_sched_job,
+) -> bindings::drm_gpu_sched_stat {
+ // SAFETY: All of our jobs are Job<T>.
+ let job_ptr = crate::container_of!(sched_job, Job<T>, job) as *mut Job<T>;
+ let job = unsafe { &mut *job_ptr };
+
+ let status = T::timed_out(job);
+ status as bindings::drm_gpu_sched_stat
+}
+
+/// C `free_job` callback: releases a job once the scheduler is done with it.
+///
+/// Rebuilds the `Box<Job<T>>` that was turned into a raw pointer in [`ArmedJob::push`] and
+/// drops it, which runs `Drop for Job<T>` and frees the allocation.
+unsafe extern "C" fn free_job_cb<T: JobImpl>(sched_job: *mut bindings::drm_sched_job) {
+ // SAFETY: All of our jobs are Job<T>.
+ let p = crate::container_of!(sched_job, Job<T>, job) as *mut Job<T>;
+
+ // Convert the job back to a Box and drop it explicitly, rather than discarding the
+ // `#[must_use]` return value of `Box::from_raw` as a bare statement.
+ // SAFETY: All of our Job<T>s are created inside a box.
+ drop(unsafe { Box::from_raw(p) });
+}
+
+/// A DRM scheduler job.
+///
+/// Wraps the C `drm_sched_job` together with the driver's [`JobImpl`] data; the wrapper
+/// dereferences to the driver data.
+pub struct Job<T: JobImpl> {
+ // Embedded C job; the scheduler callbacks recover the `Job<T>` from a pointer to this field.
+ job: bindings::drm_sched_job,
+ // Driver-specific job data, exposed through `Deref`/`DerefMut`.
+ inner: T,
+}
+
+impl<T: JobImpl> Deref for Job<T> {
+ type Target = T;
+
+ /// Borrows the driver data stored in the job.
+ fn deref(&self) -> &Self::Target {
+ &self.inner
+ }
+}
+
+impl<T: JobImpl> DerefMut for Job<T> {
+ /// Mutably borrows the driver data stored in the job.
+ fn deref_mut(&mut self) -> &mut Self::Target {
+ &mut self.inner
+ }
+}
+
+impl<T: JobImpl> Drop for Job<T> {
+ /// Hands the embedded C job to `drm_sched_job_cleanup()`.
+ fn drop(&mut self) {
+ // SAFETY: At this point the job has either been submitted and this is being called from
+ // `free_job_cb` above, or it hasn't and it is safe to call `drm_sched_job_cleanup`.
+ unsafe { bindings::drm_sched_job_cleanup(&mut self.job) };
+ }
+}
+
+/// A pending DRM scheduler job (not yet armed)
+///
+/// Created by [`Entity::new_job`]; the `'a` lifetime is the borrow of that entity.
+pub struct PendingJob<'a, T: JobImpl>(Box<Job<T>>, PhantomData<&'a T>);
+
+impl<'a, T: JobImpl> PendingJob<'a, T> {
+ /// Add a fence as a dependency to the job
+ ///
+ /// `fence` is converted to a raw pointer and handed to `drm_sched_job_add_dependency()`;
+ /// a negative return value from that call is reported as an error.
+ pub fn add_dependency(&mut self, fence: Fence) -> Result {
+ // SAFETY: NOTE(review): presumably sound because `self.0.job` was initialized by
+ // `drm_sched_job_init()` in `Entity::new_job` and `fence` is a valid fence — confirm.
+ to_result(unsafe {
+ bindings::drm_sched_job_add_dependency(&mut self.0.job, fence.into_raw())
+ })
+ }
+
+ /// Arm the job to make it ready for execution
+ ///
+ /// Consumes the pending job and returns it as an [`ArmedJob`].
+ pub fn arm(mut self) -> ArmedJob<'a, T> {
+ // SAFETY: NOTE(review): presumably sound because `self.0.job` was initialized by
+ // `drm_sched_job_init()` in `Entity::new_job` — confirm.
+ unsafe { bindings::drm_sched_job_arm(&mut self.0.job) };
+ ArmedJob(self.0, PhantomData)
+ }
+}
+
+impl<'a, T: JobImpl> Deref for PendingJob<'a, T> {
+ type Target = Job<T>;
+
+ /// Borrows the wrapped [`Job`].
+ fn deref(&self) -> &Self::Target {
+ &self.0
+ }
+}
+
+impl<'a, T: JobImpl> DerefMut for PendingJob<'a, T> {
+ /// Mutably borrows the wrapped [`Job`].
+ fn deref_mut(&mut self) -> &mut Self::Target {
+ &mut self.0
+ }
+}
+
+/// An armed DRM scheduler job (not yet submitted)
+pub struct ArmedJob<'a, T: JobImpl>(Box<Job<T>>, PhantomData<&'a T>);
+
+impl<'a, T: JobImpl> ArmedJob<'a, T> {
+ /// Returns the job fences
+ pub fn fences(&self) -> JobFences<'_> {
+ let s_fence = self.0.job.s_fence;
+ // NOTE(review): relies on `s_fence` pointing at a live fence for an armed job — confirm.
+ JobFences(unsafe { &mut *s_fence })
+ }
+
+ /// Push the job for execution into the scheduler
+ pub fn push(self) {
+ // After this point, the job is submitted and owned by the scheduler
+ let ArmedJob(boxed, _) = self;
+ let raw = Box::into_raw(boxed);
+
+ // SAFETY: We are passing in ownership of a valid Box raw pointer.
+ unsafe { bindings::drm_sched_entity_push_job(addr_of_mut!((*raw).job)) };
+ }
+}
+impl<'a, T: JobImpl> Deref for ArmedJob<'a, T> {
+ type Target = Job<T>;
+
+ /// Borrows the wrapped [`Job`].
+ fn deref(&self) -> &Self::Target {
+ &self.0
+ }
+}
+
+impl<'a, T: JobImpl> DerefMut for ArmedJob<'a, T> {
+ /// Mutably borrows the wrapped [`Job`].
+ fn deref_mut(&mut self) -> &mut Self::Target {
+ &mut self.0
+ }
+}
+
+/// Reference to the bundle of fences attached to a DRM scheduler job
+///
+/// Obtained from [`ArmedJob::fences`].
+pub struct JobFences<'a>(&'a mut bindings::drm_sched_fence);
+
+impl<'a> JobFences<'a> {
+ /// Returns a new reference to the job scheduled fence.
+ pub fn scheduled(&mut self) -> Fence {
+ // SAFETY: NOTE(review): presumably sound because `self.0` borrows a live
+ // `drm_sched_fence`, so its embedded `scheduled` fence is valid — confirm against
+ // the contract of `Fence::get_raw`.
+ unsafe { Fence::get_raw(&mut self.0.scheduled) }
+ }
+
+ /// Returns a new reference to the job finished fence.
+ pub fn finished(&mut self) -> Fence {
+ // SAFETY: NOTE(review): presumably sound because `self.0` borrows a live
+ // `drm_sched_fence`, so its embedded `finished` fence is valid — confirm against
+ // the contract of `Fence::get_raw`.
+ unsafe { Fence::get_raw(&mut self.0.finished) }
+ }
+}
+
+/// Backing storage for an [`Entity`]: the C entity plus the data that is stored with it.
+struct EntityInner<T: JobImpl> {
+ entity: bindings::drm_sched_entity,
+ // Reference to the scheduler this entity was created on.
+ sched: Arc<SchedulerInner<T>>,
+ // TODO: Allow users to share guilty flag between entities
+ guilty: bindings::atomic_t,
+ _p: PhantomData<T>,
+}
+
+impl<T: JobImpl> Drop for EntityInner<T> {
+ /// Destroys the C entity with `drm_sched_entity_destroy()`.
+ fn drop(&mut self) {
+ // SAFETY: The EntityInner is initialized. This will cancel/free all jobs.
+ unsafe { bindings::drm_sched_entity_destroy(&mut self.entity) };
+ }
+}
+
+// SAFETY: TODO
+unsafe impl<T: JobImpl> Sync for EntityInner<T> {}
+unsafe impl<T: JobImpl> Send for EntityInner<T> {}
+
+/// A DRM scheduler entity.
+pub struct Entity<T: JobImpl>(Pin<Box<EntityInner<T>>>);
+
+impl<T: JobImpl> Entity<T> {
+ /// Create a new scheduler entity.
+ ///
+ /// The entity is created on `sched` with the given `priority` and keeps its own reference
+ /// to the scheduler.
+ ///
+ /// # Errors
+ ///
+ /// Fails if the allocation fails or if `drm_sched_entity_init()` returns a negative value.
+ pub fn new(sched: &Scheduler<T>, priority: Priority) -> Result<Self> {
+ let mut entity: Box<MaybeUninit<EntityInner<T>>> = Box::try_new_zeroed()?;
+
+ let mut sched_ptr = &sched.0.sched as *const _ as *mut _;
+
+ // Propagate initialization failures instead of ignoring the return value and handing
+ // out an entity the C side never set up. On error the box is still `MaybeUninit`, so
+ // it is freed without running `Drop for EntityInner`.
+ // SAFETY: The Box is allocated above and valid.
+ to_result(unsafe {
+ bindings::drm_sched_entity_init(
+ addr_of_mut!((*entity.as_mut_ptr()).entity),
+ priority as _,
+ &mut sched_ptr,
+ 1,
+ addr_of_mut!((*entity.as_mut_ptr()).guilty),
+ )
+ })?;
+
+ // SAFETY: The Box is allocated above and valid.
+ unsafe { addr_of_mut!((*entity.as_mut_ptr()).sched).write(sched.0.clone()) };
+
+ // SAFETY: entity is now initialized.
+ Ok(Self(Pin::from(unsafe { entity.assume_init() })))
+ }
+
+ /// Create a new job on this entity.
+ ///
+ /// The entity must outlive the pending job until it transitions into the submitted state,
+ /// after which the scheduler owns it.
+ ///
+ /// # Errors
+ ///
+ /// Fails if the allocation fails or if `drm_sched_job_init()` returns a negative value.
+ pub fn new_job(&self, inner: T) -> Result<PendingJob<'_, T>> {
+ let mut job: Box<MaybeUninit<Job<T>>> = Box::try_new_zeroed()?;
+
+ // SAFETY: We hold a reference to the entity (which is a valid pointer),
+ // and the job object was just allocated above.
+ to_result(unsafe {
+ bindings::drm_sched_job_init(
+ addr_of_mut!((*job.as_mut_ptr()).job),
+ &self.0.as_ref().get_ref().entity as *const _ as *mut _,
+ core::ptr::null_mut(),
+ )
+ })?;
+
+ // SAFETY: The Box pointer is valid, and this initializes the inner member.
+ unsafe { addr_of_mut!((*job.as_mut_ptr()).inner).write(inner) };
+
+ // SAFETY: All fields of the Job<T> are now initialized.
+ Ok(PendingJob(unsafe { job.assume_init() }, PhantomData))
+ }
+}
+
+/// DRM scheduler inner data
+///
+/// Held in an [`Arc`] by [`Scheduler`] and by every entity created on it.
+pub struct SchedulerInner<T: JobImpl> {
+ // The C scheduler; initialized in place by `drm_sched_init()` in `Scheduler::new`.
+ sched: bindings::drm_gpu_scheduler,
+ _p: PhantomData<T>,
+}
+
+impl<T: JobImpl> Drop for SchedulerInner<T> {
+ /// Shuts the C scheduler down with `drm_sched_fini()`.
+ fn drop(&mut self) {
+ // SAFETY: The scheduler is valid. This assumes drm_sched_fini() will take care of
+ // freeing all in-progress jobs.
+ unsafe { bindings::drm_sched_fini(&mut self.sched) };
+ }
+}
+
+// SAFETY: TODO
+unsafe impl<T: JobImpl> Sync for SchedulerInner<T> {}
+unsafe impl<T: JobImpl> Send for SchedulerInner<T> {}
+
+/// A DRM Scheduler
+///
+/// The C scheduler lives in a reference-counted [`SchedulerInner`]; entities created on this
+/// scheduler hold their own reference to it.
+pub struct Scheduler<T: JobImpl>(Arc<SchedulerInner<T>>);
+
+impl<T: JobImpl> Scheduler<T> {
+ // Callback table handed to `drm_sched_init()`, wired to the generic callbacks for `T`.
+ const OPS: bindings::drm_sched_backend_ops = bindings::drm_sched_backend_ops {
+ prepare_job: Some(prepare_job_cb::<T>),
+ can_run_job: Some(can_run_job_cb::<T>),
+ run_job: Some(run_job_cb::<T>),
+ timedout_job: Some(timedout_job_cb::<T>),
+ free_job: Some(free_job_cb::<T>),
+ };
+ /// Creates a new DRM Scheduler object
+ ///
+ /// `hw_submission`, `hang_limit`, `name` and the raw device of `device` are passed through
+ /// to `drm_sched_init()`; `timeout_ms` is converted with `msecs_to_jiffies()` first.
+ ///
+ /// # Errors
+ ///
+ /// Fails if the allocation fails, if the timeout does not fit the integer types expected
+ /// by the C side, or if `drm_sched_init()` returns a negative value.
+ // TODO: Shared timeout workqueues & scores
+ pub fn new(
+ device: &impl device::RawDevice,
+ hw_submission: u32,
+ hang_limit: u32,
+ timeout_ms: usize,
+ name: &'static CStr,
+ ) -> Result<Scheduler<T>> {
+ let mut sched: UniqueArc<MaybeUninit<SchedulerInner<T>>> = UniqueArc::try_new_uninit()?;
+
+ // SAFETY: The drm_sched pointer is valid and pinned as it was just allocated above.
+ to_result(unsafe {
+ bindings::drm_sched_init(
+ addr_of_mut!((*sched.as_mut_ptr()).sched),
+ &Self::OPS,
+ hw_submission,
+ hang_limit,
+ bindings::msecs_to_jiffies(timeout_ms.try_into()?).try_into()?,
+ core::ptr::null_mut(),
+ core::ptr::null_mut(),
+ name.as_char_ptr(),
+ device.raw_device(),
+ )
+ })?;
+
+ // SAFETY: All fields of SchedulerInner are now initialized.
+ Ok(Scheduler(unsafe { sched.assume_init() }.into()))
+ }
+}
diff --git a/rust/kernel/drm/syncobj.rs b/rust/kernel/drm/syncobj.rs
new file mode 100644
index 000000000000..10eed05eb27a
--- /dev/null
+++ b/rust/kernel/drm/syncobj.rs
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+
+//! DRM Sync Objects
+//!
+//! C header: [`include/linux/drm/drm_syncobj.h`](../../../../include/linux/drm/drm_syncobj.h)
+
+use crate::{bindings, dma_fence::*, drm, error::Result, prelude::*};
+
+/// A DRM Sync Object
+///
+/// # Invariants
+/// ptr is a valid pointer to a drm_syncobj and we own a reference to it.
+pub struct SyncObj {
+ ptr: *mut bindings::drm_syncobj,
+}
+
+impl SyncObj {
+ /// Finds the sync object registered under `handle` for the given `File`.
+ ///
+ /// Returns `ENOENT` when the lookup yields no object.
+ pub fn lookup_handle(file: &impl drm::file::GenericFile, handle: u32) -> Result<SyncObj> {
+ // SAFETY: The arguments are all valid per the type invariants.
+ let found = unsafe { bindings::drm_syncobj_find(file.raw() as *mut _, handle) };
+
+ if found.is_null() {
+ return Err(ENOENT);
+ }
+
+ Ok(SyncObj { ptr: found })
+ }
+
+ /// Returns the DMA fence currently attached to this sync object, or `None` if there is none.
+ pub fn fence_get(&self) -> Option<Fence> {
+ let raw_fence = unsafe { bindings::drm_syncobj_fence_get(self.ptr) };
+ if raw_fence.is_null() {
+ return None;
+ }
+
+ // SAFETY: The pointer is non-NULL and drm_syncobj_fence_get acquired an
+ // additional reference.
+ Some(unsafe { Fence::from_raw(raw_fence) })
+ }
+
+ /// Installs `fence` as the sync object's fence; passing `None` removes the current one.
+ pub fn replace_fence(&self, fence: Option<&Fence>) {
+ unsafe {
+ let raw_fence = match fence {
+ Some(f) => f.raw(),
+ None => core::ptr::null_mut(),
+ };
+ bindings::drm_syncobj_replace_fence(self.ptr, raw_fence);
+ }
+ }
+
+ /// Adds a new timeline point to the syncobj.
+ pub fn add_point(&self, chain: FenceChain, fence: &Fence, point: u64) {
+ // SAFETY: All arguments should be valid per the respective type invariants.
+ // This takes over the FenceChain ownership.
+ unsafe {
+ let chain_ptr = chain.into_raw();
+ bindings::drm_syncobj_add_point(self.ptr, chain_ptr, fence.raw(), point);
+ }
+ }
+}
+
+impl Drop for SyncObj {
+ /// Releases the reference this `SyncObj` owns.
+ fn drop(&mut self) {
+ // SAFETY: We own a reference to this syncobj.
+ unsafe { bindings::drm_syncobj_put(self.ptr) };
+ }
+}
+
+impl Clone for SyncObj {
+ /// Takes an additional reference and returns a second handle to the same sync object.
+ fn clone(&self) -> Self {
+ // SAFETY: `ptr` is valid per the type invariant and we own a reference to it.
+ unsafe { bindings::drm_syncobj_get(self.ptr) };
+ SyncObj { ptr: self.ptr }
+ }
+}
+
+// SAFETY: drm_syncobj operations are internally locked.
+unsafe impl Sync for SyncObj {}
+// SAFETY: NOTE(review): presumably justified by the same internal locking as `Sync` — confirm.
+unsafe impl Send for SyncObj {}
diff --git a/rust/kernel/error.rs b/rust/kernel/error.rs
index 5b9751d7ff1d..9b9f5e479207 100644
--- a/rust/kernel/error.rs
+++ b/rust/kernel/error.rs
@@ -4,12 +4,15 @@
//!
//! C header: [`include/uapi/asm-generic/errno-base.h`](../../../include/uapi/asm-generic/errno-base.h)
+use crate::str::CStr;
+
use alloc::{
alloc::{AllocError, LayoutError},
collections::TryReserveError,
};
use core::convert::From;
+use core::fmt;
use core::num::TryFromIntError;
use core::str::Utf8Error;
@@ -58,6 +61,117 @@ pub mod code {
declare_err!(EPIPE, "Broken pipe.");
declare_err!(EDOM, "Math argument out of domain of func.");
declare_err!(ERANGE, "Math result not representable.");
+ declare_err!(EDEADLK, "Resource deadlock would occur.");
+ declare_err!(ENAMETOOLONG, "File name too long.");
+ declare_err!(ENOLCK, "No record locks available.");
+ declare_err!(
+ ENOSYS,
+ "Invalid system call number.",
+ "",
+ "This error code is special: arch syscall entry code will return",
+ "[`ENOSYS`] if users try to call a syscall that doesn't exist.",
+ "To keep failures of syscalls that really do exist distinguishable from",
+ "failures due to attempts to use a nonexistent syscall, syscall",
+ "implementations should refrain from returning [`ENOSYS`]."
+ );
+ declare_err!(ENOTEMPTY, "Directory not empty.");
+ declare_err!(ELOOP, "Too many symbolic links encountered.");
+ declare_err!(EWOULDBLOCK, "Operation would block.");
+ declare_err!(ENOMSG, "No message of desired type.");
+ declare_err!(EIDRM, "Identifier removed.");
+ declare_err!(ECHRNG, "Channel number out of range.");
+ declare_err!(EL2NSYNC, "Level 2 not synchronized.");
+ declare_err!(EL3HLT, "Level 3 halted.");
+ declare_err!(EL3RST, "Level 3 reset.");
+ declare_err!(ELNRNG, "Link number out of range.");
+ declare_err!(EUNATCH, "Protocol driver not attached.");
+ declare_err!(ENOCSI, "No CSI structure available.");
+ declare_err!(EL2HLT, "Level 2 halted.");
+ declare_err!(EBADE, "Invalid exchange.");
+ declare_err!(EBADR, "Invalid request descriptor.");
+ declare_err!(EXFULL, "Exchange full.");
+ declare_err!(ENOANO, "No anode.");
+ declare_err!(EBADRQC, "Invalid request code.");
+ declare_err!(EBADSLT, "Invalid slot.");
+ declare_err!(EDEADLOCK, "Resource deadlock would occur.");
+ declare_err!(EBFONT, "Bad font file format.");
+ declare_err!(ENOSTR, "Device not a stream.");
+ declare_err!(ENODATA, "No data available.");
+ declare_err!(ETIME, "Timer expired.");
+ declare_err!(ENOSR, "Out of streams resources.");
+ declare_err!(ENONET, "Machine is not on the network.");
+ declare_err!(ENOPKG, "Package not installed.");
+ declare_err!(EREMOTE, "Object is remote.");
+ declare_err!(ENOLINK, "Link has been severed.");
+ declare_err!(EADV, "Advertise error.");
+ declare_err!(ESRMNT, "Srmount error.");
+ declare_err!(ECOMM, "Communication error on send.");
+ declare_err!(EPROTO, "Protocol error.");
+ declare_err!(EMULTIHOP, "Multihop attempted.");
+ declare_err!(EDOTDOT, "RFS specific error.");
+ declare_err!(EBADMSG, "Not a data message.");
+ declare_err!(EOVERFLOW, "Value too large for defined data type.");
+ declare_err!(ENOTUNIQ, "Name not unique on network.");
+ declare_err!(EBADFD, "File descriptor in bad state.");
+ declare_err!(EREMCHG, "Remote address changed.");
+ declare_err!(ELIBACC, "Can not access a needed shared library.");
+ declare_err!(ELIBBAD, "Accessing a corrupted shared library.");
+ declare_err!(ELIBSCN, ".lib section in a.out corrupted.");
+ declare_err!(ELIBMAX, "Attempting to link in too many shared libraries.");
+ declare_err!(ELIBEXEC, "Cannot exec a shared library directly.");
+ declare_err!(EILSEQ, "Illegal byte sequence.");
+ declare_err!(ERESTART, "Interrupted system call should be restarted.");
+ declare_err!(ESTRPIPE, "Streams pipe error.");
+ declare_err!(EUSERS, "Too many users.");
+ declare_err!(ENOTSOCK, "Socket operation on non-socket.");
+ declare_err!(EDESTADDRREQ, "Destination address required.");
+ declare_err!(EMSGSIZE, "Message too long.");
+ declare_err!(EPROTOTYPE, "Protocol wrong type for socket.");
+ declare_err!(ENOPROTOOPT, "Protocol not available.");
+ declare_err!(EPROTONOSUPPORT, "Protocol not supported.");
+ declare_err!(ESOCKTNOSUPPORT, "Socket type not supported.");
+ declare_err!(EOPNOTSUPP, "Operation not supported on transport endpoint.");
+ declare_err!(EPFNOSUPPORT, "Protocol family not supported.");
+ declare_err!(EAFNOSUPPORT, "Address family not supported by protocol.");
+ declare_err!(EADDRINUSE, "Address already in use.");
+ declare_err!(EADDRNOTAVAIL, "Cannot assign requested address.");
+ declare_err!(ENETDOWN, "Network is down.");
+ declare_err!(ENETUNREACH, "Network is unreachable.");
+ declare_err!(ENETRESET, "Network dropped connection because of reset.");
+ declare_err!(ECONNABORTED, "Software caused connection abort.");
+ declare_err!(ECONNRESET, "Connection reset by peer.");
+ declare_err!(ENOBUFS, "No buffer space available.");
+ declare_err!(EISCONN, "Transport endpoint is already connected.");
+ declare_err!(ENOTCONN, "Transport endpoint is not connected.");
+ declare_err!(ESHUTDOWN, "Cannot send after transport endpoint shutdown.");
+ declare_err!(ETOOMANYREFS, "Too many references: cannot splice.");
+ declare_err!(ETIMEDOUT, "Connection timed out.");
+ declare_err!(ECONNREFUSED, "Connection refused.");
+ declare_err!(EHOSTDOWN, "Host is down.");
+ declare_err!(EHOSTUNREACH, "No route to host.");
+ declare_err!(EALREADY, "Operation already in progress.");
+ declare_err!(EINPROGRESS, "Operation now in progress.");
+ declare_err!(ESTALE, "Stale file handle.");
+ declare_err!(EUCLEAN, "Structure needs cleaning.");
+ declare_err!(ENOTNAM, "Not a XENIX named type file.");
+ declare_err!(ENAVAIL, "No XENIX semaphores available.");
+ declare_err!(EISNAM, "Is a named type file.");
+ declare_err!(EREMOTEIO, "Remote I/O error.");
+ declare_err!(EDQUOT, "Quota exceeded.");
+ declare_err!(ENOMEDIUM, "No medium found.");
+ declare_err!(EMEDIUMTYPE, "Wrong medium type.");
+ declare_err!(ECANCELED, "Operation Canceled.");
+ declare_err!(ENOKEY, "Required key not available.");
+ declare_err!(EKEYEXPIRED, "Key has expired.");
+ declare_err!(EKEYREVOKED, "Key has been revoked.");
+ declare_err!(EKEYREJECTED, "Key was rejected by service.");
+ declare_err!(EOWNERDEAD, "Owner died.", "", "For robust mutexes.");
+ declare_err!(ENOTRECOVERABLE, "State not recoverable.");
+ declare_err!(ERFKILL, "Operation not possible due to RF-kill.");
+ declare_err!(EHWPOISON, "Memory page has hardware error.");
+ declare_err!(ERESTARTSYS, "Restart the system call.");
+ declare_err!(ENOTSUPP, "Operation is not supported.");
+ declare_err!(ENOPARAM, "Parameter not supported.");
}
/// Generic integer kernel error.
@@ -72,10 +186,72 @@ pub mod code {
pub struct Error(core::ffi::c_int);
impl Error {
+ /// Creates an [`Error`] from a kernel error code.
+ ///
+ /// It is a bug to pass an out-of-range `errno`. `EINVAL` would
+ /// be returned in such a case.
+ pub(crate) fn from_kernel_errno(errno: core::ffi::c_int) -> Error {
+ if errno < -(bindings::MAX_ERRNO as i32) || errno >= 0 {
+ // TODO: Make it a `WARN_ONCE` once available.
+ crate::pr_warn!(
+ "attempted to create `Error` with out of range `errno`: {}",
+ errno
+ );
+ return code::EINVAL;
+ }
+
+ // INVARIANT: The check above ensures the type invariant
+ // will hold.
+ Error(errno)
+ }
+
/// Returns the kernel error code.
pub fn to_kernel_errno(self) -> core::ffi::c_int {
self.0
}
+
+ /// Returns the error encoded as a pointer.
+ ///
+ /// The errno is passed through the C `ERR_PTR()` helper and the result is cast to `*mut T`.
+ // NOTE(review): `dead_code` is presumably allowed because not every configuration has a
+ // caller — confirm.
+ #[allow(dead_code)]
+ pub(crate) fn to_ptr<T>(self) -> *mut T {
+ // SAFETY: Valid as long as self.0 is a valid error
+ unsafe { bindings::ERR_PTR(self.0.into()) as *mut _ }
+ }
+
+ /// Returns a string representing the error, if one exists.
+ #[cfg(not(testlib))]
+ pub fn name(&self) -> Option<&'static CStr> {
+ // SAFETY: Just an FFI call, there are no extra safety requirements.
+ let ptr = unsafe { bindings::errname(-self.0) };
+ if ptr.is_null() {
+ None
+ } else {
+ // SAFETY: The string returned by `errname` is static and `NUL`-terminated.
+ Some(unsafe { CStr::from_char_ptr(ptr) })
+ }
+ }
+
+ /// Returns a string representing the error, if one exists.
+ ///
+ /// When `testlib` is configured, this always returns `None` to avoid the dependency on a
+ /// kernel function so that tests that use this (e.g., by calling [`Result::unwrap`]) can still
+ /// run in userspace.
+ #[cfg(testlib)]
+ pub fn name(&self) -> Option<&'static CStr> {
+ None
+ }
+}
+
+impl fmt::Debug for Error {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ match self.name() {
+ // Print out number if no name can be found.
+ None => f.debug_tuple("Error").field(&-self.0).finish(),
+ // SAFETY: These strings are ASCII-only.
+ Some(name) => f
+ .debug_tuple(unsafe { core::str::from_utf8_unchecked(name) })
+ .finish(),
+ }
+ }
}
impl From<AllocError> for Error {
@@ -141,3 +317,108 @@ impl From<core::convert::Infallible> for Error {
/// it should still be modeled as returning a `Result` rather than
/// just an [`Error`].
pub type Result<T = ()> = core::result::Result<T, Error>;
+
+/// Maps the integer return value of a C kernel function to a [`Result`]: negative values become
+/// an error, anything else becomes `Ok(())`.
+pub fn to_result(err: core::ffi::c_int) -> Result {
+ if err >= 0 {
+ return Ok(());
+ }
+
+ Err(Error::from_kernel_errno(err))
+}
+
+/// Transform a kernel "error pointer" to a normal pointer.
+///
+/// Some kernel C API functions return an "error pointer" which optionally
+/// embeds an `errno`. Callers are supposed to check the returned pointer
+/// for errors. This function performs the check and converts the "error pointer"
+/// to a normal pointer in an idiomatic fashion.
+///
+/// # Examples
+///
+/// ```ignore
+/// # use kernel::from_kernel_err_ptr;
+/// # use kernel::bindings;
+/// fn devm_platform_ioremap_resource(
+/// pdev: &mut PlatformDevice,
+/// index: u32,
+/// ) -> Result<*mut core::ffi::c_void> {
+/// // SAFETY: FFI call.
+/// unsafe {
+/// from_kernel_err_ptr(bindings::devm_platform_ioremap_resource(
+/// pdev.to_ptr(),
+/// index,
+/// ))
+/// }
+/// }
+/// ```
+// TODO: Remove `dead_code` marker once an in-kernel client is available.
+#[allow(dead_code)]
+pub(crate) fn from_kernel_err_ptr<T>(ptr: *mut T) -> Result<*mut T> {
+ // CAST: Casting a pointer to `*const core::ffi::c_void` is always valid.
+ let const_ptr: *const core::ffi::c_void = ptr.cast();
+ // SAFETY: The FFI function does not deref the pointer.
+ if unsafe { bindings::IS_ERR(const_ptr) } {
+ // SAFETY: The FFI function does not deref the pointer.
+ let err = unsafe { bindings::PTR_ERR(const_ptr) };
+ // CAST: If `IS_ERR()` returns `true`,
+ // then `PTR_ERR()` is guaranteed to return a
+ // negative value greater-or-equal to `-bindings::MAX_ERRNO`,
+ // which always fits in an `i16`, as per the invariant above.
+ // And an `i16` always fits in an `i32`. So casting `err` to
+ // an `i32` can never overflow, and is always valid.
+ //
+ // SAFETY: `IS_ERR()` ensures `err` is a
+ // negative value greater-or-equal to `-bindings::MAX_ERRNO`.
+ #[cfg_attr(CONFIG_ARM, allow(clippy::unnecessary_cast))]
+ return Err(Error(err as i32));
+ }
+ Ok(ptr)
+}
+
+/// Collapses a [`Result<T>`] into a plain `T`: `Ok` values pass through, errors are converted
+/// from their errno (narrowed to `i16`) via `T::from`.
+pub(crate) fn from_kernel_result_helper<T>(r: Result<T>) -> T
+where
+ T: From<i16>,
+{
+ // NO-OVERFLOW: negative `errno`s are no smaller than `-bindings::MAX_ERRNO`,
+ // `-bindings::MAX_ERRNO` fits in an `i16` as per invariant above,
+ // therefore a negative `errno` always fits in an `i16` and will not overflow.
+ r.unwrap_or_else(|e| T::from(e.to_kernel_errno() as i16))
+}
+
+/// Transforms a [`crate::error::Result<T>`] to a kernel C integer result.
+///
+/// This is useful when calling Rust functions that return [`crate::error::Result<T>`]
+/// from inside `extern "C"` functions that need to return an integer
+/// error result.
+///
+/// `T` must implement `From<i16>`: on error, the errno is narrowed to an `i16` and converted
+/// into `T`.
+///
+/// The macro body is wrapped in a closure that is called immediately, so `?` inside it
+/// returns from the closure rather than from the surrounding function.
+///
+/// # Examples
+///
+/// ```ignore
+/// # use kernel::from_kernel_result;
+/// # use kernel::bindings;
+/// unsafe extern "C" fn probe_callback(
+/// pdev: *mut bindings::platform_device,
+/// ) -> core::ffi::c_int {
+/// from_kernel_result! {
+/// let ptr = devm_alloc(pdev)?;
+/// bindings::platform_set_drvdata(pdev, ptr);
+/// Ok(0)
+/// }
+/// }
+/// ```
+macro_rules! from_kernel_result {
+ ($($tt:tt)*) => {{
+ $crate::error::from_kernel_result_helper((|| {
+ $($tt)*
+ })())
+ }};
+}
+
+pub(crate) use from_kernel_result;
diff --git a/rust/kernel/io_buffer.rs b/rust/kernel/io_buffer.rs
new file mode 100644
index 000000000000..d5a258a5ff8f
--- /dev/null
+++ b/rust/kernel/io_buffer.rs
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Buffers used in IO.
+
+use crate::error::Result;
+use alloc::vec::Vec;
+use core::mem::{size_of, MaybeUninit};
+
+/// Represents a buffer to be read from during IO.
+pub trait IoBufferReader {
+ /// Returns the number of bytes left to be read from the io buffer.
+ ///
+ /// Note that even reading less than this number of bytes may fail.
+ fn len(&self) -> usize;
+
+ /// Returns `true` if no data is available in the io buffer.
+ fn is_empty(&self) -> bool {
+ let remaining = self.len();
+ remaining == 0
+ }
+
+ /// Reads raw data from the io buffer into a raw kernel buffer.
+ ///
+ /// # Safety
+ ///
+ /// The output buffer must be valid.
+ unsafe fn read_raw(&mut self, out: *mut u8, len: usize) -> Result;
+
+ /// Reads all data remaining in the io buffer.
+ ///
+ /// Returns `EFAULT` if the address does not currently point to mapped, readable memory.
+ fn read_all(&mut self) -> Result<Vec<u8>> {
+ let remaining = self.len();
+ let mut buf: Vec<u8> = Vec::new();
+ buf.try_resize(remaining, 0)?;
+
+ // SAFETY: The output buffer is valid as we just allocated it.
+ unsafe { self.read_raw(buf.as_mut_ptr(), remaining) }?;
+ Ok(buf)
+ }
+
+ /// Reads a byte slice from the io buffer.
+ ///
+ /// Returns `EFAULT` if the byte slice is bigger than the remaining size of the user slice or
+ /// if the address does not currently point to mapped, readable memory.
+ fn read_slice(&mut self, data: &mut [u8]) -> Result {
+ let (dst, len) = (data.as_mut_ptr(), data.len());
+ // SAFETY: The output buffer is valid as it's coming from a live reference.
+ unsafe { self.read_raw(dst, len) }
+ }
+
+ /// Reads the contents of a plain old data (POD) type from the io buffer.
+ fn read<T: ReadableFromBytes>(&mut self) -> Result<T> {
+ let mut slot = MaybeUninit::<T>::uninit();
+ let dst = slot.as_mut_ptr().cast::<u8>();
+ // SAFETY: The buffer is valid as it was just allocated.
+ unsafe { self.read_raw(dst, size_of::<T>()) }?;
+ // SAFETY: We just initialised the data.
+ Ok(unsafe { slot.assume_init() })
+ }
+}
+
+/// Represents a buffer to be written to during IO.
+pub trait IoBufferWriter {
+ /// Returns the number of bytes left to be written into the io buffer.
+ ///
+ /// Note that even writing less than this number of bytes may fail.
+ fn len(&self) -> usize;
+
+ /// Returns `true` if the io buffer cannot hold any additional data.
+ fn is_empty(&self) -> bool {
+ let remaining = self.len();
+ remaining == 0
+ }
+
+ /// Writes zeroes to the io buffer.
+ ///
+ /// Differently from the other write functions, `clear` will zero as much as it can and update
+ /// the writer internal state to reflect this. It will, however, return an error if it cannot
+ /// clear `len` bytes.
+ ///
+ /// For example, if a caller requests that 100 bytes be cleared but a segfault happens after
+ /// 20 bytes, then EFAULT is returned and the writer is advanced by 20 bytes.
+ fn clear(&mut self, len: usize) -> Result;
+
+ /// Writes a byte slice into the io buffer.
+ ///
+ /// Returns `EFAULT` if the byte slice is bigger than the remaining size of the io buffer or if
+ /// the address does not currently point to mapped, writable memory.
+ fn write_slice(&mut self, data: &[u8]) -> Result {
+ let (src, len) = (data.as_ptr(), data.len());
+ // SAFETY: The input buffer is valid as it's coming from a live reference.
+ unsafe { self.write_raw(src, len) }
+ }
+
+ /// Writes raw data to the io buffer from a raw kernel buffer.
+ ///
+ /// # Safety
+ ///
+ /// The input buffer must be valid.
+ unsafe fn write_raw(&mut self, data: *const u8, len: usize) -> Result;
+
+ /// Writes the contents of the given data into the io buffer.
+ fn write<T: WritableToBytes>(&mut self, data: &T) -> Result {
+ let src = (data as *const T).cast::<u8>();
+ // SAFETY: The input buffer is valid as it's coming from a live
+ // reference to a type that implements `WritableToBytes`.
+ unsafe { self.write_raw(src, size_of::<T>()) }
+ }
+}
+
+/// Marks a type as safe to read from arbitrary byte slices.
+///
+/// Not all types can be safely read from byte slices; examples from
+/// <https://doc.rust-lang.org/reference/behavior-considered-undefined.html> include `bool`
+/// that must be either `0` or `1`, and `char` that cannot be a surrogate or above `char::MAX`.
+///
+/// # Safety
+///
+/// Implementers must ensure that the type is made up only of types that can be safely read from
+/// arbitrary byte sequences (e.g., `u32`, `u64`, etc.).
+pub unsafe trait ReadableFromBytes {}
+
+// Implements `ReadableFromBytes` for each listed type.
+macro_rules! impl_readable_from_bytes {
+ ($($ty:ty),* $(,)?) => {
+ $(
+ // SAFETY: All bit patterns are acceptable values of the types below.
+ unsafe impl ReadableFromBytes for $ty {}
+ )*
+ };
+}
+
+impl_readable_from_bytes!(u8, u16, u32, u64, usize, i8, i16, i32, i64, isize);
+
+/// Marks a type as safe to write out as a byte slice.
+///
+/// This means that we don't read undefined values (which leads to UB) in preparation for writing
+/// to the byte slice. It also ensures that no potentially sensitive information is leaked into the
+/// byte slices.
+///
+/// # Safety
+///
+/// A type must not include padding bytes and must be fully initialised to safely implement
+/// [`WritableToBytes`] (i.e., it doesn't contain [`MaybeUninit`] fields). A composition of
+/// writable types in a structure is not necessarily writable because it may result in padding
+/// bytes.
+pub unsafe trait WritableToBytes {}
+
+// Implements `WritableToBytes` for each listed type.
+macro_rules! impl_writable_to_bytes {
+ ($($ty:ty),* $(,)?) => {
+ $(
+ // SAFETY: Initialised instances of the following types have no uninitialised portions.
+ unsafe impl WritableToBytes for $ty {}
+ )*
+ };
+}
+
+impl_writable_to_bytes!(u8, u16, u32, u64, usize, i8, i16, i32, i64, isize);
+unsafe impl WritableToBytes for isize {}
diff --git a/rust/kernel/io_mem.rs b/rust/kernel/io_mem.rs
new file mode 100644
index 000000000000..5bb8800b04f5
--- /dev/null
+++ b/rust/kernel/io_mem.rs
@@ -0,0 +1,292 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Memory-mapped IO.
+//!
+//! C header: [`include/asm-generic/io.h`](../../../../include/asm-generic/io.h)
+
+#![allow(dead_code)]
+
+use crate::{bindings, error::code::*, error::Result};
+use core::convert::TryInto;
+
+/// The kind of a [`Resource`].
+pub enum IoResource {
+ /// I/O memory (`IORESOURCE_MEM`).
+ Mem = bindings::IORESOURCE_MEM as _,
+}
+
+/// Represents a memory resource, described by its start address, its size in bytes and its flags.
+pub struct Resource {
+ offset: bindings::resource_size_t,
+ size: bindings::resource_size_t,
+ flags: core::ffi::c_ulong,
+}
+
+impl Resource {
+    /// Builds a resource covering the inclusive address range `start..=end`.
+    ///
+    /// Returns `None` when `start` is zero, when `end` is below `start`, or when the size
+    /// computation (`end - start + 1`) overflows.
+    pub(crate) fn new(
+        start: bindings::resource_size_t,
+        end: bindings::resource_size_t,
+        flags: core::ffi::c_ulong,
+    ) -> Option<Self> {
+        let size = match start {
+            0 => return None,
+            _ => end.checked_sub(start)?.checked_add(1)?,
+        };
+        Some(Self { offset: start, size, flags })
+    }
+}
+
+/// Represents a mapped I/O memory block of at least `SIZE` bytes.
+///
+/// # Invariants
+///
+/// `ptr` is a non-null and valid address of at least `SIZE` bytes and returned by an `ioremap`
+/// variant. `ptr` is also 8-byte aligned.
+///
+/// # Examples
+///
+/// ```
+/// # use kernel::prelude::*;
+/// use kernel::io_mem::{IoMem, Resource};
+///
+/// fn test(res: Resource) -> Result {
+/// // Create an I/O memory block of at least 100 bytes.
+/// // SAFETY: No DMA operations are initiated through `mem`.
+/// let mem = unsafe { IoMem::<100>::try_new(res) }?;
+///
+/// // Read one byte from offset 10.
+/// let v = mem.readb(10);
+///
+/// // Write value to offset 20.
+/// mem.writeb(v, 20);
+///
+/// Ok(())
+/// }
+/// ```
+pub struct IoMem<const SIZE: usize> {
+ ptr: usize,
+}
+
+macro_rules! define_read {
+ ($(#[$attr:meta])* $name:ident, $try_name:ident, $type_name:ty) => {
+ /// Reads IO data from the given offset, known at compile time.
+ ///
+ /// If the access cannot be shown to be in bounds and aligned at compile time, the build will fail.
+ $(#[$attr])*
+ #[inline]
+ pub fn $name(&self, offset: usize) -> $type_name {
+ Self::check_offset::<$type_name>(offset);
+ let ptr = self.ptr.wrapping_add(offset);
+ // SAFETY: The type invariants guarantee that `ptr` is a valid pointer. The check above
+ // guarantees that the code won't build if `offset` makes the read go out of bounds
+ // (including the type size).
+ unsafe { bindings::$name(ptr as _) }
+ }
+
+ /// Reads IO data from the given offset.
+ ///
+ /// Returns `EINVAL` if the offset (plus the type size) is out of bounds or misaligned for the type.
+ $(#[$attr])*
+ pub fn $try_name(&self, offset: usize) -> Result<$type_name> {
+ if !Self::offset_ok::<$type_name>(offset) {
+ return Err(EINVAL);
+ }
+ let ptr = self.ptr.wrapping_add(offset);
+ // SAFETY: The type invariants guarantee that `ptr` is a valid pointer. The check above
+ // returns an error if `offset` would make the read go out of bounds (including the
+ // type size).
+ Ok(unsafe { bindings::$name(ptr as _) })
+ }
+ };
+}
+
+macro_rules! define_write {
+ ($(#[$attr:meta])* $name:ident, $try_name:ident, $type_name:ty) => {
+ /// Writes IO data to the given offset, known at compile time.
+ ///
+ /// If the access cannot be shown to be in bounds and aligned at compile time, the build will fail.
+ $(#[$attr])*
+ #[inline]
+ pub fn $name(&self, value: $type_name, offset: usize) {
+ Self::check_offset::<$type_name>(offset);
+ let ptr = self.ptr.wrapping_add(offset);
+ // SAFETY: The type invariants guarantee that `ptr` is a valid pointer. The check above
+ // guarantees that the code won't link if `offset` makes the write go out of bounds
+ // (including the type size).
+ unsafe { bindings::$name(value, ptr as _) }
+ }
+
+ /// Writes IO data to the given offset.
+ ///
+ /// Returns `EINVAL` if the offset (plus the type size) is out of bounds or misaligned for the type.
+ $(#[$attr])*
+ pub fn $try_name(&self, value: $type_name, offset: usize) -> Result {
+ if !Self::offset_ok::<$type_name>(offset) {
+ return Err(EINVAL);
+ }
+ let ptr = self.ptr.wrapping_add(offset);
+ // SAFETY: The type invariants guarantee that `ptr` is a valid pointer. The check above
+ // returns an error if `offset` would make the write go out of bounds (including the
+ // type size).
+ unsafe { bindings::$name(value, ptr as _) };
+ Ok(())
+ }
+ };
+}
+
+impl<const SIZE: usize> IoMem<SIZE> {
+    /// Tries to create a new instance of a memory block.
+    ///
+    /// The resource described by `res` is mapped into the CPU's address space so that it can be
+    /// accessed directly. It is also consumed by this function so that it can't be mapped again
+    /// to a different address.
+    ///
+    /// # Errors
+    ///
+    /// - `EINVAL` if the resource is smaller than `SIZE` bytes.
+    /// - `EDOM` if the start address of the resource is not 8-byte aligned.
+    /// - `ENOMEM` if the mapping call returns a null pointer.
+    /// - The conversion error if `SIZE` does not fit in `resource_size_t`.
+    ///
+    /// # Safety
+    ///
+    /// Callers must ensure that either (a) the resulting interface cannot be used to initiate DMA
+    /// operations, or (b) that DMA operations initiated via the returned interface use DMA handles
+    /// allocated through the `dma` module.
+    pub unsafe fn try_new(res: Resource) -> Result<Self> {
+        // Check that the resource has at least `SIZE` bytes in it.
+        if res.size < SIZE.try_into()? {
+            return Err(EINVAL);
+        }
+
+        // To be able to check pointers at compile time based only on offsets, we need to guarantee
+        // that the base pointer is minimally aligned. So we conservatively expect at least 8 bytes.
+        if res.offset % 8 != 0 {
+            // Kernel log messages must be newline-terminated.
+            crate::pr_err!("Physical address is not 64-bit aligned: {:x}\n", res.offset);
+            return Err(EDOM);
+        }
+
+        // Try to map the resource, using the non-posted variant when the resource asks for it.
+        let addr = if res.flags & (bindings::IORESOURCE_MEM_NONPOSTED as core::ffi::c_ulong) != 0 {
+            // SAFETY: Just mapping the memory range.
+            unsafe { bindings::ioremap_np(res.offset, res.size as _) }
+        } else {
+            // SAFETY: Just mapping the memory range.
+            unsafe { bindings::ioremap(res.offset, res.size as _) }
+        };
+
+        if addr.is_null() {
+            Err(ENOMEM)
+        } else {
+            // INVARIANT: `addr` is non-null and was returned by `ioremap`, so it is valid. It is
+            // also 8-byte aligned because we checked it above.
+            Ok(Self { ptr: addr as usize })
+        }
+    }
+
+    /// Returns whether an access of type `T` at `offset` ends within `SIZE` bytes and starts at a
+    /// multiple of the size of `T`.
+    #[inline]
+    const fn offset_ok<T>(offset: usize) -> bool {
+        let width = core::mem::size_of::<T>();
+        match offset.checked_add(width) {
+            Some(end) => end <= SIZE && offset % width == 0,
+            // `offset + width` overflowed `usize`, so the access cannot be in bounds.
+            None => false,
+        }
+    }
+
+    /// Returns whether `value`, placed at `offset`, ends within `SIZE` bytes and whether `offset`
+    /// is a multiple of the alignment of `value`.
+    fn offset_ok_of_val<T: ?Sized>(offset: usize, value: &T) -> bool {
+        let len = core::mem::size_of_val(value);
+        let align = core::mem::align_of_val(value);
+        match offset.checked_add(len) {
+            Some(end) => end <= SIZE && offset % align == 0,
+            // `offset + len` overflowed `usize`, so the range cannot be in bounds.
+            None => false,
+        }
+    }
+
+    /// Fails the build unless an access of type `T` at `offset` passes `offset_ok`.
+    #[inline]
+    const fn check_offset<T>(offset: usize) {
+        let valid = Self::offset_ok::<T>(offset);
+        crate::build_assert!(valid, "IoMem offset overflow");
+    }
+
+    /// Copies a block of I/O memory, starting at `offset`, into `buffer`, filling it completely.
+    ///
+    /// Returns `EINVAL` if `offset + buffer.len()` exceeds `SIZE`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # use kernel::prelude::*;
+    /// use kernel::io_mem::{IoMem, Resource};
+    ///
+    /// fn test(res: Resource) -> Result {
+    ///     // Create an i/o memory block of at least 100 bytes.
+    ///     // SAFETY: No DMA operations are initiated through `mem`.
+    ///     let mem = unsafe { IoMem::<100>::try_new(res) }?;
+    ///
+    ///     let mut buffer: [u8; 32] = [0; 32];
+    ///
+    ///     // Memcpy 16 bytes from an offset 10 of i/o memory block into the buffer.
+    ///     mem.try_memcpy_fromio(&mut buffer[..16], 10)?;
+    ///
+    ///     Ok(())
+    /// }
+    /// ```
+    pub fn try_memcpy_fromio(&self, buffer: &mut [u8], offset: usize) -> Result {
+        let in_bounds = Self::offset_ok_of_val(offset, buffer);
+        if !in_bounds {
+            return Err(EINVAL);
+        }
+
+        let src = self.ptr.wrapping_add(offset);
+        let dst = buffer.as_mut_ptr();
+        let count = buffer.len();
+
+        // SAFETY:
+        //   - The type invariants guarantee that `self.ptr` is a valid pointer.
+        //   - The bounds of `buffer` were checked with the call to `offset_ok_of_val()` above.
+        unsafe { bindings::memcpy_fromio(dst as *mut _, src as *const _, count as _) };
+        Ok(())
+    }
+
+ define_read!(readb, try_readb, u8); // each line generates `$name` and `$try_name` wrapping `bindings::$name`
+ define_read!(readw, try_readw, u16);
+ define_read!(readl, try_readl, u32);
+ define_read!(
+ #[cfg(CONFIG_64BIT)]
+ readq,
+ try_readq,
+ u64
+ );
+
+ define_read!(readb_relaxed, try_readb_relaxed, u8); // `_relaxed` reads; 64-bit variant only with CONFIG_64BIT
+ define_read!(readw_relaxed, try_readw_relaxed, u16);
+ define_read!(readl_relaxed, try_readl_relaxed, u32);
+ define_read!(
+ #[cfg(CONFIG_64BIT)]
+ readq_relaxed,
+ try_readq_relaxed,
+ u64
+ );
+
+ define_write!(writeb, try_writeb, u8); // write counterparts of the read accessors above
+ define_write!(writew, try_writew, u16);
+ define_write!(writel, try_writel, u32);
+ define_write!(
+ #[cfg(CONFIG_64BIT)]
+ writeq,
+ try_writeq,
+ u64
+ );
+
+ define_write!(writeb_relaxed, try_writeb_relaxed, u8); // `_relaxed` writes; 64-bit variant only with CONFIG_64BIT
+ define_write!(writew_relaxed, try_writew_relaxed, u16);
+ define_write!(writel_relaxed, try_writel_relaxed, u32);
+ define_write!(
+ #[cfg(CONFIG_64BIT)]
+ writeq_relaxed,
+ try_writeq_relaxed,
+ u64
+ );
+}
+
+impl<const SIZE: usize> Drop for IoMem<SIZE> {
+ fn drop(&mut self) {
+ // SAFETY: By the type invariant, `self.ptr` is a value returned by a previous successful
+ // call to an `ioremap` variant (`ioremap` or `ioremap_np` in `try_new`).
+ unsafe { bindings::iounmap(self.ptr as _) };
+ }
+}
diff --git a/rust/kernel/io_pgtable.rs b/rust/kernel/io_pgtable.rs
new file mode 100644
index 000000000000..4c90adefcd24
--- /dev/null
+++ b/rust/kernel/io_pgtable.rs
@@ -0,0 +1,353 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! IOMMU page table management
+//!
+//! C header: [`include/io-pgtable.h`](../../../../include/io-pgtable.h)
+
+use crate::{
+ bindings, device,
+ error::{code::*, to_result, Result},
+ types::{ForeignOwnable, ScopeGuard},
+};
+
+use core::marker::PhantomData;
+use core::mem;
+use core::num::NonZeroU64;
+
+/// Protection flags used with IOMMU mappings (re-exports of the C `IOMMU_*` constants).
+pub mod prot {
+ /// Read access (`IOMMU_READ`).
+ pub const READ: u32 = bindings::IOMMU_READ;
+ /// Write access (`IOMMU_WRITE`).
+ pub const WRITE: u32 = bindings::IOMMU_WRITE;
+ /// Request cache coherency (`IOMMU_CACHE`).
+ pub const CACHE: u32 = bindings::IOMMU_CACHE;
+ /// Request no-execute permission (`IOMMU_NOEXEC`).
+ pub const NOEXEC: u32 = bindings::IOMMU_NOEXEC;
+ /// MMIO peripheral mapping (`IOMMU_MMIO`).
+ pub const MMIO: u32 = bindings::IOMMU_MMIO;
+ /// Privileged mapping (`IOMMU_PRIV`).
+ pub const PRIV: u32 = bindings::IOMMU_PRIV;
+}
+
+/// Represents a requested io_pgtable configuration (converted into a C `io_pgtable_cfg` on creation).
+pub struct Config {
+ /// Quirk bitmask (type-specific).
+ pub quirks: usize,
+ /// Valid page sizes, as a bitmask of powers of two.
+ pub pgsize_bitmap: usize,
+ /// Input address space size in bits.
+ pub ias: usize,
+ /// Output address space size in bits.
+ pub oas: usize,
+ /// Whether the IOMMU uses coherent accesses for page table walks.
+ pub coherent_walk: bool,
+}
+
+/// IOMMU callbacks for TLB and page table management.
+///
+/// Users must implement this trait to perform the TLB flush actions for this IOMMU, if
+/// required.
+pub trait FlushOps {
+ /// User-specified type owned by the IOPagetable; it is passed, borrowed, to every TLB callback.
+ type Data: ForeignOwnable + Send + Sync;
+
+ /// Synchronously invalidate the entire TLB context.
+ fn tlb_flush_all(data: <Self::Data as ForeignOwnable>::Borrowed<'_>);
+
+ /// Synchronously invalidate all intermediate TLB state (sometimes referred to as the "walk
+ /// cache") for the virtual address range of `size` bytes starting at `iova`.
+ fn tlb_flush_walk(
+ data: <Self::Data as ForeignOwnable>::Borrowed<'_>,
+ iova: usize,
+ size: usize,
+ granule: usize,
+ );
+
+ /// Optional callback to queue up leaf TLB invalidation for a single page.
+ ///
+ /// IOMMUs that cannot batch TLB invalidation operations efficiently will typically issue
+ /// them here, but others may decide to update the iommu_iotlb_gather structure and defer
+ /// the invalidation until iommu_iotlb_sync() instead.
+ ///
+ /// TODO: Implement the gather argument for batching (it is currently dropped by the wrapper).
+ fn tlb_add_page(
+ data: <Self::Data as ForeignOwnable>::Borrowed<'_>,
+ iova: usize,
+ granule: usize,
+ );
+}
+
+/// Inner page table info shared across all table types.
+///
+/// # Invariants
+/// - `ops` is the valid, non-null pointer returned by `alloc_io_pgtable_ops`.
+/// - `cfg` is the `io_pgtable_cfg` passed to that call; `data` is the cookie from `into_foreign`.
+#[doc(hidden)]
+pub struct IoPageTableInner {
+ ops: *mut bindings::io_pgtable_ops,
+ cfg: bindings::io_pgtable_cfg,
+ data: *mut core::ffi::c_void,
+}
+
+/// Helper trait to extract the configuration of a single page table type from the config union.
+pub trait GetConfig {
+ /// Returns the specific output configuration for this page table type, borrowed from `iopt`.
+ fn cfg(iopt: &impl IoPageTable) -> &Self
+ where
+ Self: Sized;
+}
+
+/// A generic IOMMU page table
+pub trait IoPageTable: crate::private::Sealed {
+ #[doc(hidden)]
+ const FLUSH_OPS: bindings::iommu_flush_ops;
+
+    #[doc(hidden)]
+    fn new_fmt<T: FlushOps>(
+        dev: &dyn device::RawDevice,
+        format: u32,
+        config: Config,
+        data: T::Data,
+    ) -> Result<IoPageTableInner> {
+        // Ownership of `data` is handed over as an opaque cookie. Until the page table has been
+        // allocated, the guard takes it back on every early return.
+        let cookie = data.into_foreign() as *mut _;
+        let reclaim = ScopeGuard::new(|| {
+            // SAFETY: `cookie` came from the call to `into_foreign` above and has not been
+            // converted back yet.
+            unsafe { T::Data::from_foreign(cookie) };
+        });
+
+        let mut raw_cfg = bindings::io_pgtable_cfg {
+            quirks: config.quirks.try_into()?,
+            pgsize_bitmap: config.pgsize_bitmap.try_into()?,
+            ias: config.ias.try_into()?,
+            oas: config.oas.try_into()?,
+            coherent_walk: config.coherent_walk,
+            tlb: &Self::FLUSH_OPS,
+            iommu_dev: dev.raw_device(),
+            // SAFETY (assumed, confirm against the generated bindings): the format-specific
+            // config union is plain C data for which the all-zeroes bit pattern is valid.
+            __bindgen_anon_1: unsafe { mem::zeroed() },
+        };
+
+        // SAFETY (assumed, confirm against the C API): `raw_cfg` is a fully initialised
+        // configuration that lives for the duration of the call, and `cookie` stays valid until
+        // it is reclaimed with `from_foreign`.
+        let ops = unsafe {
+            bindings::alloc_io_pgtable_ops(format as bindings::io_pgtable_fmt, &mut raw_cfg, cookie)
+        };
+        if ops.is_null() {
+            return Err(EINVAL);
+        }
+
+        // Success: the cookie is now stored in the returned value instead of being reclaimed.
+        reclaim.dismiss();
+        Ok(IoPageTableInner {
+            ops,
+            cfg: raw_cfg,
+            data: cookie,
+        })
+    }
+
+    /// Map a range of pages.
+    ///
+    /// Requests a mapping of `pgcount` pages of `pgsize` bytes each, from `iova` to `paddr`, with
+    /// the protection flags `prot` (see the [`prot`] module).
+    ///
+    /// Returns the `mapped` value reported by the underlying `map_pages` callback.
+    ///
+    /// # Errors
+    ///
+    /// Returns `EINVAL` if the page table format does not provide a `map_pages` callback, or the
+    /// error reported by the callback itself.
+    fn map_pages(
+        &mut self,
+        iova: usize,
+        paddr: usize,
+        pgsize: usize,
+        pgcount: usize,
+        prot: u32,
+    ) -> Result<usize> {
+        let ops = self.inner_mut().ops;
+
+        // The callback is an optional function pointer on the C side; report its absence as an
+        // error instead of panicking.
+        //
+        // SAFETY: `ops` is valid and non-null by the invariants of `IoPageTableInner`.
+        let map_fn = unsafe { (*ops).map_pages }.ok_or(EINVAL)?;
+
+        let mut mapped: usize = 0;
+
+        // SAFETY: `ops` is valid by the invariants of `IoPageTableInner`, and `mapped` is a live
+        // local that outlives the call.
+        to_result(unsafe {
+            map_fn(
+                ops,
+                iova as u64,
+                paddr as u64,
+                pgsize,
+                pgcount,
+                prot as i32,
+                bindings::GFP_KERNEL,
+                &mut mapped,
+            )
+        })?;
+
+        Ok(mapped)
+    }
+
+    /// Unmap a range of pages.
+    ///
+    /// Requests unmapping of `pgcount` pages of `pgsize` bytes each, starting at `iova`, and
+    /// returns the value reported by the underlying `unmap_pages` callback.
+    fn unmap_pages(
+        &mut self,
+        iova: usize,
+        pgsize: usize,
+        pgcount: usize,
+        // TODO: gather: *mut iommu_iotlb_gather,
+    ) -> usize {
+        let ops = self.inner_mut().ops;
+        // Gathering is not supported yet, so the callback receives a null gather pointer.
+        let gather = core::ptr::null_mut();
+
+        // SAFETY: `ops` is valid and non-null by the invariants of `IoPageTableInner`.
+        let unmap_fn = unsafe { (*ops).unmap_pages }.unwrap();
+
+        // SAFETY: `ops` is valid by the invariants of `IoPageTableInner`.
+        unsafe { unmap_fn(ops, iova as u64, pgsize, pgcount, gather) }
+    }
+
+    /// Translate an IOVA to the corresponding physical address, if mapped.
+    ///
+    /// A zero result from the underlying callback is reported as `None`.
+    fn iova_to_phys(&mut self, iova: usize) -> Option<NonZeroU64> {
+        let ops = self.inner_mut().ops;
+
+        // SAFETY: `ops` is valid and non-null by the invariants of `IoPageTableInner`.
+        let translate = unsafe { (*ops).iova_to_phys }.unwrap();
+
+        // SAFETY: `ops` is valid by the invariants of `IoPageTableInner`.
+        let phys = unsafe { translate(ops, iova as u64) };
+        NonZeroU64::new(phys)
+    }
+
+ #[doc(hidden)]
+ fn inner_mut(&mut self) -> &mut IoPageTableInner; // exclusive access to the shared inner state
+
+ #[doc(hidden)]
+ fn inner(&self) -> &IoPageTableInner; // shared access to the inner state
+
+ #[doc(hidden)]
+ fn raw_cfg(&self) -> &bindings::io_pgtable_cfg { // the raw C configuration stored at creation
+ &self.inner().cfg
+ }
+}
+
+unsafe impl Send for IoPageTableInner {} // SAFETY: TODO(review): no justification given; confirm the raw pointers may move between threads
+unsafe impl Sync for IoPageTableInner {} // SAFETY: TODO(review): no justification given; confirm shared access from several threads is sound
+
+/// C-ABI trampoline that forwards a "flush all" request to [`FlushOps::tlb_flush_all`].
+unsafe extern "C" fn tlb_flush_all_callback<T: FlushOps>(cookie: *mut core::ffi::c_void) {
+    // SAFETY: The C side is expected to hand back the cookie registered in `new_fmt`, which came
+    // from `T::Data::into_foreign` and has not been reclaimed yet.
+    let data = unsafe { T::Data::borrow(cookie) };
+    T::tlb_flush_all(data);
+}
+
+/// C-ABI trampoline that forwards a walk-cache flush to [`FlushOps::tlb_flush_walk`].
+unsafe extern "C" fn tlb_flush_walk_callback<T: FlushOps>(
+    iova: core::ffi::c_ulong,
+    size: usize,
+    granule: usize,
+    cookie: *mut core::ffi::c_void,
+) {
+    // SAFETY: The C side is expected to hand back the cookie registered in `new_fmt`, which came
+    // from `T::Data::into_foreign` and has not been reclaimed yet.
+    let data = unsafe { T::Data::borrow(cookie) };
+    let start = iova as usize;
+    T::tlb_flush_walk(data, start, size, granule);
+}
+
+/// C-ABI trampoline that forwards a single-page invalidation to [`FlushOps::tlb_add_page`].
+///
+/// The gather argument is currently ignored.
+unsafe extern "C" fn tlb_add_page_callback<T: FlushOps>(
+    _gather: *mut bindings::iommu_iotlb_gather,
+    iova: core::ffi::c_ulong,
+    granule: usize,
+    cookie: *mut core::ffi::c_void,
+) {
+    // SAFETY: The C side is expected to hand back the cookie registered in `new_fmt`, which came
+    // from `T::Data::into_foreign` and has not been reclaimed yet.
+    let data = unsafe { T::Data::borrow(cookie) };
+    let page = iova as usize;
+    T::tlb_add_page(data, page, granule);
+}
+
+macro_rules! iopt_cfg {
+ ($name:ident, $field:ident, $type:ident) => {
+ /// An IOMMU page table configuration for a specific kind of pagetable (alias of the bindgen type).
+ pub type $name = bindings::$type;
+
+ impl GetConfig for $name {
+ fn cfg(iopt: &impl IoPageTable) -> &$name {
+ unsafe { &iopt.raw_cfg().__bindgen_anon_1.$field } // union member read; SAFETY: TODO(review): no justification given
+ }
+ }
+ };
+}
+
+impl GetConfig for () { // for page table types declared with `()` as their config (see `AMDIOMMUV1`)
+ fn cfg(_iopt: &impl IoPageTable) -> &() {
+ &()
+ }
+}
+
+macro_rules! iopt_type {
+ ($type:ident, $cfg:ty, $fmt:ident) => {
+ /// Represents an IOPagetable of this type, using `T` for its TLB flush callbacks.
+ pub struct $type<T: FlushOps>(IoPageTableInner, PhantomData<T>);
+
+ impl<T: FlushOps> $type<T> {
+ /// Creates a new IOPagetable implementation of this type, taking ownership of `data`.
+ pub fn new(dev: &dyn device::RawDevice, config: Config, data: T::Data) -> Result<Self> {
+ Ok(Self(
+ <Self as IoPageTable>::new_fmt::<T>(dev, bindings::$fmt, config, data)?,
+ PhantomData,
+ ))
+ }
+
+ /// Get the format-specific configuration for this IOPagetable.
+ pub fn cfg(&self) -> &$cfg {
+ <$cfg as GetConfig>::cfg(self)
+ }
+ }
+
+ impl<T: FlushOps> crate::private::Sealed for $type<T> {}
+
+ impl<T: FlushOps> IoPageTable for $type<T> {
+ const FLUSH_OPS: bindings::iommu_flush_ops = bindings::iommu_flush_ops {
+ tlb_flush_all: Some(tlb_flush_all_callback::<T>),
+ tlb_flush_walk: Some(tlb_flush_walk_callback::<T>),
+ tlb_add_page: Some(tlb_add_page_callback::<T>),
+ };
+
+ fn inner(&self) -> &IoPageTableInner {
+ &self.0
+ }
+
+ fn inner_mut(&mut self) -> &mut IoPageTableInner {
+ &mut self.0
+ }
+ }
+
+ impl<T: FlushOps> Drop for $type<T> {
+ fn drop(&mut self) {
+ // SAFETY: The pointer is valid by the type invariant.
+ unsafe { bindings::free_io_pgtable_ops(self.0.ops) };
+
+ // Free context data, now that the page table ops that used it have been freed.
+ //
+ // SAFETY: This matches the call to `into_foreign` from `new_fmt` in the success case.
+ unsafe { T::Data::from_foreign(self.0.data) };
+ }
+ }
+ };
+}
+
+// The per-format config structs are anonymous union members, so only positional bindgen names exist (presumably tied to member order; verify).
+iopt_cfg!(
+ ARMLPAES1Cfg,
+ arm_lpae_s1_cfg,
+ io_pgtable_cfg__bindgen_ty_1__bindgen_ty_1
+);
+iopt_cfg!(
+ ARMLPAES2Cfg,
+ arm_lpae_s2_cfg,
+ io_pgtable_cfg__bindgen_ty_1__bindgen_ty_2
+);
+iopt_cfg!(
+ ARMv7SCfg,
+ arm_v7s_cfg,
+ io_pgtable_cfg__bindgen_ty_1__bindgen_ty_3
+);
+iopt_cfg!(
+ ARMMaliLPAECfg,
+ arm_mali_lpae_cfg,
+ io_pgtable_cfg__bindgen_ty_1__bindgen_ty_4
+);
+iopt_cfg!(
+ AppleDARTCfg,
+ apple_dart_cfg,
+ io_pgtable_cfg__bindgen_ty_1__bindgen_ty_5
+);
+iopt_cfg!(
+ AppleUATCfg,
+ apple_uat_cfg,
+ io_pgtable_cfg__bindgen_ty_1__bindgen_ty_6
+);
+
+iopt_type!(ARM32LPAES1, ARMLPAES1Cfg, io_pgtable_fmt_ARM_32_LPAE_S1);
+iopt_type!(ARM32LPAES2, ARMLPAES2Cfg, io_pgtable_fmt_ARM_32_LPAE_S2);
+iopt_type!(ARM64LPAES1, ARMLPAES1Cfg, io_pgtable_fmt_ARM_64_LPAE_S1);
+iopt_type!(ARM64LPAES2, ARMLPAES2Cfg, io_pgtable_fmt_ARM_64_LPAE_S2);
+iopt_type!(ARMv7S, ARMv7SCfg, io_pgtable_fmt_ARM_V7S);
+iopt_type!(ARMMaliLPAE, ARMMaliLPAECfg, io_pgtable_fmt_ARM_MALI_LPAE);
+iopt_type!(AMDIOMMUV1, (), io_pgtable_fmt_AMD_IOMMU_V1);
+iopt_type!(AppleDART, AppleDARTCfg, io_pgtable_fmt_APPLE_DART);
+iopt_type!(AppleDART2, AppleDARTCfg, io_pgtable_fmt_APPLE_DART2);
+iopt_type!(AppleUAT, AppleUATCfg, io_pgtable_fmt_APPLE_UAT);
diff --git a/rust/kernel/ioctl.rs b/rust/kernel/ioctl.rs
new file mode 100644
index 000000000000..6cd8e5738b91
--- /dev/null
+++ b/rust/kernel/ioctl.rs
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+#![allow(non_snake_case)]
+
+//! ioctl() number definitions
+//!
+//! C header: [`include/asm-generic/ioctl.h`](../../../../include/asm-generic/ioctl.h)
+
+/// Build an ioctl number, analogous to the C macro of the same name.
+///
+/// Asserts that every field fits within its mask before packing the fields together.
+const fn _IOC(dir: u32, ty: u32, nr: u32, size: usize) -> u32 {
+    core::assert!(dir <= bindings::_IOC_DIRMASK);
+    core::assert!(ty <= bindings::_IOC_TYPEMASK);
+    core::assert!(nr <= bindings::_IOC_NRMASK);
+    core::assert!(size <= (bindings::_IOC_SIZEMASK as usize));
+
+    let dir_bits = dir << bindings::_IOC_DIRSHIFT;
+    let ty_bits = ty << bindings::_IOC_TYPESHIFT;
+    let nr_bits = nr << bindings::_IOC_NRSHIFT;
+    let size_bits = (size as u32) << bindings::_IOC_SIZESHIFT;
+
+    dir_bits | ty_bits | nr_bits | size_bits
+}
+
+/// Build an ioctl number for an argumentless ioctl (direction `_IOC_NONE`, size 0).
+pub const fn _IO(ty: u32, nr: u32) -> u32 {
+ _IOC(bindings::_IOC_NONE, ty, nr, 0)
+}
+
+/// Build an ioctl number for a read-only ioctl whose argument has type `T`.
+pub const fn _IOR<T>(ty: u32, nr: u32) -> u32 {
+    let arg_size = core::mem::size_of::<T>();
+    _IOC(bindings::_IOC_READ, ty, nr, arg_size)
+}
+
+/// Build an ioctl number for a write-only ioctl whose argument has type `T`.
+pub const fn _IOW<T>(ty: u32, nr: u32) -> u32 {
+    let arg_size = core::mem::size_of::<T>();
+    _IOC(bindings::_IOC_WRITE, ty, nr, arg_size)
+}
+
+/// Build an ioctl number for a read-write ioctl whose argument has type `T`.
+pub const fn _IOWR<T>(ty: u32, nr: u32) -> u32 {
+    let dir = bindings::_IOC_READ | bindings::_IOC_WRITE;
+    let arg_size = core::mem::size_of::<T>();
+    _IOC(dir, ty, nr, arg_size)
+}
+
+/// Extract the direction field from an ioctl number.
+pub const fn _IOC_DIR(nr: u32) -> u32 {
+    let shifted = nr >> bindings::_IOC_DIRSHIFT;
+    shifted & bindings::_IOC_DIRMASK
+}
+
+/// Extract the type field from an ioctl number.
+pub const fn _IOC_TYPE(nr: u32) -> u32 {
+    let shifted = nr >> bindings::_IOC_TYPESHIFT;
+    shifted & bindings::_IOC_TYPEMASK
+}
+
+/// Extract the sequence-number (`nr`) field from an ioctl number.
+pub const fn _IOC_NR(nr: u32) -> u32 {
+    let shifted = nr >> bindings::_IOC_NRSHIFT;
+    shifted & bindings::_IOC_NRMASK
+}
+
+/// Extract the argument-size field from an ioctl number.
+pub const fn _IOC_SIZE(nr: u32) -> usize {
+    let shifted = nr >> bindings::_IOC_SIZESHIFT;
+    (shifted & bindings::_IOC_SIZEMASK) as usize
+}
diff --git a/rust/kernel/lib.rs b/rust/kernel/lib.rs
index 53040fa9e897..390f9f7b6f9c 100644
--- a/rust/kernel/lib.rs
+++ b/rust/kernel/lib.rs
@@ -13,7 +13,16 @@
#![no_std]
#![feature(allocator_api)]
-#![feature(core_ffi_c)]
+#![feature(associated_type_defaults)]
+#![feature(coerce_unsized)]
+#![feature(const_mut_refs)]
+#![feature(const_refs_to_cell)]
+#![feature(const_trait_impl)]
+#![feature(dispatch_from_dyn)]
+#![feature(duration_constants)]
+#![feature(new_uninit)]
+#![feature(receiver_trait)]
+#![feature(unsize)]
// Ensure conditional compilation based on the kernel configuration works;
// otherwise we may silently break things like initcall handling.
@@ -23,15 +32,36 @@ compile_error!("Missing kernel configuration for conditional compilation");
#[cfg(not(test))]
#[cfg(not(testlib))]
mod allocator;
+
mod build_assert;
+pub mod delay;
+pub mod device;
+#[cfg(CONFIG_DMA_SHARED_BUFFER)]
+pub mod dma_fence;
+pub mod driver;
+#[cfg(CONFIG_DRM = "y")]
+pub mod drm;
pub mod error;
+pub mod io_buffer;
+pub mod io_mem;
+pub mod io_pgtable;
+pub mod ioctl;
+pub mod module_param;
+pub mod of;
+pub mod platform;
pub mod prelude;
pub mod print;
+pub mod revocable;
+pub mod soc;
mod static_assert;
#[doc(hidden)]
pub mod std_vendor;
pub mod str;
+pub mod sync;
+pub mod time;
pub mod types;
+pub mod user_ptr;
+pub mod xarray;
#[doc(hidden)]
pub use bindings;
@@ -40,6 +70,16 @@ pub use macros;
#[doc(hidden)]
pub use build_error::build_error;
+pub(crate) mod private { // holds the `Sealed` marker supertrait required by `IoPageTable`
+ #[allow(unreachable_pub)] // `Sealed` is `pub`, but this module is only `pub(crate)`
+ pub trait Sealed {}
+}
+
+/// Page size in bytes, defined in terms of the [`PAGE_SHIFT`] macro from C.
+///
+/// [`PAGE_SHIFT`]: ../../../include/asm-generic/page.h
+pub const PAGE_SIZE: usize = 1 << bindings::PAGE_SHIFT;
+
/// Prefix to appear before log messages printed from within the `kernel` crate.
const __LOG_PREFIX: &[u8] = b"rust_kernel\0";
@@ -53,7 +93,7 @@ pub trait Module: Sized + Sync {
/// should do.
///
/// Equivalent to the `module_init` macro in the C API.
- fn init(module: &'static ThisModule) -> error::Result<Self>;
+ fn init(name: &'static crate::str::CStr, module: &'static ThisModule) -> error::Result<Self>;
}
/// Equivalent to `THIS_MODULE` in the C API.
@@ -73,6 +113,43 @@ impl ThisModule {
pub const unsafe fn from_ptr(ptr: *mut bindings::module) -> ThisModule {
ThisModule(ptr)
}
+
+ /// Locks the module parameters to access them (the C lock is only taken with `CONFIG_SYSFS`).
+ ///
+ /// Returns a [`KParamGuard`] that will release the lock when dropped.
+ pub fn kernel_param_lock(&self) -> KParamGuard<'_> {
+ // SAFETY: `kernel_param_lock` will check if the pointer is null and
+ // use the built-in mutex in that case.
+ #[cfg(CONFIG_SYSFS)]
+ unsafe {
+ bindings::kernel_param_lock(self.0)
+ }
+
+ KParamGuard {
+ #[cfg(CONFIG_SYSFS)]
+ this_module: self,
+ phantom: core::marker::PhantomData,
+ }
+ }
+}
+
+/// Scoped lock on the kernel parameters of [`ThisModule`], from [`ThisModule::kernel_param_lock`].
+///
+/// Lock will be released when this struct is dropped (the `Drop` impl exists only with `CONFIG_SYSFS`).
+pub struct KParamGuard<'a> {
+ #[cfg(CONFIG_SYSFS)]
+ this_module: &'a ThisModule,
+ phantom: core::marker::PhantomData<&'a ()>,
+}
+
+#[cfg(CONFIG_SYSFS)]
+impl<'a> Drop for KParamGuard<'a> {
+ fn drop(&mut self) {
+ // SAFETY: `kernel_param_unlock` will check if the pointer is null and
+ // use the built-in mutex in that case. The existence of `self`
+ // guarantees that the lock is held.
+ unsafe { bindings::kernel_param_unlock(self.this_module.0) }
+ }
}
#[cfg(not(any(testlib, test)))]
@@ -85,3 +162,66 @@ fn panic(info: &core::panic::PanicInfo<'_>) -> ! {
// instead of `!`. See <https://github.com/rust-lang/rust-bindgen/issues/2094>.
loop {}
}
+
+/// Calculates the byte offset of a field from the beginning of the struct it belongs to.
+///
+/// # Examples
+///
+/// ```
+/// # use kernel::prelude::*;
+/// # use kernel::offset_of;
+/// struct Test {
+/// a: u64,
+/// b: u32,
+/// }
+///
+/// assert_eq!(offset_of!(Test, b), 8);
+/// ```
+#[macro_export]
+macro_rules! offset_of {
+ ($type:ty, $($f:tt)*) => {{
+ let tmp = core::mem::MaybeUninit::<$type>::uninit();
+ let outer = tmp.as_ptr();
+ // To avoid warnings when nesting `unsafe` blocks.
+ #[allow(unused_unsafe)]
+ // SAFETY: The pointer is valid and aligned, just not initialised; `addr_of` ensures that
+ // we don't actually read from `outer` (which would be UB) nor create an intermediate
+ // reference.
+ let inner = unsafe { core::ptr::addr_of!((*outer).$($f)*) } as *const u8;
+ // To avoid warnings when nesting `unsafe` blocks.
+ #[allow(unused_unsafe)]
+ // SAFETY: The two pointers are within the same allocation block (the `tmp` local above).
+ unsafe { inner.offset_from(outer as *const u8) }
+ }}
+}
+
+/// Produces a `*const` pointer to an object from a pointer to one of its fields.
+///
+/// # Safety
+///
+/// Callers must ensure that the pointer to the field is in fact a pointer to the specified field,
+/// as opposed to a pointer to another object of the same type. If this condition is not met,
+/// any dereference of the resulting pointer is UB.
+///
+/// # Examples
+///
+/// ```
+/// # use kernel::container_of;
+/// struct Test {
+/// a: u64,
+/// b: u32,
+/// }
+///
+/// let test = Test { a: 10, b: 20 };
+/// let b_ptr = &test.b;
+/// let test_alias = container_of!(b_ptr, Test, b);
+/// assert!(core::ptr::eq(&test, test_alias));
+/// ```
+#[macro_export]
+macro_rules! container_of {
+ ($ptr:expr, $type:ty, $($f:tt)*) => {{
+ let ptr = $ptr as *const _ as *const u8;
+ let offset = $crate::offset_of!($type, $($f)*);
+ ptr.wrapping_offset(-offset) as *const $type
+ }}
+}
diff --git a/rust/kernel/module_param.rs b/rust/kernel/module_param.rs
new file mode 100644
index 000000000000..d587f1036349
--- /dev/null
+++ b/rust/kernel/module_param.rs
@@ -0,0 +1,501 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Types for module parameters.
+//!
+//! C header: [`include/linux/moduleparam.h`](../../../include/linux/moduleparam.h)
+
+use crate::error::{code::*, from_kernel_result};
+use crate::str::{CStr, Formatter};
+use core::fmt::Write;
+
+/// A type that can back a module parameter.
+///
+/// The `sysfs` representation is produced through [`core::fmt::Display`] (the same
+/// implementation that drives [`alloc::string::ToString::to_string`]). Showing the parameter
+/// fails if that output, together with its terminating null byte, needs more than [`PAGE_SIZE`]
+/// bytes.
+///
+/// [`PAGE_SIZE`]: `crate::PAGE_SIZE`
+pub trait ModuleParam: core::fmt::Display + core::marker::Sized {
+    /// The type through which the kernel module observes the parameter.
+    ///
+    /// It can be different from `Self`, for instance when `Self` has to track ownership
+    /// internally or reserves room for further values. [`StringParam`] and [`ArrayParam`] are
+    /// examples of this.
+    type Value: ?Sized;
+
+    /// Whether the parameter may appear without an argument.
+    ///
+    /// When `true`, plain `module.param` is accepted in addition to `module.param=foo`.
+    const NOARG_ALLOWED: bool;
+
+    /// Parses a parameter argument into a parameter value.
+    ///
+    /// Return `None` when the argument cannot be parsed. `arg == None` means the parameter was
+    /// given without an argument; when `NOARG_ALLOWED` is `false`, `arg` is always `Some(_)`.
+    ///
+    /// Parameters passed at boot time are set before [`kmalloc`] is available (even if the
+    /// module itself is loaded later). In that situation the argument buffer stays valid for
+    /// the whole lifetime of the kernel. Implementations that need to allocate should therefore
+    /// check for the allocator first (with [`crate::bindings::slab_is_available`]) and fall
+    /// back to a non-allocating path when it is missing. Keeping references to `arg` inside
+    /// `Self` is safe only in that no-allocator case; otherwise a copy should be made.
+    ///
+    /// [`kmalloc`]: ../../../include/linux/slab.h
+    fn try_from_param_arg(arg: Option<&'static [u8]>) -> Option<Self>;
+
+    /// Returns the current value of the parameter for use in the kernel module.
+    ///
+    /// Not meant to be called directly: use the `read` wrapper generated by
+    /// [`macros::module`] instead.
+    fn value(&self) -> &Self::Value;
+
+    /// Sets the module parameter from a string.
+    ///
+    /// Invoked when the module is loaded and when the value is written through `sysfs`.
+    /// Returns `0` on success and the kernel errno for `EINVAL` when parsing fails.
+    ///
+    /// # Safety
+    ///
+    /// If `val` is non-null then it must point to a valid null-terminated
+    /// string. The `arg` field of `param` must be an instance of `Self`.
+    unsafe extern "C" fn set_param(
+        val: *const core::ffi::c_char,
+        param: *const crate::bindings::kernel_param,
+    ) -> core::ffi::c_int {
+        // A null `val` stands for "parameter given without an argument".
+        let arg = if !val.is_null() {
+            // SAFETY: By the safety requirements of this function, a non-null `val` points to
+            // a valid null-terminated string.
+            Some(unsafe { CStr::from_char_ptr(val).as_bytes() })
+        } else {
+            None
+        };
+
+        let parsed = match Self::try_from_param_arg(arg) {
+            Some(parsed) => parsed,
+            None => return EINVAL.to_kernel_errno(),
+        };
+
+        // SAFETY: By the safety requirements of this function, the `arg` field of `param` is
+        // an instance of `Self`.
+        let slot = unsafe { (*param).__bindgen_anon_1.arg } as *mut Self;
+        // SAFETY: `slot` points to an instance of `Self` (see above). The previous value is
+        // moved out by `replace` and dropped right away.
+        drop(unsafe { core::ptr::replace(slot, parsed) });
+        0
+    }
+
+    /// Writes a string representation of the current parameter value to `buf`.
+    ///
+    /// Invoked to show the current parameter value in `sysfs`. The written text is followed
+    /// by a null byte, and the reported length includes that byte.
+    ///
+    /// # Safety
+    ///
+    /// `buf` must be a buffer of length at least `kernel::PAGE_SIZE` that is
+    /// writeable. The `arg` field of `param` must be an instance of `Self`.
+    unsafe extern "C" fn get_param(
+        buf: *mut core::ffi::c_char,
+        param: *const crate::bindings::kernel_param,
+    ) -> core::ffi::c_int {
+        from_kernel_result! {
+            // SAFETY: The C contract guarantees that the buffer is at least `PAGE_SIZE` bytes.
+            let mut out = unsafe { Formatter::from_buffer(buf.cast(), crate::PAGE_SIZE) };
+            // SAFETY: By the safety requirements of this function, the `arg` field of `param`
+            // is an instance of `Self`.
+            let current = unsafe { &*((*param).__bindgen_anon_1.arg as *const Self) };
+            write!(out, "{}\0", current)?;
+            Ok(out.bytes_written().try_into()?)
+        }
+    }
+
+    /// Drops the parameter in place.
+    ///
+    /// Invoked when the module is unloaded.
+    ///
+    /// # Safety
+    ///
+    /// The `arg` field of `param` must be an instance of `Self`.
+    unsafe extern "C" fn free(arg: *mut core::ffi::c_void) {
+        // SAFETY: By the safety requirements of this function, `arg` points to an instance of
+        // `Self`, which is dropped exactly once here.
+        unsafe { core::ptr::drop_in_place(arg.cast::<Self>()) };
+    }
+}
+
+/// Trait for parsing integers.
+///
+/// Strings beginning with `0x`, `0o`, or `0b` (in either case) are parsed as hex, octal, or
+/// binary respectively. Strings beginning with `0` otherwise are parsed as
+/// octal. Anything else is parsed as decimal. A single leading `+` or `-` is also
+/// permitted, and it must come before any radix prefix. The string may contain a trailing
+/// newline. Any string parsed by [`kstrtol()`] or [`kstrtoul()`] will be successfully parsed.
+///
+/// Negative inputs are parsed as a `u64` magnitude and negated afterwards, so the most
+/// negative value of every supported type (for example `-128` for `i8`) is accepted.
+///
+/// [`kstrtol()`]: https://www.kernel.org/doc/html/latest/core-api/kernel-api.html#c.kstrtol
+/// [`kstrtoul()`]: https://www.kernel.org/doc/html/latest/core-api/kernel-api.html#c.kstrtoul
+trait ParseInt: Sized {
+    /// Forwards to the inherent `from_str_radix` of the primitive integer type.
+    fn from_str_radix(src: &str, radix: u32) -> Result<Self, core::num::ParseIntError>;
+
+    /// Returns `-magnitude` as `Self`, or `None` if that value is not representable.
+    fn from_negated_u64(magnitude: u64) -> Option<Self>;
+
+    /// Splits an unsigned literal into its radix and the digits that follow the radix prefix.
+    ///
+    /// A bare leading `0` selects octal and stays part of the digits, so `"0"` parses as zero.
+    fn split_radix(src: &str) -> (u32, &str) {
+        const PREFIXES: [(&str, u32); 6] = [
+            ("0x", 16),
+            ("0X", 16),
+            ("0o", 8),
+            ("0O", 8),
+            ("0b", 2),
+            ("0B", 2),
+        ];
+        for &(prefix, radix) in PREFIXES.iter() {
+            if let Some(digits) = src.strip_prefix(prefix) {
+                return (radix, digits);
+            }
+        }
+        if src.starts_with('0') {
+            (8, src)
+        } else {
+            (10, src)
+        }
+    }
+
+    /// Parses `src` according to the rules in the trait documentation.
+    ///
+    /// Returns `None` for empty input, malformed digits, a misplaced or repeated sign, or a
+    /// value that does not fit in `Self`.
+    fn from_str(src: &str) -> Option<Self> {
+        let src = src.strip_suffix('\n').unwrap_or(src);
+        let (negative, unsigned) = match src.bytes().next()? {
+            b'-' => (true, &src[1..]),
+            b'+' => (false, &src[1..]),
+            _ => (false, src),
+        };
+        let (radix, digits) = Self::split_radix(unsigned);
+        // `from_str_radix` accepts a sign of its own. The only sign allowed here is the one
+        // consumed above, so inputs such as `+-5`, `--5` or `0x-5` must be rejected.
+        if digits.starts_with(|c: char| c == '+' || c == '-') {
+            return None;
+        }
+        if negative {
+            // Parsing the magnitude into `Self` first would overflow for `Self::MIN`, whose
+            // magnitude is one larger than `Self::MAX`.
+            Self::from_negated_u64(u64::from_str_radix(digits, radix).ok()?)
+        } else {
+            Self::from_str_radix(digits, radix).ok()
+        }
+    }
+}
+
+macro_rules! impl_parse_int {
+    ($ty:ident) => {
+        impl ParseInt for $ty {
+            fn from_str_radix(src: &str, radix: u32) -> Result<Self, core::num::ParseIntError> {
+                $ty::from_str_radix(src, radix)
+            }
+
+            fn from_negated_u64(magnitude: u64) -> Option<Self> {
+                // No supported type goes below `i64::MIN`, whose magnitude is 2^63.
+                if magnitude > (1u64 << 63) {
+                    return None;
+                }
+                // The cast maps 2^63 to `i64::MIN`, which `wrapping_neg` leaves unchanged;
+                // every smaller magnitude is simply negated.
+                let negated = (magnitude as i64).wrapping_neg();
+                <$ty as core::convert::TryFrom<i64>>::try_from(negated).ok()
+            }
+        }
+    };
+}
+
+impl_parse_int!(i8);
+impl_parse_int!(u8);
+impl_parse_int!(i16);
+impl_parse_int!(u16);
+impl_parse_int!(i32);
+impl_parse_int!(u32);
+impl_parse_int!(i64);
+impl_parse_int!(u64);
+impl_parse_int!(isize);
+impl_parse_int!(usize);
+
+// Implements `ModuleParam` for a primitive integer type. The argument is mandatory, must be
+// valid UTF-8, and is parsed with `ParseInt::from_str`.
+macro_rules! impl_module_param {
+    ($ty:ident) => {
+        impl ModuleParam for $ty {
+            type Value = $ty;
+
+            const NOARG_ALLOWED: bool = false;
+
+            fn try_from_param_arg(arg: Option<&'static [u8]>) -> Option<Self> {
+                arg.and_then(|bytes| core::str::from_utf8(bytes).ok())
+                    .and_then(|text| <$ty as crate::module_param::ParseInt>::from_str(text))
+            }
+
+            fn value(&self) -> &Self::Value {
+                self
+            }
+        }
+    };
+}
+
+#[doc(hidden)]
+#[macro_export]
+/// Generate a `pub static` [`kernel_param_ops`](../../../include/linux/moduleparam.h) struct whose `set`, `get` and `free` callbacks are the [`ModuleParam`] methods of the given type.
+///
+/// # Examples
+///
+/// ```ignore
+/// make_param_ops!(
+/// /// Documentation for new param ops.
+/// PARAM_OPS_MYTYPE, // Name for the static.
+/// MyType // A type which implements [`ModuleParam`].
+/// );
+/// ```
+macro_rules! make_param_ops {
+ ($ops:ident, $ty:ty) => {
+ $crate::make_param_ops!(
+ #[doc=""]
+ $ops,
+ $ty
+ );
+ };
+ ($(#[$meta:meta])* $ops:ident, $ty:ty) => {
+ $(#[$meta])*
+ ///
+ /// Static [`kernel_param_ops`](../../../include/linux/moduleparam.h)
+ /// struct generated by [`make_param_ops`].
+ pub static $ops: $crate::bindings::kernel_param_ops = $crate::bindings::kernel_param_ops {
+ flags: if <$ty as $crate::module_param::ModuleParam>::NOARG_ALLOWED {
+ $crate::bindings::KERNEL_PARAM_OPS_FL_NOARG
+ } else {
+ 0
+ },
+ set: Some(<$ty as $crate::module_param::ModuleParam>::set_param),
+ get: Some(<$ty as $crate::module_param::ModuleParam>::get_param),
+ free: Some(<$ty as $crate::module_param::ModuleParam>::free),
+ };
+ };
+}
+
+impl_module_param!(i8);
+impl_module_param!(u8);
+impl_module_param!(i16);
+impl_module_param!(u16);
+impl_module_param!(i32);
+impl_module_param!(u32);
+impl_module_param!(i64);
+impl_module_param!(u64);
+impl_module_param!(isize);
+impl_module_param!(usize);
+
+make_param_ops!(
+ /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h)
+ /// for [`i8`].
+ PARAM_OPS_I8,
+ i8
+);
+make_param_ops!(
+ /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h)
+ /// for [`u8`].
+ PARAM_OPS_U8,
+ u8
+);
+make_param_ops!(
+ /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h)
+ /// for [`i16`].
+ PARAM_OPS_I16,
+ i16
+);
+make_param_ops!(
+ /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h)
+ /// for [`u16`].
+ PARAM_OPS_U16,
+ u16
+);
+make_param_ops!(
+ /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h)
+ /// for [`i32`].
+ PARAM_OPS_I32,
+ i32
+);
+make_param_ops!(
+ /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h)
+ /// for [`u32`].
+ PARAM_OPS_U32,
+ u32
+);
+make_param_ops!(
+ /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h)
+ /// for [`i64`].
+ PARAM_OPS_I64,
+ i64
+);
+make_param_ops!(
+ /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h)
+ /// for [`u64`].
+ PARAM_OPS_U64,
+ u64
+);
+make_param_ops!(
+ /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h)
+ /// for [`isize`].
+ PARAM_OPS_ISIZE,
+ isize
+);
+make_param_ops!(
+ /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h)
+ /// for [`usize`].
+ PARAM_OPS_USIZE,
+ usize
+);
+
+impl ModuleParam for bool {
+    type Value = bool;
+
+    const NOARG_ALLOWED: bool = true;
+
+    /// Parses a boolean parameter.
+    ///
+    /// A parameter given without an argument is `true`. Otherwise `y`, `Y`, `1` and `true`
+    /// yield `true`; `n`, `N`, `0` and `false` yield `false`; anything else is rejected.
+    /// A single trailing newline is ignored, matching the integer parameters in this file.
+    fn try_from_param_arg(arg: Option<&'static [u8]>) -> Option<Self> {
+        let text = match arg {
+            None => return Some(true),
+            // Without this, an argument ending in `\n` would never match the literals below.
+            Some(bytes) => bytes.strip_suffix(b"\n").unwrap_or(bytes),
+        };
+        match text {
+            b"y" | b"Y" | b"1" | b"true" => Some(true),
+            b"n" | b"N" | b"0" | b"false" => Some(false),
+            _ => None,
+        }
+    }
+
+    fn value(&self) -> &Self::Value {
+        self
+    }
+}
+
+make_param_ops!(
+ /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h)
+ /// for [`bool`].
+ PARAM_OPS_BOOL,
+ bool
+);
+
+/// An array of at __most__ `N` values.
+///
+/// # Invariant
+///
+/// The first `self.used` elements of `self.values` are initialized.
+pub struct ArrayParam<T, const N: usize> {
+ values: [core::mem::MaybeUninit<T>; N],
+ used: usize,
+}
+
+impl<T, const N: usize> ArrayParam<T, { N }> {
+    /// Returns the initialized prefix of the backing storage as a slice of `T`.
+    fn values(&self) -> &[T] {
+        let initialized = &self.values[..self.used];
+        // SAFETY: By the `ArrayParam` invariant the first `self.used` elements are initialized,
+        // and `MaybeUninit<T>` has the same layout as `T`, so the prefix can be viewed as `[T]`.
+        unsafe { core::slice::from_raw_parts(initialized.as_ptr().cast::<T>(), initialized.len()) }
+    }
+}
+
+impl<T: Copy, const N: usize> ArrayParam<T, { N }> {
+    /// Creates an empty array parameter.
+    const fn new() -> Self {
+        // INVARIANT: `used` is zero, so no element is claimed to be initialized.
+        Self {
+            values: [core::mem::MaybeUninit::uninit(); N],
+            used: 0,
+        }
+    }
+
+    /// Appends `val`, or does nothing when all `N` slots are already in use.
+    const fn push(&mut self, val: T) {
+        if self.used >= N {
+            return;
+        }
+        let slot = self.used;
+        // INVARIANT: Slot `slot` is written before `used` grows to cover it, so the first
+        // `self.used` elements stay initialized.
+        self.values[slot] = core::mem::MaybeUninit::new(val);
+        self.used = slot + 1;
+    }
+
+    /// Create an instance of `ArrayParam` initialized with `vals`.
+    ///
+    /// Values beyond the first `N` are ignored, as `push` does nothing once the array is full.
+    ///
+    /// This function is only meant to be used in the [`module::module`] macro.
+    pub const fn create(vals: &[T]) -> Self {
+        let mut array = Self::new();
+        let mut next = 0;
+        // Iterators are not usable in `const fn`, hence the manual index loop.
+        loop {
+            if next >= vals.len() {
+                break;
+            }
+            array.push(vals[next]);
+            next += 1;
+        }
+        array
+    }
+}
+
+impl<T: core::fmt::Display, const N: usize> core::fmt::Display for ArrayParam<T, { N }> {
+    /// Writes every stored value followed by a comma (including after the last one).
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        self.values()
+            .iter()
+            .try_for_each(|item| write!(f, "{},", item))
+    }
+}
+
+impl<T: Copy + core::fmt::Display + ModuleParam, const N: usize> ModuleParam
+    for ArrayParam<T, { N }>
+{
+    type Value = [T];
+
+    const NOARG_ALLOWED: bool = false;
+
+    /// Parses a comma-separated list of at most `N` elements.
+    ///
+    /// Returns `None` when the argument is missing, when any element fails to parse as `T`,
+    /// or when more than `N` elements are supplied.
+    fn try_from_param_arg(arg: Option<&'static [u8]>) -> Option<Self> {
+        let args = arg?;
+        let mut result = Self::new();
+        for element in args.split(|b| *b == b',') {
+            // `push` silently ignores values once the array is full. Reject the input here
+            // rather than dropping part of what the user passed.
+            if result.used == N {
+                return None;
+            }
+            result.push(T::try_from_param_arg(Some(element))?);
+        }
+        Some(result)
+    }
+
+    fn value(&self) -> &Self::Value {
+        self.values()
+    }
+}
+
+/// A C-style string parameter.
+///
+/// The Rust version of the [`charp`] parameter. This type is meant to be
+/// used by the [`macros::module`] macro, not handled directly. Instead use the
+/// `read` method generated by that macro.
+///
+/// [`charp`]: ../../../include/linux/moduleparam.h
+pub enum StringParam {
+ /// A borrowed parameter value.
+ ///
+ /// Either the default value (which is static in the module) or borrowed
+ /// from the original argument buffer used to set the value.
+ Ref(&'static [u8]),
+
+ /// A value that was allocated when the parameter was set.
+ ///
+ /// The value needs to be freed when the parameter is reset or the module is
+ /// unloaded.
+ Owned(alloc::vec::Vec<u8>),
+}
+
+impl StringParam {
+    /// Returns the parameter's bytes, whether they are borrowed or owned.
+    fn bytes(&self) -> &[u8] {
+        match *self {
+            StringParam::Ref(borrowed) => borrowed,
+            StringParam::Owned(ref owned) => owned.as_slice(),
+        }
+    }
+}
+
+impl core::fmt::Display for StringParam {
+    /// Writes the value as text when it is valid UTF-8, and as a debug-formatted byte slice
+    /// otherwise.
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        let raw = self.bytes();
+        if let Ok(text) = core::str::from_utf8(raw) {
+            write!(f, "{}", text)
+        } else {
+            write!(f, "{:?}", raw)
+        }
+    }
+}
+
+impl ModuleParam for StringParam {
+    type Value = [u8];
+
+    const NOARG_ALLOWED: bool = false;
+
+    /// Stores the argument as an owned copy when the slab allocator is available, and as a
+    /// borrow of the argument buffer otherwise. A missing argument or a failed allocation
+    /// yields `None`.
+    fn try_from_param_arg(arg: Option<&'static [u8]>) -> Option<Self> {
+        // SAFETY: It is always safe to call [`slab_is_available`](../../../include/linux/slab.h).
+        let can_allocate = unsafe { crate::bindings::slab_is_available() };
+        let bytes = arg?;
+        if !can_allocate {
+            return Some(StringParam::Ref(bytes));
+        }
+        let mut owned = alloc::vec::Vec::new();
+        owned.try_extend_from_slice(bytes).ok()?;
+        Some(StringParam::Owned(owned))
+    }
+
+    fn value(&self) -> &Self::Value {
+        self.bytes()
+    }
+}
+
+make_param_ops!(
+ /// Rust implementation of [`kernel_param_ops`](../../../include/linux/moduleparam.h)
+ /// for [`StringParam`].
+ PARAM_OPS_STR,
+ StringParam
+);
diff --git a/rust/kernel/of.rs b/rust/kernel/of.rs
new file mode 100644
index 000000000000..a27621b57fbb
--- /dev/null
+++ b/rust/kernel/of.rs
@@ -0,0 +1,546 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Devicetree and Open Firmware abstractions.
+//!
+//! C header: [`include/linux/of_*.h`](../../../../include/linux/of_*.h)
+
+// Note: Most OF functions turn into inline dummies with CONFIG_OF(_*) disabled.
+// We have to either add config conditionals to helpers.c or here; let's do it
+// here for now. In the future, once bindgen can auto-generate static inline
+// helpers, this can go away if desired.
+
+use core::marker::PhantomData;
+use core::num::NonZeroU32;
+
+use crate::{
+ bindings, driver,
+ prelude::*,
+ str::{BStr, CStr},
+};
+
+/// An open firmware device id.
+#[derive(Clone, Copy)]
+pub enum DeviceId {
+ /// An open firmware device id where only a compatible string is specified.
+ Compatible(&'static BStr),
+}
+
+/// Defines a const open firmware device id table that also carries per-entry data/context/info.
+///
+/// # Example
+///
+/// ```
+/// # use kernel::{define_of_id_table, module_of_id_table, driver_of_id_table};
+/// use kernel::of;
+///
+/// define_of_id_table! {MY_ID_TABLE, u32, [
+/// (of::DeviceId::Compatible(b"test-device1,test-device2"), Some(0xff)),
+/// (of::DeviceId::Compatible(b"test-device3"), None),
+/// ]};
+///
+/// module_of_id_table!(MOD_TABLE, MY_ID_TABLE);
+///
+/// // Within the `Driver` implementation:
+/// driver_of_id_table!(MY_ID_TABLE);
+/// ```
+#[macro_export]
+macro_rules! define_of_id_table {
+ ($name:ident, $data_type:ty, $($t:tt)*) => {
+ $crate::define_id_array!($name, $crate::of::DeviceId, $data_type, $($t)*);
+ };
+}
+
+/// Convenience macro to declare which device ID table to use for a bus driver.
+#[macro_export]
+macro_rules! driver_of_id_table {
+ ($name:expr) => {
+ $crate::driver_id_table!(
+ OF_DEVICE_ID_TABLE,
+ $crate::of::DeviceId,
+ Self::IdInfo,
+ $name
+ );
+ };
+}
+
+/// Declare a device ID table as a module-level table. This creates the necessary module alias
+/// entries to enable module autoloading.
+#[macro_export]
+macro_rules! module_of_id_table {
+ ($item_name:ident, $table_name:ident) => {
+ $crate::module_id_table!($item_name, "of", $crate::of::DeviceId, $table_name);
+ };
+}
+
+// SAFETY: `ZERO` is all zeroed-out and `to_rawid` stores `offset` in `of_device_id::data`.
+unsafe impl const driver::RawDeviceId for DeviceId {
+ type RawType = bindings::of_device_id;
+ const ZERO: Self::RawType = bindings::of_device_id {
+ name: [0; 32],
+ type_: [0; 32],
+ compatible: [0; 128],
+ data: core::ptr::null(),
+ };
+
+ fn to_rawid(&self, offset: isize) -> Self::RawType {
+ let DeviceId::Compatible(compatible) = self;
+ let mut id = Self::ZERO;
+ let mut i = 0;
+ while i < compatible.len() {
+ // If `compatible` does not fit in `id.compatible`, an "index out of bounds" build time
+ // error will be triggered.
+ id.compatible[i] = compatible[i] as _;
+ i += 1;
+ }
+ id.compatible[i] = b'\0' as _;
+ id.data = offset as _;
+ id
+ }
+}
+
+/// Type alias for an OF phandle
+pub type PHandle = bindings::phandle;
+
+/// An OF device tree node.
+///
+/// # Invariants
+///
+/// `raw_node` points to a valid OF node, and we hold a reference to it.
+pub struct Node {
+ raw_node: *mut bindings::device_node,
+}
+
+#[allow(dead_code)]
+impl Node {
+ /// Creates a `Node` from a raw C pointer. The pointer must be owned (the caller
+ /// gives up its reference). If the pointer is NULL, returns None.
+ pub(crate) unsafe fn from_raw(raw_node: *mut bindings::device_node) -> Option<Node> {
+ if raw_node.is_null() {
+ None
+ } else {
+ // INVARIANT: `raw_node` is valid per the above contract, and non-null per the
+ // above check.
+ Some(Node { raw_node })
+ }
+ }
+
+ /// Creates a `Node` from a raw C pointer. The pointer must be borrowed (the caller
+ /// retains its reference, which must be valid for the duration of the call). If the
+ /// pointer is NULL, returns None.
+ pub(crate) unsafe fn get_from_raw(raw_node: *mut bindings::device_node) -> Option<Node> {
+ // SAFETY: `raw_node` is valid or NULL per the above contract. `of_node_get` can handle
+ // NULL.
+ unsafe {
+ #[cfg(CONFIG_OF_DYNAMIC)]
+ bindings::of_node_get(raw_node);
+ Node::from_raw(raw_node)
+ }
+ }
+
+ /// Returns a reference to the underlying C `device_node` structure.
+ fn node(&self) -> &bindings::device_node {
+ // SAFETY: `raw_node` is valid per the type invariant.
+ unsafe { &*self.raw_node }
+ }
+
+ /// Returns the name of the node.
+ pub fn name(&self) -> &CStr {
+ // SAFETY: The lifetime of the `CStr` is the same as the lifetime of this `Node`.
+ unsafe { CStr::from_char_ptr(self.node().name) }
+ }
+
+ /// Returns the phandle for this node.
+ pub fn phandle(&self) -> PHandle {
+ self.node().phandle
+ }
+
+ /// Returns the full name (with address) for this node.
+ pub fn full_name(&self) -> &CStr {
+ // SAFETY: The lifetime of the `CStr` is the same as the lifetime of this `Node`.
+ unsafe { CStr::from_char_ptr(self.node().full_name) }
+ }
+
+ /// Returns `true` if the node is the root node.
+ pub fn is_root(&self) -> bool {
+ unsafe { bindings::of_node_is_root(self.raw_node) }
+ }
+
+ /// Returns the parent node, if any.
+ pub fn parent(&self) -> Option<Node> {
+ #[cfg(not(CONFIG_OF))]
+ {
+ None
+ }
+ #[cfg(CONFIG_OF)]
+ // SAFETY: `raw_node` is valid per the type invariant, and `of_get_parent()` takes a
+ // new reference to the parent (or returns NULL).
+ unsafe {
+ Node::from_raw(bindings::of_get_parent(self.raw_node))
+ }
+ }
+
+ /// Returns an iterator over the node's children.
+ // TODO: use type alias for return type once type_alias_impl_trait is stable
+ pub fn children(
+ &self,
+ ) -> NodeIterator<'_, impl Fn(*mut bindings::device_node) -> *mut bindings::device_node + '_>
+ {
+ #[cfg(not(CONFIG_OF))]
+ {
+ NodeIterator::new(|_prev| core::ptr::null_mut())
+ }
+ #[cfg(CONFIG_OF)]
+ // SAFETY: `raw_node` is valid per the type invariant, and the lifetime of the `NodeIterator`
+ // does not exceed the lifetime of the `Node` so it can borrow its reference.
+ NodeIterator::new(|prev| unsafe { bindings::of_get_next_child(self.raw_node, prev) })
+ }
+
+ /// Find a child by its name and return it, or None if not found.
+ #[allow(unused_variables)]
+ pub fn get_child_by_name(&self, name: &CStr) -> Option<Node> {
+ #[cfg(not(CONFIG_OF))]
+ {
+ None
+ }
+ #[cfg(CONFIG_OF)]
+ // SAFETY: `raw_node` is valid per the type invariant.
+ unsafe {
+ Node::from_raw(bindings::of_get_child_by_name(
+ self.raw_node,
+ name.as_char_ptr(),
+ ))
+ }
+ }
+
+ /// Checks whether the node is compatible with the given compatible string.
+ ///
+ /// Returns `None` if there is no match, or `Some(NonZeroU32)` if there is, with the value
+ /// representing a match score (higher values for more specific compatible matches).
+ #[allow(unused_variables)]
+ pub fn is_compatible(&self, compatible: &CStr) -> Option<NonZeroU32> {
+ #[cfg(not(CONFIG_OF))]
+ let ret = 0;
+ #[cfg(CONFIG_OF)]
+ // SAFETY: `raw_node` is valid per the type invariant.
+ let ret =
+ unsafe { bindings::of_device_is_compatible(self.raw_node, compatible.as_char_ptr()) };
+
+ NonZeroU32::new(ret.try_into().ok()?)
+ }
+
+ /// Parse a phandle property and return the Node referenced at a given index, if any.
+ ///
+ /// Used only for phandle properties with no arguments.
+ #[allow(unused_variables)]
+ pub fn parse_phandle(&self, name: &CStr, index: usize) -> Option<Node> {
+ #[cfg(not(CONFIG_OF))]
+ {
+ None
+ }
+ #[cfg(CONFIG_OF)]
+ // SAFETY: `raw_node` is valid per the type invariant. `of_parse_phandle` returns an
+ // owned reference.
+ unsafe {
+ Node::from_raw(bindings::of_parse_phandle(
+ self.raw_node,
+ name.as_char_ptr(),
+ index.try_into().ok()?,
+ ))
+ }
+ }
+
+ #[allow(unused_variables)]
+ /// Look up a node property by name, returning a `Property` object if found.
+ pub fn find_property(&self, propname: &CStr) -> Option<Property<'_>> {
+ #[cfg(not(CONFIG_OF))]
+ {
+ None
+ }
+ #[cfg(CONFIG_OF)]
+ // SAFETY: `raw_node` is valid per the type invariant. The property structure
+ // returned borrows the reference to the owning node, and so has the same
+ // lifetime.
+ unsafe {
+ Property::from_raw(bindings::of_find_property(
+ self.raw_node,
+ propname.as_char_ptr(),
+ core::ptr::null_mut(),
+ ))
+ }
+ }
+
+ /// Look up a mandatory node property by name, and decode it into a value type.
+ ///
+ /// Returns `Err(ENOENT)` if the property is not found.
+ ///
+ /// The type `T` must implement `TryFrom<Property<'_>>`.
+ pub fn get_property<'a, T: TryFrom<Property<'a>>>(&'a self, propname: &CStr) -> Result<T>
+ where
+ crate::error::Error: From<<T as TryFrom<Property<'a>>>::Error>,
+ {
+ Ok(self.find_property(propname).ok_or(ENOENT)?.try_into()?)
+ }
+
+ /// Look up an optional node property by name, and decode it into a value type.
+ ///
+ /// Returns `Ok(None)` if the property is not found.
+ ///
+ /// The type `T` must implement `TryFrom<Property<'_>>`.
+ pub fn get_opt_property<'a, T: TryFrom<Property<'a>>>(
+ &'a self,
+ propname: &CStr,
+ ) -> Result<Option<T>>
+ where
+ crate::error::Error: From<<T as TryFrom<Property<'a>>>::Error>,
+ {
+ self.find_property(propname)
+ .map_or(Ok(None), |p| Ok(Some(p.try_into()?)))
+ }
+}
+
+/// A property attached to a device tree `Node`.
+///
+/// # Invariants
+///
+/// `raw` must be valid and point to a property that outlives the lifetime of this object.
+#[derive(Copy, Clone)]
+pub struct Property<'a> {
+ raw: *mut bindings::property,
+ _p: PhantomData<&'a Node>,
+}
+
+impl<'a> Property<'a> {
+    #[cfg(CONFIG_OF)]
+    /// Create a `Property` object from a raw C pointer. Returns `None` if NULL.
+    ///
+    /// The passed pointer must be valid and outlive the lifetime argument, or NULL.
+    unsafe fn from_raw(raw: *mut bindings::property) -> Option<Property<'a>> {
+        if raw.is_null() {
+            None
+        } else {
+            // INVARIANT: `raw` is non-null per the check above and valid per the contract of
+            // this function.
+            Some(Property {
+                raw,
+                _p: PhantomData,
+            })
+        }
+    }
+
+    /// Returns the name of the property as a `CStr`.
+    pub fn name(&self) -> &CStr {
+        // SAFETY: `raw` is valid per the type invariant, and the lifetime of the `CStr` does not
+        // outlive it.
+        unsafe { CStr::from_char_ptr((*self.raw).name) }
+    }
+
+    /// Returns the value of the property as a `&[u8]`.
+    ///
+    /// Zero-length properties yield an empty slice.
+    pub fn value(&self) -> &[u8] {
+        // SAFETY: `raw` is valid per the type invariant.
+        let data = unsafe { (*self.raw).value } as *const u8;
+        let len = self.len();
+        // `slice::from_raw_parts` requires a non-null pointer even for an empty slice, so do
+        // not hand it the C pointer when there is nothing to read.
+        // NOTE(review): whether an empty property can actually carry a NULL `value` is not
+        // visible from here; this guard is purely defensive.
+        if data.is_null() || len == 0 {
+            return &[];
+        }
+        // SAFETY: `raw` is valid per the type invariant, `data` is non-null and `len` is the
+        // length recorded for the property; the lifetime of the slice does not outlive `raw`.
+        unsafe { core::slice::from_raw_parts(data, len) }
+    }
+
+    /// Returns the length of the property in bytes.
+    ///
+    /// A negative length in the underlying C structure is reported as `0` instead of
+    /// panicking.
+    pub fn len(&self) -> usize {
+        // SAFETY: `raw` is valid per the type invariant.
+        let raw_len = unsafe { (*self.raw).length };
+        raw_len.try_into().unwrap_or(0)
+    }
+
+    /// Returns true if the property is empty (zero-length), which typically represents boolean true.
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+}
+
+/// A trait that represents a value decodable from a property with a fixed unit size.
+///
+/// This allows us to auto-derive property decode implementations for `Vec<T: PropertyUnit>`.
+pub trait PropertyUnit: Sized {
+ /// The size in bytes of a single data unit.
+ const UNIT_SIZE: usize;
+
+ /// Decode this data unit from a byte slice. The passed slice will have a length of `UNIT_SIZE`.
+ fn from_bytes(data: &[u8]) -> Result<Self>;
+}
+
+// This doesn't work...
+// impl<'a, T: PropertyUnit> TryFrom<Property<'a>> for T {
+// type Error = Error;
+//
+// fn try_from(p: Property<'_>) -> core::result::Result<T, Self::Error> {
+// if p.value().len() != T::UNIT_SIZE {
+// Err(EINVAL)
+// } else {
+// Ok(T::from_bytes(p.value())?)
+// }
+// }
+// }
+
+/// Decodes a property whose value is a packed sequence of `T` units.
+///
+/// Fails with `EINVAL` when the property length is not a multiple of `T::UNIT_SIZE` (or when
+/// `T::UNIT_SIZE` is zero), and propagates any error from `T::from_bytes` or from growing the
+/// vector.
+impl<'a, T: PropertyUnit> TryFrom<Property<'a>> for Vec<T> {
+    type Error = Error;
+
+    fn try_from(p: Property<'_>) -> core::result::Result<Vec<T>, Self::Error> {
+        // A zero unit size would make the modulo below (and the chunking) panic.
+        if T::UNIT_SIZE == 0 {
+            return Err(EINVAL);
+        }
+
+        let val = p.value();
+        if val.len() % T::UNIT_SIZE != 0 {
+            return Err(EINVAL);
+        }
+
+        let mut v = Vec::new();
+        // The length check above guarantees that `chunks_exact` leaves no remainder.
+        for unit in val.chunks_exact(T::UNIT_SIZE) {
+            v.try_push(T::from_bytes(unit)?)?;
+        }
+        Ok(v)
+    }
+}
+
+// Implements big-endian property decoding for an integer type, both as a whole-property value
+// (`TryFrom<Property>`) and as a fixed-size unit (`PropertyUnit`). A byte slice whose length
+// differs from the integer's size is rejected with `EINVAL`.
+macro_rules! prop_int_type (
+    ($type:ty) => {
+        impl PropertyUnit for $type {
+            const UNIT_SIZE: usize = core::mem::size_of::<$type>();
+
+            fn from_bytes(data: &[u8]) -> Result<Self> {
+                data.try_into()
+                    .map(<$type>::from_be_bytes)
+                    .map_err(|_| EINVAL)
+            }
+        }
+
+        impl<'a> TryFrom<Property<'a>> for $type {
+            type Error = Error;
+
+            fn try_from(p: Property<'_>) -> core::result::Result<$type, Self::Error> {
+                <$type as PropertyUnit>::from_bytes(p.value())
+            }
+        }
+    }
+);
+
+prop_int_type!(u8);
+prop_int_type!(u16);
+prop_int_type!(u32);
+prop_int_type!(u64);
+prop_int_type!(i8);
+prop_int_type!(i16);
+prop_int_type!(i32);
+prop_int_type!(i64);
+
+/// An iterator across a collection of Node objects.
+///
+/// # Invariants
+///
+/// `cur` must be NULL or a valid node owned reference. If NULL, it represents either the first
+/// or last position of the iterator.
+///
+/// If `done` is true, `cur` must be NULL.
+///
+/// fn_next must be a callback that iterates from one node to the next, and it must not capture
+/// values that exceed the lifetime of the iterator. It must return owned references and also
+/// take owned references.
+pub struct NodeIterator<'a, T>
+where
+ T: Fn(*mut bindings::device_node) -> *mut bindings::device_node,
+{
+ cur: *mut bindings::device_node,
+ done: bool,
+ fn_next: T,
+ _p: PhantomData<&'a T>,
+}
+
+impl<'a, T> NodeIterator<'a, T>
+where
+ T: Fn(*mut bindings::device_node) -> *mut bindings::device_node,
+{
+ fn new(next: T) -> NodeIterator<'a, T> {
+ // INVARIANT: `cur` is initialized to NULL to represent the initial state.
+ NodeIterator {
+ cur: core::ptr::null_mut(),
+ done: false,
+ fn_next: next,
+ _p: PhantomData,
+ }
+ }
+}
+
+impl<'a, T> Iterator for NodeIterator<'a, T>
+where
+    T: Fn(*mut bindings::device_node) -> *mut bindings::device_node,
+{
+    type Item = Node;
+
+    /// Advances to the next node, returning `None` once `fn_next` has produced NULL.
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.done {
+            return None;
+        }
+
+        let advanced = (self.fn_next)(self.cur);
+        // INVARIANT: A NULL result means the end was reached, so `done` becomes `true`
+        // exactly when `cur` is NULL.
+        self.cur = advanced;
+        self.done = advanced.is_null();
+        // SAFETY: `fn_next` must return an owned reference per the iterator contract.
+        // The iterator keeps that reference for itself, so another one is taken for the
+        // `Node` handed to the caller.
+        unsafe { Node::get_from_raw(advanced) }
+    }
+}
+
+// Drop impl to ensure we drop the current node being iterated on, if any.
+impl<'a, T> Drop for NodeIterator<'a, T>
+where
+ T: Fn(*mut bindings::device_node) -> *mut bindings::device_node,
+{
+ fn drop(&mut self) {
+ // SAFETY: `cur` is valid or NULL, and `of_node_put()` can handle NULL.
+ #[cfg(CONFIG_OF_DYNAMIC)]
+ unsafe {
+ bindings::of_node_put(self.cur)
+ };
+ }
+}
+
+/// Returns the root node of the OF device tree (if any).
+pub fn root() -> Option<Node> {
+    // SAFETY: NOTE(review): assumes the C global `of_root` is either NULL or a valid node
+    // while it is read here; confirm against the C side.
+    let raw_root = unsafe { bindings::of_root };
+    // SAFETY: `get_from_raw` accepts NULL and takes its own reference to a non-NULL node.
+    unsafe { Node::get_from_raw(raw_root) }
+}
+
+/// Returns the /chosen node of the OF device tree (if any).
+pub fn chosen() -> Option<Node> {
+    // SAFETY: NOTE(review): assumes the C global `of_chosen` is either NULL or a valid node
+    // while it is read here; confirm against the C side.
+    let raw_chosen = unsafe { bindings::of_chosen };
+    // SAFETY: `get_from_raw` accepts NULL and takes its own reference to a non-NULL node.
+    unsafe { Node::get_from_raw(raw_chosen) }
+}
+
+/// Returns the /aliases node of the OF device tree (if any).
+pub fn aliases() -> Option<Node> {
+    // SAFETY: NOTE(review): assumes the C global `of_aliases` is either NULL or a valid node
+    // while it is read here; confirm against the C side.
+    let raw_aliases = unsafe { bindings::of_aliases };
+    // SAFETY: `get_from_raw` accepts NULL and takes its own reference to a non-NULL node.
+    unsafe { Node::get_from_raw(raw_aliases) }
+}
+
+/// Returns the system stdout node of the OF device tree (if any).
+pub fn stdout() -> Option<Node> {
+    // SAFETY: NOTE(review): assumes the C global `of_stdout` is either NULL or a valid node
+    // while it is read here; confirm against the C side.
+    let raw_stdout = unsafe { bindings::of_stdout };
+    // SAFETY: `get_from_raw` accepts NULL and takes its own reference to a non-NULL node.
+    unsafe { Node::get_from_raw(raw_stdout) }
+}
+
+#[allow(unused_variables)]
+/// Looks up a node in the device tree by phandle.
+pub fn find_node_by_phandle(handle: PHandle) -> Option<Node> {
+ #[cfg(not(CONFIG_OF))]
+ {
+ None
+ }
+ #[cfg(CONFIG_OF)]
+ unsafe {
+ #[allow(dead_code)]
+ Node::from_raw(bindings::of_find_node_by_phandle(handle))
+ }
+}
+
+impl Clone for Node {
+ fn clone(&self) -> Node {
+ // SAFETY: `raw_node` is valid and non-NULL per the type invariant,
+ // so this can never return None.
+ unsafe { Node::get_from_raw(self.raw_node).unwrap() }
+ }
+}
+
+impl Drop for Node {
+ fn drop(&mut self) {
+ #[cfg(CONFIG_OF_DYNAMIC)]
+ // SAFETY: `raw_node` is valid per the type invariant.
+ unsafe {
+ bindings::of_node_put(self.raw_node)
+ };
+ }
+}
diff --git a/rust/kernel/platform.rs b/rust/kernel/platform.rs
new file mode 100644
index 000000000000..542865da17a1
--- /dev/null
+++ b/rust/kernel/platform.rs
@@ -0,0 +1,286 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Platform devices and drivers.
+//!
+//! Also called `platdev`, `pdev`.
+//!
+//! C header: [`include/linux/platform_device.h`](../../../../include/linux/platform_device.h)
+
+use crate::{
+ bindings,
+ device::{self, RawDevice},
+ driver,
+ error::{code::*, from_kernel_result, to_result, Result},
+ io_mem::{IoMem, IoResource, Resource},
+ of,
+ str::CStr,
+ types::ForeignOwnable,
+ ThisModule,
+};
+
+/// A registration of a platform driver.
+pub type Registration<T> = driver::Registration<Adapter<T>>;
+
+/// An adapter for the registration of platform drivers.
+///
+/// Implements [`driver::DriverOps`] on behalf of a [`Driver`], installing C `probe` and `remove`
+/// callbacks that forward to the driver's own methods.
+pub struct Adapter<T: Driver>(T);
+
+impl<T: Driver> driver::DriverOps for Adapter<T> {
+ type RegType = bindings::platform_driver;
+
+ // Fills in the C `platform_driver` structure (name, probe/remove callbacks and, if the driver
+ // provides one, the OF match table) and registers it with the platform bus.
+ unsafe fn register(
+ reg: *mut bindings::platform_driver,
+ name: &'static CStr,
+ module: &'static ThisModule,
+ ) -> Result {
+ // SAFETY: By the safety requirements of this function (defined in the trait definition),
+ // `reg` is non-null and valid.
+ let pdrv = unsafe { &mut *reg };
+
+ pdrv.driver.name = name.as_char_ptr();
+ pdrv.probe = Some(Self::probe_callback);
+ pdrv.remove = Some(Self::remove_callback);
+ // Only touch `of_match_table` when the driver declares a table; otherwise the field keeps
+ // whatever value `reg` was initialised with.
+ if let Some(t) = T::OF_DEVICE_ID_TABLE {
+ pdrv.driver.of_match_table = t.as_ref();
+ }
+ // SAFETY:
+ // - `pdrv` lives at least until the call to `platform_driver_unregister()` returns.
+ // - `name` pointer has static lifetime.
+ // - `module.0` lives at least as long as the module.
+ // - `probe()` and `remove()` are static functions.
+ // - `of_match_table` is either a raw pointer with static lifetime,
+ // as guaranteed by the [`driver::IdTable`] type, or null.
+ to_result(unsafe { bindings::__platform_driver_register(reg, module.0) })
+ }
+
+ // Unregisters a driver previously registered through `register`.
+ unsafe fn unregister(reg: *mut bindings::platform_driver) {
+ // SAFETY: By the safety requirements of this function (defined in the trait definition),
+ // `reg` was passed (and updated) by a previous successful call to
+ // `platform_driver_register`.
+ unsafe { bindings::platform_driver_unregister(reg) };
+ }
+}
+
+impl<T: Driver> Adapter<T> {
+ // Looks up the driver-specific `IdInfo` attached to the OF match entry for `dev`.
+ //
+ // Returns `None` when the driver has no OF table, when no entry matches the device, when the
+ // matching entry carries no data offset, or when the stored `Option<IdInfo>` is itself `None`.
+ fn get_id_info(dev: &Device) -> Option<&'static T::IdInfo> {
+ let table = T::OF_DEVICE_ID_TABLE?;
+
+ // SAFETY: `table` has static lifetime, so it is valid for read. `dev` is guaranteed to be
+ // valid while it's alive, so is the raw device returned by it.
+ let id = unsafe { bindings::of_match_device(table.as_ref(), dev.raw_device()) };
+ if id.is_null() {
+ return None;
+ }
+
+ // The `data` field does not hold a pointer here: it is reinterpreted below as a byte
+ // offset from the matched entry to its associated `Option<IdInfo>`.
+ // SAFETY: `id` is a pointer within the static table, so it's always valid.
+ let offset = unsafe { (*id).data };
+ if offset.is_null() {
+ return None;
+ }
+
+ // SAFETY: The offset comes from a previous call to `offset_from` in `IdArray::new`, which
+ // guarantees that the resulting pointer is within the table.
+ let ptr = unsafe {
+ id.cast::<u8>()
+ .offset(offset as _)
+ .cast::<Option<T::IdInfo>>()
+ };
+
+ // SAFETY: The id table has a static lifetime, so `ptr` is guaranteed to be valid for read.
+ #[allow(clippy::needless_borrow)]
+ unsafe {
+ (&*ptr).as_ref()
+ }
+ }
+
+ // C `probe` entry point: wraps `pdev` in a temporary `Device`, calls `T::probe` and, on
+ // success, stores the returned driver data as the device's drvdata. Errors from `T::probe`
+ // are returned to C through `from_kernel_result!`.
+ extern "C" fn probe_callback(pdev: *mut bindings::platform_device) -> core::ffi::c_int {
+ from_kernel_result! {
+ // SAFETY: `pdev` is valid by the contract with the C code. `dev` is alive only for the
+ // duration of this call, so `pdev` is guaranteed to remain valid for the lifetime of
+ // `dev`.
+ let mut dev = unsafe { Device::from_ptr(pdev) };
+ let info = Self::get_id_info(&dev);
+ let data = T::probe(&mut dev, info)?;
+ // Ownership of `data` moves to the C side here; it is reclaimed in `remove_callback`.
+ // SAFETY: `pdev` is guaranteed to be a valid, non-null pointer.
+ unsafe { bindings::platform_set_drvdata(pdev, data.into_foreign() as _) };
+ Ok(0)
+ }
+ }
+
+ // C `remove` entry point: takes back ownership of the driver data stored by
+ // `probe_callback`, runs `T::remove` followed by `DeviceRemoval::device_remove`, and drops the
+ // data when it goes out of scope.
+ extern "C" fn remove_callback(pdev: *mut bindings::platform_device) -> core::ffi::c_int {
+ from_kernel_result! {
+ // SAFETY: `pdev` is guaranteed to be a valid, non-null pointer.
+ let ptr = unsafe { bindings::platform_get_drvdata(pdev) };
+ // SAFETY:
+ // - we allocated this pointer using `T::Data::into_foreign`,
+ // so it is safe to turn back into a `T::Data`.
+ // - the allocation happened in `probe`, no-one freed the memory,
+ // `remove` is the canonical kernel location to free driver data. so OK
+ // to convert the pointer back to a Rust structure here.
+ let data = unsafe { T::Data::from_foreign(ptr) };
+ let ret = T::remove(&data);
+ // `device_remove` runs even if `T::remove` failed; the error is only propagated
+ // afterwards.
+ <T::Data as driver::DeviceRemoval>::device_remove(&data);
+ ret?;
+ Ok(0)
+ }
+ }
+}
+
+/// A platform driver.
+pub trait Driver {
+ /// Data stored on device by driver.
+ ///
+ /// Corresponds to the data set or retrieved via the kernel's
+ /// `platform_{set,get}_drvdata()` functions.
+ ///
+ /// `Data` must implement `ForeignOwnable` so that it can be handed to the C side as a raw
+ /// pointer after `probe` and recovered again on `remove`. We guarantee to never move the
+ /// underlying wrapped data structure.
+ type Data: ForeignOwnable + Send + Sync + driver::DeviceRemoval = ();
+
+ /// The type holding information about each device id supported by the driver.
+ type IdInfo: 'static = ();
+
+ /// The table of device ids supported by the driver.
+ ///
+ /// When `None` (the default), no OF match table is installed and `probe` always receives
+ /// `None` as `id_info`.
+ const OF_DEVICE_ID_TABLE: Option<driver::IdTable<'static, of::DeviceId, Self::IdInfo>> = None;
+
+ /// Platform driver probe.
+ ///
+ /// Called when a new platform device is added or discovered.
+ /// Implementers should attempt to initialize the device here.
+ ///
+ /// `id_info` is the information attached to the matching entry of
+ /// [`Driver::OF_DEVICE_ID_TABLE`], if any. The returned value becomes the device's driver
+ /// data.
+ fn probe(dev: &mut Device, id_info: Option<&Self::IdInfo>) -> Result<Self::Data>;
+
+ /// Platform driver remove.
+ ///
+ /// Called when a platform device is removed.
+ /// Implementers should prepare the device for complete removal here.
+ ///
+ /// The default implementation does nothing and returns `Ok(())`.
+ fn remove(_data: &Self::Data) -> Result {
+ Ok(())
+ }
+}
+
+/// A platform device.
+///
+/// # Invariants
+///
+/// The field `ptr` is non-null and valid for the lifetime of the object.
+pub struct Device {
+ ptr: *mut bindings::platform_device,
+ // Bitmask of the resources already handed out by `get_resource`: bit `i` is set once the
+ // resource at position `i` of the device's resource array has been returned. Being a `u64`,
+ // it can only track the first 64 resources.
+ used_resource: u64,
+}
+
+impl Device {
+ /// Creates a new device from the given pointer.
+ ///
+ /// # Safety
+ ///
+ /// `ptr` must be non-null and valid. It must remain valid for the lifetime of the returned
+ /// instance.
+ unsafe fn from_ptr(ptr: *mut bindings::platform_device) -> Self {
+ // INVARIANT: The safety requirements of the function ensure the lifetime invariant.
+ Self {
+ ptr,
+ // No resource has been handed out yet.
+ used_resource: 0,
+ }
+ }
+
+ /// Returns id of the platform device.
+ pub fn id(&self) -> i32 {
+ // SAFETY: By the type invariants, we know that `self.ptr` is non-null and valid.
+ unsafe { (*self.ptr).id }
+ }
+
+ /// Sets the DMA masks (normal and coherent) for a platform device.
+ pub fn set_dma_masks(&mut self, mask: u64) -> Result {
+ // SAFETY: By the type invariants, `self.ptr` is non-null and valid, so the address of its
+ // embedded `dev` field can be computed. `addr_of_mut!` is used so that no intermediate
+ // `&mut` reference to the C-owned `struct device` is created.
+ let dev = unsafe { core::ptr::addr_of_mut!((*self.ptr).dev) };
+ // SAFETY: `dev` points to the `struct device` embedded in the valid platform device.
+ to_result(unsafe { bindings::dma_set_mask_and_coherent(dev, mask) })
+ }
+
+ /// Gets a system resource of a platform device.
+ ///
+ /// Each resource can only be obtained once per [`Device`] instance.
+ ///
+ /// # Errors
+ ///
+ /// - `EINVAL` if the device has no resource of type `rtype` at index `num`, or if
+ /// `Resource::new` rejects the resource.
+ /// - `EBUSY` if the resource has already been handed out.
+ ///
+ /// # Panics
+ ///
+ /// Panics if the resource sits at position 64 or beyond in the device's resource array, as
+ /// the usage bitmask cannot track it.
+ pub fn get_resource(&mut self, rtype: IoResource, num: usize) -> Result<Resource> {
+ // SAFETY: `self.ptr` is valid by the type invariant.
+ let res = unsafe { bindings::platform_get_resource(self.ptr, rtype as _, num as _) };
+ if res.is_null() {
+ return Err(EINVAL);
+ }
+
+ // Get the position of the found resource in the array.
+ // SAFETY:
+ // - `self.ptr` is valid by the type invariant.
+ // - `res` is a displaced pointer to one of the array's elements,
+ // and `resource` is its base pointer.
+ let index = unsafe { res.offset_from((*self.ptr).resource) } as usize;
+
+ // Make sure that the index does not exceed the 64-bit mask.
+ assert!(index < 64);
+
+ if (self.used_resource >> index) & 1 == 1 {
+ return Err(EBUSY);
+ }
+ // Mark the resource as handed out so that a second request fails with `EBUSY`.
+ self.used_resource |= 1 << index;
+
+ // SAFETY: The pointer `res` is returned from `bindings::platform_get_resource`
+ // above and checked if it is not a NULL.
+ unsafe { Resource::new((*res).start, (*res).end, (*res).flags) }.ok_or(EINVAL)
+ }
+
+ /// Ioremaps resources of a platform device.
+ ///
+ /// Looks up the memory resource at `index` via [`Device::get_resource`] and maps it. If the
+ /// mapping fails, the resource is released again so that a later attempt can retry.
+ ///
+ /// # Safety
+ ///
+ /// Callers must ensure that either (a) the resulting interface cannot be used to initiate DMA
+ /// operations, or (b) that DMA operations initiated via the returned interface use DMA handles
+ /// allocated through the `dma` module.
+ pub unsafe fn ioremap_resource<const SIZE: usize>(
+ &mut self,
+ index: usize,
+ ) -> Result<IoMem<SIZE>> {
+ // Remember the usage mask so that it can be rolled back if the mapping fails.
+ let mask = self.used_resource;
+ let res = self.get_resource(IoResource::Mem, index)?;
+
+ // SAFETY: Valid by the safety contract.
+ let iomem = unsafe { IoMem::<SIZE>::try_new(res) };
+ // If remapping fails, the given resource won't be used, so restore the old mask.
+ if iomem.is_err() {
+ self.used_resource = mask;
+ }
+ iomem
+ }
+}
+
+// SAFETY: The device returned by `raw_device` is the raw platform device.
+unsafe impl device::RawDevice for Device {
+ fn raw_device(&self) -> *mut bindings::device {
+ // SAFETY: By the type invariants, we know that `self.ptr` is non-null and valid, so the
+ // address of its embedded `dev` field can be computed. `addr_of_mut!` yields the raw
+ // pointer directly, without creating a `&mut` reference to the C-owned `struct device`
+ // from what is only a shared borrow of `self`.
+ unsafe { core::ptr::addr_of_mut!((*self.ptr).dev) }
+ }
+}
+
+/// Declares a kernel module that exposes a single platform driver.
+///
+/// # Examples
+///
+/// ```ignore
+/// # use kernel::{platform, define_of_id_table, module_platform_driver};
+/// #
+/// struct MyDriver;
+/// impl platform::Driver for MyDriver {
+/// // [...]
+/// # fn probe(_dev: &mut platform::Device, _id_info: Option<&Self::IdInfo>) -> Result {
+/// # Ok(())
+/// # }
+/// # define_of_id_table! {(), [
+/// # (of::DeviceId::Compatible(b"brcm,bcm2835-rng"), None),
+/// # ]}
+/// }
+///
+/// module_platform_driver! {
+/// type: MyDriver,
+/// name: "module_name",
+/// author: "Author name",
+/// license: "GPL",
+/// }
+/// ```
+#[macro_export]
+macro_rules! module_platform_driver {
+ // All tokens are forwarded unchanged to `module_driver!`, with `platform::Adapter<T>` passed
+ // as the adapter type for the user's driver type `T`.
+ ($($f:tt)*) => {
+ $crate::module_driver!(<T>, $crate::platform::Adapter<T>, { $($f)* });
+ };
+}
diff --git a/rust/kernel/prelude.rs b/rust/kernel/prelude.rs
index 7a90249ee9b9..f145a09603a5 100644
--- a/rust/kernel/prelude.rs
+++ b/rust/kernel/prelude.rs
@@ -11,15 +11,23 @@
//! use kernel::prelude::*;
//! ```
+#[doc(no_inline)]
pub use core::pin::Pin;
+#[doc(no_inline)]
pub use alloc::{boxed::Box, vec::Vec};
+#[doc(no_inline)]
pub use macros::{module, vtable};
pub use super::build_assert;
-pub use super::{dbg, pr_alert, pr_crit, pr_debug, pr_emerg, pr_err, pr_info, pr_notice, pr_warn};
+// `super::std_vendor` is hidden, which makes the macro inline for some reason.
+#[doc(no_inline)]
+pub use super::dbg;
+pub use super::fmt;
+pub use super::{dev_alert, dev_crit, dev_dbg, dev_emerg, dev_err, dev_info, dev_notice, dev_warn};
+pub use super::{pr_alert, pr_crit, pr_debug, pr_emerg, pr_err, pr_info, pr_notice, pr_warn};
pub use super::static_assert;
diff --git a/rust/kernel/revocable.rs b/rust/kernel/revocable.rs
new file mode 100644
index 000000000000..1093c4d26026
--- /dev/null
+++ b/rust/kernel/revocable.rs
@@ -0,0 +1,425 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Revocable objects.
+//!
+//! The [`Revocable`] type wraps other types and allows access to them to be revoked. The existence
+//! of a [`RevocableGuard`] ensures that objects remain valid.
+
+use crate::{bindings, sync::rcu};
+use core::{
+ cell::UnsafeCell,
+ marker::PhantomData,
+ mem::MaybeUninit,
+ ops::Deref,
+ ptr::drop_in_place,
+ sync::atomic::{fence, AtomicBool, AtomicU32, Ordering},
+};
+
+/// An object that can become inaccessible at runtime.
+///
+/// Once access is revoked and all concurrent users complete (i.e., all existing instances of
+/// [`RevocableGuard`] are dropped), the wrapped object is also dropped.
+///
+/// # Examples
+///
+/// ```
+/// # use kernel::revocable::Revocable;
+///
+/// struct Example {
+/// a: u32,
+/// b: u32,
+/// }
+///
+/// fn add_two(v: &Revocable<Example>) -> Option<u32> {
+/// let guard = v.try_access()?;
+/// Some(guard.a + guard.b)
+/// }
+///
+/// let v = Revocable::new(Example { a: 10, b: 20 });
+/// assert_eq!(add_two(&v), Some(30));
+/// v.revoke();
+/// assert_eq!(add_two(&v), None);
+/// ```
+///
+/// Same example as above, but explicitly using the rcu read side lock.
+///
+/// ```
+/// # use kernel::revocable::Revocable;
+/// use kernel::sync::rcu;
+///
+/// struct Example {
+/// a: u32,
+/// b: u32,
+/// }
+///
+/// fn add_two(v: &Revocable<Example>) -> Option<u32> {
+/// let guard = rcu::read_lock();
+/// let e = v.try_access_with_guard(&guard)?;
+/// Some(e.a + e.b)
+/// }
+///
+/// let v = Revocable::new(Example { a: 10, b: 20 });
+/// assert_eq!(add_two(&v), Some(30));
+/// v.revoke();
+/// assert_eq!(add_two(&v), None);
+/// ```
+pub struct Revocable<T> {
+ // `true` until `revoke` succeeds; while it is `true`, `data` is initialised.
+ is_available: AtomicBool,
+ // Kept in a `MaybeUninit` so that the wrapped object can be dropped in place on revocation
+ // while the `Revocable` itself stays alive.
+ data: MaybeUninit<UnsafeCell<T>>,
+}
+
+// SAFETY: `Revocable` is `Send` if the wrapped object is also `Send`. This is because while the
+// functionality exposed by `Revocable` can be accessed from any thread/CPU, it is possible that
+// this isn't supported by the wrapped object.
+unsafe impl<T: Send> Send for Revocable<T> {}
+
+// SAFETY: `Revocable` is `Sync` if the wrapped object is both `Send` and `Sync`. We require `Send`
+// from the wrapped object as well because of `Revocable::revoke`, which can trigger the `Drop`
+// implementation of the wrapped object from an arbitrary thread.
+unsafe impl<T: Sync + Send> Sync for Revocable<T> {}
+
+impl<T> Revocable<T> {
+ /// Wraps `data` in a new [`Revocable`] whose contents are initially accessible.
+ pub const fn new(data: T) -> Self {
+ Self {
+ is_available: AtomicBool::new(true),
+ data: MaybeUninit::new(UnsafeCell::new(data)),
+ }
+ }
+
+ /// Tries to access the \[revocable\] wrapped object.
+ ///
+ /// Yields `None` once the object has been revoked, as it is then no longer accessible.
+ ///
+ /// Otherwise yields a guard through which the object can be reached; the object is guaranteed
+ /// to stay accessible for as long as the guard lives. While holding the guard, callers are not
+ /// allowed to sleep because another CPU may be waiting to complete the revocation of this
+ /// object.
+ pub fn try_access(&self) -> Option<RevocableGuard<'_, T>> {
+ // Enter the RCU read-side critical section before looking at the flag.
+ let rcu_guard = rcu::read_lock();
+ if !self.is_available.load(Ordering::Relaxed) {
+ return None;
+ }
+
+ // SAFETY: Since `self.is_available` is true, data is initialised and has to remain
+ // valid because the RCU read side lock prevents it from being dropped.
+ let object = unsafe { self.data.assume_init_ref() }.get();
+ Some(RevocableGuard::new(object, rcu_guard))
+ }
+
+ /// Tries to access the \[revocable\] wrapped object.
+ ///
+ /// Yields `None` once the object has been revoked, as it is then no longer accessible.
+ ///
+ /// Otherwise yields a shared reference to the object; the object is guaranteed to stay
+ /// accessible for as long as the rcu read side guard lives. During that time, callers are not
+ /// allowed to sleep because another CPU may be waiting to complete the revocation of this
+ /// object.
+ pub fn try_access_with_guard<'a>(&'a self, _guard: &'a rcu::Guard) -> Option<&'a T> {
+ if !self.is_available.load(Ordering::Relaxed) {
+ return None;
+ }
+
+ // SAFETY: Since `self.is_available` is true, data is initialised and has to remain
+ // valid because the RCU read side lock prevents it from being dropped.
+ let object = unsafe { &*self.data.assume_init_ref().get() };
+ Some(object)
+ }
+
+ /// Revokes access to and drops the wrapped object.
+ ///
+ /// New callers of [`Revocable::try_access`] lose access immediately. If the object still has
+ /// concurrent users (i.e., ones that called [`Revocable::try_access`] beforehand and still
+ /// haven't dropped the returned guard), this function waits for them to finish before
+ /// dropping the wrapped object.
+ pub fn revoke(&self) {
+ // Only the caller that flips the flag from `true` to `false` performs the teardown.
+ let revoked_by_us = self
+ .is_available
+ .compare_exchange(true, false, Ordering::Relaxed, Ordering::Relaxed)
+ .is_ok();
+ if !revoked_by_us {
+ return;
+ }
+
+ // SAFETY: Just an FFI call, there are no further requirements.
+ unsafe { bindings::synchronize_rcu() };
+
+ // SAFETY: We know `self.data` is valid because only one CPU can succeed the
+ // `compare_exchange` above that takes `is_available` from `true` to `false`.
+ unsafe { drop_in_place(self.data.assume_init_ref().get()) };
+ }
+}
+
+impl<T> Drop for Revocable<T> {
+ fn drop(&mut self) {
+ // If the data has been revoked it has already been dropped, so there is nothing left to
+ // do in that case.
+ let still_available = *self.is_available.get_mut();
+ if !still_available {
+ return;
+ }
+
+ // SAFETY: We know `self.data` is valid because no other CPU has changed
+ // `is_available` to `false` yet, and no other CPU can do it anymore because this CPU
+ // holds the only reference (mutable) to `self` now.
+ unsafe { drop_in_place(self.data.assume_init_ref().get()) };
+ }
+}
+
+/// A guard that allows access to a revocable object and keeps it alive.
+///
+/// CPUs may not sleep while holding on to [`RevocableGuard`] because it's in atomic context
+/// holding the RCU read-side lock.
+///
+/// # Invariants
+///
+/// The RCU read-side lock is held while the guard is alive.
+pub struct RevocableGuard<'a, T> {
+ // Pointer to the wrapped object inside the `Revocable`.
+ data_ref: *const T,
+ // Held only for its lifetime: keeps the RCU read-side lock for as long as the guard exists.
+ _rcu_guard: rcu::Guard,
+ // Ties the guard to the borrow of the `Revocable` it was obtained from.
+ _p: PhantomData<&'a ()>,
+}
+
+impl<T> RevocableGuard<'_, T> {
+ // Builds a guard from a pointer to the wrapped object and the RCU guard protecting it.
+ //
+ // NOTE(review): this function is not marked `unsafe` although `Deref` later dereferences
+ // `data_ref`; callers must pass a pointer that stays valid while `rcu_guard` is held.
+ fn new(data_ref: *const T, rcu_guard: rcu::Guard) -> Self {
+ Self {
+ data_ref,
+ _rcu_guard: rcu_guard,
+ _p: PhantomData,
+ }
+ }
+}
+
+impl<T> Deref for RevocableGuard<'_, T> {
+ type Target = T;
+
+ fn deref(&self) -> &Self::Target {
+ // SAFETY: By the type invariants, we hold the rcu read-side lock, so the object is
+ // guaranteed to remain valid.
+ unsafe { &*self.data_ref }
+ }
+}
+
+/// An object that can become inaccessible at runtime.
+///
+/// Once access is revoked and all concurrent users complete (i.e., all existing instances of
+/// [`AsyncRevocableGuard`] are dropped), the wrapped object is also dropped.
+///
+/// Unlike [`Revocable`], [`AsyncRevocable`] does not wait for concurrent users of the wrapped
+/// object to finish before [`AsyncRevocable::revoke`] completes -- thus the async qualifier. This
+/// has the advantage of not requiring RCU locks or waits of any kind.
+///
+/// # Examples
+///
+/// ```
+/// # use kernel::revocable::AsyncRevocable;
+///
+/// struct Example {
+/// a: u32,
+/// b: u32,
+/// }
+///
+/// fn add_two(v: &AsyncRevocable<Example>) -> Option<u32> {
+/// let guard = v.try_access()?;
+/// Some(guard.a + guard.b)
+/// }
+///
+/// let v = AsyncRevocable::new(Example { a: 10, b: 20 });
+/// assert_eq!(add_two(&v), Some(30));
+/// v.revoke();
+/// assert_eq!(add_two(&v), None);
+/// ```
+///
+/// Example where revocation happens while there is a user:
+///
+/// ```
+/// # use kernel::revocable::AsyncRevocable;
+/// use core::sync::atomic::{AtomicBool, Ordering};
+///
+/// struct Example {
+/// a: u32,
+/// b: u32,
+/// }
+///
+/// static DROPPED: AtomicBool = AtomicBool::new(false);
+///
+/// impl Drop for Example {
+/// fn drop(&mut self) {
+/// DROPPED.store(true, Ordering::Relaxed);
+/// }
+/// }
+///
+/// fn add_two(v: &AsyncRevocable<Example>) -> Option<u32> {
+/// let guard = v.try_access()?;
+/// Some(guard.a + guard.b)
+/// }
+///
+/// let v = AsyncRevocable::new(Example { a: 10, b: 20 });
+/// assert_eq!(add_two(&v), Some(30));
+///
+/// let guard = v.try_access().unwrap();
+/// assert!(!v.is_revoked());
+/// assert!(!DROPPED.load(Ordering::Relaxed));
+/// v.revoke();
+/// assert!(!DROPPED.load(Ordering::Relaxed));
+/// assert!(v.is_revoked());
+/// assert!(v.try_access().is_none());
+/// assert_eq!(guard.a + guard.b, 30);
+/// drop(guard);
+/// assert!(DROPPED.load(Ordering::Relaxed));
+/// ```
+pub struct AsyncRevocable<T> {
+ // Packed state: the top bit (`REVOKED`) records whether access has been revoked, the
+ // remaining bits (`COUNT_MASK`) count the outstanding guards.
+ usage_count: AtomicU32,
+ // Kept in a `MaybeUninit` so that the wrapped object can be dropped in place while the
+ // `AsyncRevocable` itself stays alive.
+ data: MaybeUninit<UnsafeCell<T>>,
+}
+
+// SAFETY: `AsyncRevocable` is `Send` if the wrapped object is also `Send`. This is because while
+// the functionality exposed by `AsyncRevocable` can be accessed from any thread/CPU, it is
+// possible that this isn't supported by the wrapped object.
+unsafe impl<T: Send> Send for AsyncRevocable<T> {}
+
+// SAFETY: `AsyncRevocable` is `Sync` if the wrapped object is both `Send` and `Sync`. We require
+// `Send` from the wrapped object as well because of `AsyncRevocable::revoke`, which can trigger
+// the `Drop` implementation of the wrapped object from an arbitrary thread.
+unsafe impl<T: Sync + Send> Sync for AsyncRevocable<T> {}
+
+// Flag bit in `usage_count` marking the object as revoked.
+const REVOKED: u32 = 0x80000000;
+// Mask selecting the guard count from `usage_count`.
+const COUNT_MASK: u32 = !REVOKED;
+// Largest representable guard count; once reached, the count is neither incremented nor
+// decremented any more.
+const SATURATED_COUNT: u32 = REVOKED - 1;
+
+impl<T> AsyncRevocable<T> {
+ /// Creates a new asynchronously revocable instance of the given data.
+ pub fn new(data: T) -> Self {
+ Self {
+ // Not revoked, no outstanding guards.
+ usage_count: AtomicU32::new(0),
+ data: MaybeUninit::new(UnsafeCell::new(data)),
+ }
+ }
+
+ /// Tries to access the \[revocable\] wrapped object.
+ ///
+ /// Returns `None` if the object has been revoked and is therefore no longer accessible.
+ ///
+ /// Returns a guard that gives access to the object otherwise; the object is guaranteed to
+ /// remain accessible while the guard is alive.
+ pub fn try_access(&self) -> Option<AsyncRevocableGuard<'_, T>> {
+ // Retry until the count is either observed as revoked/saturated or successfully
+ // incremented; a failed `compare_exchange` means another thread changed it concurrently.
+ loop {
+ let count = self.usage_count.load(Ordering::Relaxed);
+
+ // Fail attempt to access if the object is already revoked.
+ if count & REVOKED != 0 {
+ return None;
+ }
+
+ // No need to increment if the count is saturated.
+ //
+ // A saturated count is never decremented again by the guards, so from then on the
+ // object is only dropped when the `AsyncRevocable` itself is dropped.
+ if count == SATURATED_COUNT
+ || self
+ .usage_count
+ .compare_exchange(count, count + 1, Ordering::Relaxed, Ordering::Relaxed)
+ .is_ok()
+ {
+ return Some(AsyncRevocableGuard { revocable: self });
+ }
+ }
+ }
+
+ /// Revokes access to the protected object.
+ ///
+ /// Returns `true` if access has been revoked, or `false` when the object has already been
+ /// revoked by a previous call to [`AsyncRevocable::revoke`].
+ ///
+ /// This call is non-blocking, that is, no new users of the revocable object will be allowed,
+ /// but potential current users are able to continue to use it and the thread won't wait for
+ /// them to finish. In such cases, the object will be dropped when the last user completes.
+ pub fn revoke(&self) -> bool {
+ // Set the `REVOKED` bit.
+ //
+ // The acquire barrier matches up with the release when decrementing the usage count.
+ let prev = self.usage_count.fetch_or(REVOKED, Ordering::Acquire);
+ if prev & REVOKED != 0 {
+ // Another thread already revoked this object.
+ return false;
+ }
+
+ // With no outstanding guards the object is dropped right away; otherwise the last guard
+ // to go away drops it.
+ if prev == 0 {
+ // SAFETY: This thread just revoked the object and the usage count is zero, so the
+ // object is valid and there will be no future users.
+ unsafe { drop_in_place(UnsafeCell::raw_get(self.data.as_ptr())) };
+ }
+
+ true
+ }
+
+ /// Returns whether access to the object has been revoked.
+ pub fn is_revoked(&self) -> bool {
+ self.usage_count.load(Ordering::Relaxed) & REVOKED != 0
+ }
+}
+
+impl<T> Drop for AsyncRevocable<T> {
+ fn drop(&mut self) {
+ let count = *self.usage_count.get_mut();
+ // `count == REVOKED` means "revoked with no users left", in which case the object was
+ // already dropped by `revoke` or by the last guard. Any other value means it is still
+ // alive.
+ if count != REVOKED {
+ // The object hasn't been dropped yet, so we do it now.
+
+ // This matches with the release when decrementing the usage count.
+ fence(Ordering::Acquire);
+
+ // SAFETY: Since `count` does not indicate a count of 0 with the REVOKED bit set, the
+ // object is still valid.
+ unsafe { drop_in_place(UnsafeCell::raw_get(self.data.as_ptr())) };
+ }
+ }
+}
+
+/// A guard that allows access to a revocable object and keeps it alive.
+///
+/// # Invariants
+///
+/// The owner owns an increment on the usage count (which may have saturated it), which keeps the
+/// revocable object alive.
+pub struct AsyncRevocableGuard<'a, T> {
+ revocable: &'a AsyncRevocable<T>,
+}
+
+impl<T> Deref for AsyncRevocableGuard<'_, T> {
+ type Target = T;
+
+ fn deref(&self) -> &Self::Target {
+ // SAFETY: The type invariants guarantee that the caller owns an increment.
+ unsafe { &*self.revocable.data.assume_init_ref().get() }
+ }
+}
+
+// Gives back this guard's increment on the usage count and, if it was the last one on an
+// already revoked object, drops the wrapped object.
+impl<T> Drop for AsyncRevocableGuard<'_, T> {
+ fn drop(&mut self) {
+ // Retry until the decrement succeeds; a failed `compare_exchange` means another thread
+ // changed the count (or set the `REVOKED` bit) concurrently.
+ loop {
+ let count = self.revocable.usage_count.load(Ordering::Relaxed);
+ // Strip the `REVOKED` flag to look at the number of guards only.
+ let actual_count = count & COUNT_MASK;
+ if actual_count == SATURATED_COUNT {
+ // The count is saturated, so we won't decrement (nor do we drop the object).
+ return;
+ }
+
+ if actual_count == 0 {
+ // Trying to underflow the count.
+ panic!("actual_count is zero");
+ }
+
+ // On success, we use release ordering, which matches with the acquire in one of the
+ // places where we drop the object, namely: below, in `AsyncRevocable::revoke`, or in
+ // `AsyncRevocable::drop`.
+ if self
+ .revocable
+ .usage_count
+ .compare_exchange(count, count - 1, Ordering::Release, Ordering::Relaxed)
+ .is_ok()
+ {
+ // `count` holds the value from before the decrement: exactly one guard (this one)
+ // with the `REVOKED` bit set.
+ if count == 1 | REVOKED {
+ // `count` is now zero and it is revoked, so free it now.
+
+ // This matches with the release above (which may have happened in other
+ // threads concurrently).
+ fence(Ordering::Acquire);
+
+ // SAFETY: Since `count` was 1, the object is still alive.
+ unsafe { drop_in_place(UnsafeCell::raw_get(self.revocable.data.as_ptr())) };
+ }
+
+ return;
+ }
+ }
+ }
+}
diff --git a/rust/kernel/soc/apple/mod.rs b/rust/kernel/soc/apple/mod.rs
new file mode 100644
index 000000000000..dd69db63677d
--- /dev/null
+++ b/rust/kernel/soc/apple/mod.rs
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+//! Apple SoC drivers
+
+#[cfg(CONFIG_APPLE_RTKIT = "y")]
+pub mod rtkit;
diff --git a/rust/kernel/soc/apple/rtkit.rs b/rust/kernel/soc/apple/rtkit.rs
new file mode 100644
index 000000000000..bde63cd00193
--- /dev/null
+++ b/rust/kernel/soc/apple/rtkit.rs
@@ -0,0 +1,277 @@
+// SPDX-License-Identifier: GPL-2.0-only OR MIT
+
+//! Support for Apple RTKit coprocessors.
+//!
+//! C header: [`include/linux/soc/apple/rtkit.h`](../../../../include/linux/soc/apple/rtkit.h)
+
+use crate::{
+ bindings, device,
+ error::{code::*, from_kernel_err_ptr, to_result, Error, Result},
+ str::CStr,
+ types::{ForeignOwnable, ScopeGuard},
+};
+
+use alloc::boxed::Box;
+use core::marker::PhantomData;
+use core::ptr;
+use macros::vtable;
+
+/// Trait to represent allocatable buffers for the RTKit core.
+///
+/// Users must implement this trait for their own representation of those allocations.
+///
+/// A buffer is kept alive (boxed) for as long as RTKit uses it and is dropped when RTKit asks for
+/// the shared memory to be destroyed.
+pub trait Buffer {
+ /// Returns the IOVA (virtual address) of the buffer from RTKit's point of view, or an error if
+ /// unavailable.
+ fn iova(&self) -> Result<usize>;
+
+ /// Returns a mutable byte slice of the buffer contents, or an
+ /// error if unavailable.
+ ///
+ /// The slice must be at least as long as the size RTKit requested; shorter slices make the
+ /// buffer setup fail with `ENOMEM`.
+ fn buf(&mut self) -> Result<&mut [u8]>;
+}
+
+/// Callback operations for an RTKit client.
+///
+/// All methods have default implementations. The shared-memory hooks are only registered with
+/// RTKit when at least one of [`Operations::shmem_alloc`] or [`Operations::shmem_map`] is
+/// implemented (NOTE(review): detected through the `HAS_*` constants, presumably generated by
+/// `#[vtable]` -- confirm).
+#[vtable]
+pub trait Operations {
+ /// Arbitrary user context type.
+ type Data: ForeignOwnable + Send + Sync;
+
+ /// Type representing an allocated buffer for RTKit.
+ type Buffer: Buffer;
+
+ /// Called when RTKit crashes.
+ fn crashed(_data: <Self::Data as ForeignOwnable>::Borrowed<'_>) {}
+
+ /// Called when a message was received on a non-system endpoint. Called in non-IRQ context.
+ fn recv_message(
+ _data: <Self::Data as ForeignOwnable>::Borrowed<'_>,
+ _endpoint: u8,
+ _message: u64,
+ ) {
+ }
+
+ /// Called in IRQ context when a message was received on a non-system endpoint.
+ ///
+ /// Must return `true` if the message is handled, or `false` to process it in
+ /// the handling thread.
+ ///
+ /// The default implementation handles nothing and returns `false`.
+ fn recv_message_early(
+ _data: <Self::Data as ForeignOwnable>::Borrowed<'_>,
+ _endpoint: u8,
+ _message: u64,
+ ) -> bool {
+ false
+ }
+
+ /// Allocate a buffer for use by RTKit.
+ ///
+ /// Used when RTKit requests a buffer without specifying a device address (IOVA of zero).
+ /// The default implementation fails with `EINVAL`.
+ fn shmem_alloc(
+ _data: <Self::Data as ForeignOwnable>::Borrowed<'_>,
+ _size: usize,
+ ) -> Result<Self::Buffer> {
+ Err(EINVAL)
+ }
+
+ /// Map an existing buffer used by RTKit at a device-specified virtual address.
+ ///
+ /// Used when RTKit requests a buffer at a non-zero IOVA. The default implementation fails
+ /// with `EINVAL`.
+ fn shmem_map(
+ _data: <Self::Data as ForeignOwnable>::Borrowed<'_>,
+ _iova: usize,
+ _size: usize,
+ ) -> Result<Self::Buffer> {
+ Err(EINVAL)
+ }
+}
+
+/// Represents `struct apple_rtkit *`.
+///
+/// # Invariants
+///
+/// The rtk pointer is valid.
+/// The data pointer is a valid pointer from T::Data::into_foreign().
+pub struct RtKit<T: Operations> {
+ rtk: *mut bindings::apple_rtkit,
+ data: *mut core::ffi::c_void,
+ _p: PhantomData<T>,
+}
+
+// C entry point for the `crashed` op: forwards to `T::crashed` with the borrowed context data.
+unsafe extern "C" fn crashed_callback<T: Operations>(cookie: *mut core::ffi::c_void) {
+ // SAFETY: NOTE(review): assumes `cookie` is the pointer obtained from `into_foreign` in
+ // `RtKit::new` and that it has not been freed yet -- confirm that RTKit passes it back
+ // unchanged.
+ T::crashed(unsafe { T::Data::borrow(cookie) });
+}
+
+// C entry point for the `recv_message` op: forwards to `T::recv_message` with the borrowed
+// context data.
+unsafe extern "C" fn recv_message_callback<T: Operations>(
+ cookie: *mut core::ffi::c_void,
+ endpoint: u8,
+ message: u64,
+) {
+ // SAFETY: NOTE(review): assumes `cookie` is the pointer obtained from `into_foreign` in
+ // `RtKit::new` and that it has not been freed yet -- confirm that RTKit passes it back
+ // unchanged.
+ T::recv_message(unsafe { T::Data::borrow(cookie) }, endpoint, message);
+}
+
+// C entry point for the `recv_message_early` op: forwards to `T::recv_message_early` with the
+// borrowed context data and returns its verdict.
+unsafe extern "C" fn recv_message_early_callback<T: Operations>(
+ cookie: *mut core::ffi::c_void,
+ endpoint: u8,
+ message: u64,
+) -> bool {
+ // SAFETY: NOTE(review): assumes `cookie` is the pointer obtained from `into_foreign` in
+ // `RtKit::new` and that it has not been freed yet -- confirm that RTKit passes it back
+ // unchanged.
+ T::recv_message_early(unsafe { T::Data::borrow(cookie) }, endpoint, message)
+}
+
+// C entry point for the `shmem_setup` op: obtains a buffer from the client (mapping at the
+// requested IOVA if one is given, allocating otherwise), records its address and backing memory in
+// `bfr`, and parks the boxed buffer in `bfr.private` until `shmem_destroy_callback` runs.
+unsafe extern "C" fn shmem_setup_callback<T: Operations>(
+ cookie: *mut core::ffi::c_void,
+ bfr: *mut bindings::apple_rtkit_shmem,
+) -> core::ffi::c_int {
+ // SAFETY: `bfr` is a valid buffer
+ let shmem = unsafe { &mut *bfr };
+
+ // The fallible steps live in a closure so that `?` can be used; the outcome is translated
+ // into an errno for the C caller at the end.
+ let mut setup = || -> Result {
+ // A non-zero IOVA means RTKit wants an existing region mapped rather than a fresh
+ // allocation.
+ let wants_map = shmem.iova != 0;
+ shmem.is_mapped = wants_map;
+
+ let mut backing = if wants_map {
+ T::shmem_map(
+ // SAFETY: `cookie` came from a previous call to `into_foreign`.
+ unsafe { T::Data::borrow(cookie) },
+ shmem.iova as usize,
+ shmem.size,
+ )?
+ } else {
+ // SAFETY: `cookie` came from a previous call to `into_foreign`.
+ T::shmem_alloc(unsafe { T::Data::borrow(cookie) }, shmem.size)?
+ };
+
+ let dev_addr = backing.iova()?;
+ let bytes = backing.buf()?;
+
+ // The client must provide at least as much memory as was requested.
+ if bytes.len() < shmem.size {
+ return Err(ENOMEM);
+ }
+
+ shmem.iova = dev_addr as u64;
+ shmem.buffer = bytes.as_mut_ptr() as *mut _;
+
+ // Now box the returned buffer type and stash it in the private pointer of the
+ // `apple_rtkit_shmem` struct for safekeeping.
+ let boxed = Box::try_new(backing)?;
+ shmem.private = Box::into_raw(boxed) as *mut _;
+ Ok(())
+ };
+
+ match setup() {
+ Ok(()) => 0,
+ Err(e) => e.to_kernel_errno(),
+ }
+}
+
+// C entry point for the `shmem_destroy` op: drops the boxed buffer that `shmem_setup_callback`
+// stored in `bfr.private`, if any, and clears the pointer.
+unsafe extern "C" fn shmem_destroy_callback<T: Operations>(
+ _cookie: *mut core::ffi::c_void,
+ bfr: *mut bindings::apple_rtkit_shmem,
+) {
+ // SAFETY: NOTE(review): assumes `bfr` is a valid buffer, as in `shmem_setup_callback`.
+ let bfr_mut = unsafe { &mut *bfr };
+ // SAFETY: Per shmem_setup_callback, this has to be a pointer to a Buffer if it is set.
+ if !bfr_mut.private.is_null() {
+ unsafe {
+ core::mem::drop(Box::from_raw(bfr_mut.private as *mut T::Buffer));
+ }
+ // Clear the pointer so that the buffer cannot be freed a second time.
+ bfr_mut.private = core::ptr::null_mut();
+ }
+}
+
+impl<T: Operations> RtKit<T> {
+ // Table of C callbacks handed to `apple_rtkit_init`. The shared-memory hooks are only
+ // installed when the client implements `shmem_alloc` or `shmem_map`; otherwise they are left
+ // as `None`.
+ const VTABLE: bindings::apple_rtkit_ops = bindings::apple_rtkit_ops {
+ crashed: Some(crashed_callback::<T>),
+ recv_message: Some(recv_message_callback::<T>),
+ recv_message_early: Some(recv_message_early_callback::<T>),
+ shmem_setup: if T::HAS_SHMEM_ALLOC || T::HAS_SHMEM_MAP {
+ Some(shmem_setup_callback::<T>)
+ } else {
+ None
+ },
+ shmem_destroy: if T::HAS_SHMEM_ALLOC || T::HAS_SHMEM_MAP {
+ Some(shmem_destroy_callback::<T>)
+ } else {
+ None
+ },
+ };
+
+ /// Creates a new RTKit client for a given device and optional mailbox name or index.
+ ///
+ /// `data` is the context passed to the [`Operations`] callbacks. It is owned by the returned
+ /// instance on success and dropped again if initialisation fails.
+ pub fn new(
+ dev: &dyn device::RawDevice,
+ mbox_name: Option<&'static CStr>,
+ mbox_idx: usize,
+ data: T::Data,
+ ) -> Result<Self> {
+ let ptr = data.into_foreign() as *mut _;
+ // Reclaims and drops `data` on every early-return path below.
+ let guard = ScopeGuard::new(|| {
+ // SAFETY: `ptr` came from a previous call to `into_foreign`.
+ unsafe { T::Data::from_foreign(ptr) };
+ });
+ // SAFETY: This just calls the C init function.
+ let rtk = unsafe {
+ from_kernel_err_ptr(bindings::apple_rtkit_init(
+ dev.raw_device(),
+ ptr,
+ match mbox_name {
+ Some(s) => s.as_char_ptr(),
+ None => ptr::null(),
+ },
+ // Fails if `mbox_idx` does not fit the C parameter type.
+ mbox_idx.try_into()?,
+ &Self::VTABLE,
+ ))
+ }?;
+
+ // Initialisation succeeded: ownership of `data` now rests with the returned `RtKit`.
+ guard.dismiss();
+ // INVARIANT: `rtk` and `data` are valid here.
+ Ok(Self {
+ rtk,
+ data: ptr,
+ _p: PhantomData,
+ })
+ }
+
+ /// Boots (wakes up) the RTKit coprocessor.
+ pub fn boot(&mut self) -> Result {
+ // SAFETY: `rtk` is valid per the type invariant.
+ to_result(unsafe { bindings::apple_rtkit_boot(self.rtk) })
+ }
+
+ /// Starts a non-system endpoint.
+ pub fn start_endpoint(&mut self, endpoint: u8) -> Result {
+ // SAFETY: `rtk` is valid per the type invariant.
+ to_result(unsafe { bindings::apple_rtkit_start_ep(self.rtk, endpoint) })
+ }
+
+ /// Sends a message to a given endpoint.
+ pub fn send_message(&mut self, endpoint: u8, message: u64) -> Result {
+ // No completion is passed (NULL) and the last argument is `false`.
+ // NOTE(review): presumably that flag selects atomic-context sending -- confirm against the
+ // C prototype.
+ // SAFETY: `rtk` is valid per the type invariant.
+ to_result(unsafe {
+ bindings::apple_rtkit_send_message(self.rtk, endpoint, message, ptr::null_mut(), false)
+ })
+ }
+}
+
+// SAFETY: `RtKit` operations require a mutable reference
+unsafe impl<T: Operations> Sync for RtKit<T> {}
+
+// SAFETY: `RtKit` operations require a mutable reference
+unsafe impl<T: Operations> Send for RtKit<T> {}
+
+impl<T: Operations> Drop for RtKit<T> {
+ fn drop(&mut self) {
+ // The RTKit instance is freed before the context data.
+ // NOTE(review): presumably so that no callback can still borrow `data` once it is freed
+ // -- confirm that `apple_rtkit_free` stops all callbacks.
+ // SAFETY: The pointer is valid by the type invariant.
+ unsafe { bindings::apple_rtkit_free(self.rtk) };
+
+ // Free context data.
+ //
+ // SAFETY: This matches the call to `into_foreign` from `new` in the success case.
+ unsafe { T::Data::from_foreign(self.data) };
+ }
+}
diff --git a/rust/kernel/soc/mod.rs b/rust/kernel/soc/mod.rs
new file mode 100644
index 000000000000..e3024042e74f
--- /dev/null
+++ b/rust/kernel/soc/mod.rs
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! SoC drivers
+
+pub mod apple;
diff --git a/rust/kernel/std_vendor.rs b/rust/kernel/std_vendor.rs
index b3e68b24a8c6..2e864354cac1 100644
--- a/rust/kernel/std_vendor.rs
+++ b/rust/kernel/std_vendor.rs
@@ -137,6 +137,7 @@
/// [`std::dbg`]: https://doc.rust-lang.org/std/macro.dbg.html
/// [`eprintln`]: https://doc.rust-lang.org/std/macro.eprintln.html
/// [`printk`]: https://www.kernel.org/doc/html/latest/core-api/printk-basics.html
+#[allow(rustdoc::broken_intra_doc_links)]
#[macro_export]
macro_rules! dbg {
// NOTE: We cannot use `concat!` to make a static string as a format argument
diff --git a/rust/kernel/sync.rs b/rust/kernel/sync.rs
new file mode 100644
index 000000000000..c86faf35999c
--- /dev/null
+++ b/rust/kernel/sync.rs
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Synchronisation primitives.
+//!
+//! This module contains the kernel APIs related to synchronisation that have been ported or
+//! wrapped for usage by Rust code in the kernel.
+
+mod arc;
+mod condvar;
+mod guard;
+mod mutex;
+pub mod rcu;
+mod revocable;
+pub mod smutex;
+
+use crate::{bindings, str::CStr};
+use core::{cell::UnsafeCell, mem::MaybeUninit, pin::Pin};
+
+pub use arc::{Arc, ArcBorrow, UniqueArc};
+pub use condvar::CondVar;
+pub use guard::{Guard, Lock, LockFactory, LockInfo, LockIniter, ReadLock, WriteLock};
+pub use mutex::{Mutex, RevocableMutex, RevocableMutexGuard};
+pub use revocable::{Revocable, RevocableGuard};
+
+/// A lockdep class key: a transparent wrapper around C's `lock_class_key`.
+#[repr(transparent)]
+pub struct LockClassKey(UnsafeCell<MaybeUninit<bindings::lock_class_key>>);
+
+// SAFETY: This is a wrapper around a lock class key, so it is safe to use references to it from
+// any thread.
+unsafe impl Sync for LockClassKey {}
+
+impl LockClassKey {
+    /// Creates a new lock class key; the wrapped storage is left uninitialised.
+    pub const fn new() -> Self {
+        let storage = MaybeUninit::uninit();
+        Self(UnsafeCell::new(storage))
+    }
+
+    /// Returns a raw pointer to the wrapped `lock_class_key`.
+    #[allow(dead_code)]
+    pub(crate) fn get(&self) -> *mut bindings::lock_class_key {
+        let raw: *mut MaybeUninit<bindings::lock_class_key> = self.0.get();
+        raw.cast()
+    }
+}
+
+/// Safely initialises an object that has an `init` function that takes a name and a lock class as
+/// arguments, examples of these are [`Mutex`] and [`CondVar`]. Each of them also provides a more
+/// specialised name that uses this macro.
+///
+/// Every expansion declares two `static` [`LockClassKey`]s of its own and passes both of them,
+/// together with `$name` converted by `c_str!`, to [`NeedsLockClass::init`].
+#[doc(hidden)]
+#[macro_export]
+macro_rules! init_with_lockdep {
+ ($obj:expr, $name:expr) => {{
+ static CLASS1: $crate::sync::LockClassKey = $crate::sync::LockClassKey::new();
+ static CLASS2: $crate::sync::LockClassKey = $crate::sync::LockClassKey::new();
+ let obj = $obj;
+ let name = $crate::c_str!($name);
+ $crate::sync::NeedsLockClass::init(obj, name, &CLASS1, &CLASS2)
+ }};
+}
+
+/// A trait for types that need a lock class during initialisation.
+///
+/// Implementers of this trait benefit from the [`init_with_lockdep`] macro that generates a new
+/// class for each initialisation call site.
+pub trait NeedsLockClass {
+ /// Initialises the type instance so that it can be safely used.
+ ///
+ /// `name` is the name given to the object, and `key1`/`key2` are the lock class keys made
+ /// available to the implementation; an implementation that needs only one key may ignore
+ /// `key2`.
+ ///
+ /// Callers are encouraged to use the [`init_with_lockdep`] macro as it automatically creates a
+ /// new lock class on each usage.
+ fn init(
+ self: Pin<&mut Self>,
+ name: &'static CStr,
+ key1: &'static LockClassKey,
+ key2: &'static LockClassKey,
+ );
+}
diff --git a/rust/kernel/sync/arc.rs b/rust/kernel/sync/arc.rs
new file mode 100644
index 000000000000..a1cc6c0882c4
--- /dev/null
+++ b/rust/kernel/sync/arc.rs
@@ -0,0 +1,577 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! A reference-counted pointer.
+//!
+//! This module implements a way for users to create reference-counted objects and pointers to
+//! them. Such a pointer automatically increments and decrements the count, and drops the
+//! underlying object when it reaches zero. It is also safe to use concurrently from multiple
+//! threads.
+//!
+//! It is different from the standard library's [`Arc`] in a few ways:
+//! 1. It is backed by the kernel's `refcount_t` type.
+//! 2. It does not support weak references, which allows it to be half the size.
+//! 3. It saturates the reference count instead of aborting when it goes over a threshold.
+//! 4. It does not provide a `get_mut` method, so the ref counted object is pinned.
+//!
+//! [`Arc`]: https://doc.rust-lang.org/std/sync/struct.Arc.html
+
+use crate::{
+ bindings,
+ error::Result,
+ types::{ForeignOwnable, Opaque},
+};
+use alloc::boxed::Box;
+use core::{
+ any::Any,
+ fmt,
+ marker::{PhantomData, Unsize},
+ mem::{ManuallyDrop, MaybeUninit},
+ ops::{Deref, DerefMut},
+ pin::Pin,
+ ptr::NonNull,
+};
+
+/// A reference-counted pointer to an instance of `T`.
+///
+/// The reference count is incremented when new instances of [`Arc`] are created, and decremented
+/// when they are dropped. When the count reaches zero, the underlying `T` is also dropped.
+///
+/// # Invariants
+///
+/// The reference count on an instance of [`Arc`] is always non-zero.
+/// The object pointed to by [`Arc`] is always pinned.
+///
+/// # Examples
+///
+/// ```
+/// use kernel::sync::Arc;
+///
+/// struct Example {
+/// a: u32,
+/// b: u32,
+/// }
+///
+/// // Create a ref-counted instance of `Example`.
+/// let obj = Arc::try_new(Example { a: 10, b: 20 })?;
+///
+/// // Get a new pointer to `obj` and increment the refcount.
+/// let cloned = obj.clone();
+///
+/// // Assert that both `obj` and `cloned` point to the same underlying object.
+/// assert!(core::ptr::eq(&*obj, &*cloned));
+///
+/// // Destroy `obj` and decrement its refcount.
+/// drop(obj);
+///
+/// // Check that the values are still accessible through `cloned`.
+/// assert_eq!(cloned.a, 10);
+/// assert_eq!(cloned.b, 20);
+///
+/// // The refcount drops to zero when `cloned` goes out of scope, and the memory is freed.
+/// ```
+///
+/// Using `Arc<T>` as the type of `self`:
+///
+/// ```
+/// use kernel::sync::Arc;
+///
+/// struct Example {
+/// a: u32,
+/// b: u32,
+/// }
+///
+/// impl Example {
+/// fn take_over(self: Arc<Self>) {
+/// // ...
+/// }
+///
+/// fn use_reference(self: &Arc<Self>) {
+/// // ...
+/// }
+/// }
+///
+/// let obj = Arc::try_new(Example { a: 10, b: 20 })?;
+/// obj.use_reference();
+/// obj.take_over();
+/// ```
+///
+/// Coercion from `Arc<Example>` to `Arc<dyn MyTrait>`:
+///
+/// ```
+/// use kernel::sync::{Arc, ArcBorrow};
+///
+/// trait MyTrait {
+/// // Trait has a function whose `self` type is `Arc<Self>`.
+/// fn example1(self: Arc<Self>) {}
+///
+/// // Trait has a function whose `self` type is `ArcBorrow<'_, Self>`.
+/// fn example2(self: ArcBorrow<'_, Self>) {}
+/// }
+///
+/// struct Example;
+/// impl MyTrait for Example {}
+///
+/// // `obj` has type `Arc<Example>`.
+/// let obj: Arc<Example> = Arc::try_new(Example)?;
+///
+/// // `coerced` has type `Arc<dyn MyTrait>`.
+/// let coerced: Arc<dyn MyTrait> = obj;
+/// ```
+pub struct Arc<T: ?Sized> {
+ /// Pointer to the shared allocation that holds the reference count and the value.
+ ptr: NonNull<ArcInner<T>>,
+ /// Zero-sized marker tying this type to the `ArcInner<T>` behind `ptr`.
+ _p: PhantomData<ArcInner<T>>,
+}
+
+/// The allocation shared by all [`Arc`] instances that point to the same object: the reference
+/// count followed by the value itself.
+#[repr(C)]
+struct ArcInner<T: ?Sized> {
+ /// The kernel `refcount_t` tracking how many [`Arc`] instances refer to this allocation.
+ refcount: Opaque<bindings::refcount_t>,
+ /// The reference-counted value.
+ data: T,
+}
+
+// This is to allow [`Arc`] (and variants) to be used as the type of `self`, e.g.
+// `fn take_over(self: Arc<Self>)`.
+impl<T: ?Sized> core::ops::Receiver for Arc<T> {}
+
+// This is to allow coercion from `Arc<T>` to `Arc<U>` if `T` can be converted to the
+// dynamically-sized type (DST) `U`, e.g. `Arc<Example>` to `Arc<dyn MyTrait>`.
+impl<T: ?Sized + Unsize<U>, U: ?Sized> core::ops::CoerceUnsized<Arc<U>> for Arc<T> {}
+
+// This is to allow `Arc<U>` to be dispatched on when `Arc<T>` can be coerced into `Arc<U>`.
+impl<T: ?Sized + Unsize<U>, U: ?Sized> core::ops::DispatchFromDyn<Arc<U>> for Arc<T> {}
+
+// SAFETY: It is safe to send `Arc<T>` to another thread when the underlying `T` is `Sync` because
+// it effectively means sharing `&T` (which is safe because `T` is `Sync`); additionally, it needs
+// `T` to be `Send` because any thread that has an `Arc<T>` may ultimately access `T` directly, for
+// example, when the reference count reaches zero and `T` is dropped.
+unsafe impl<T: ?Sized + Sync + Send> Send for Arc<T> {}
+
+// SAFETY: It is safe to send `&Arc<T>` to another thread when the underlying `T` is `Sync` for the
+// same reason as above. `T` needs to be `Send` as well because a thread can clone an `&Arc<T>`
+// into an `Arc<T>`, which may lead to `T` being accessed by the same reasoning as above.
+unsafe impl<T: ?Sized + Sync + Send> Sync for Arc<T> {}
+
+impl<T> Arc<T> {
+    /// Allocates a new reference-counted instance of `T`.
+    ///
+    /// Returns an error if the allocation fails.
+    pub fn try_new(contents: T) -> Result<Self> {
+        // SAFETY: There are no safety requirements for this FFI call.
+        let refcount = Opaque::new(unsafe { bindings::REFCOUNT_INIT(1) });
+
+        // INVARIANT: The refcount is initialised to a non-zero value.
+        let boxed = Box::try_new(ArcInner {
+            refcount,
+            data: contents,
+        })?;
+        let inner = NonNull::from(Box::leak(boxed));
+
+        // SAFETY: We just created `inner` with a reference count of 1, which is owned by the new
+        // `Arc` object.
+        Ok(unsafe { Self::from_inner(inner) })
+    }
+}
+
+impl<T: ?Sized> Arc<T> {
+    /// Wraps an existing [`ArcInner`] in a new [`Arc`].
+    ///
+    /// # Safety
+    ///
+    /// `inner` must point to a valid location whose reference count is non-zero, and one of
+    /// those references becomes owned by the returned [`Arc`] instance.
+    unsafe fn from_inner(inner: NonNull<ArcInner<T>>) -> Self {
+        // INVARIANT: By the safety requirements, the invariants hold.
+        Self {
+            ptr: inner,
+            _p: PhantomData,
+        }
+    }
+
+    /// Borrows this [`Arc`] as an [`ArcBorrow`].
+    ///
+    /// Handy when a function (e.g., a method receiver) takes an [`ArcBorrow`] but only an
+    /// [`Arc`] is at hand. Getting an [`ArcBorrow`] is free when optimised.
+    #[inline]
+    pub fn as_arc_borrow(&self) -> ArcBorrow<'_, T> {
+        let inner = self.ptr;
+
+        // SAFETY: The constraint that the lifetime of the shared reference must outlive that of
+        // the returned `ArcBorrow` ensures that the object remains alive and that no mutable
+        // reference can be created.
+        unsafe { ArcBorrow::new(inner) }
+    }
+}
+
+impl<T: 'static> ForeignOwnable for Arc<T> {
+ type Borrowed<'a> = ArcBorrow<'a, T>;
+
+ /// Gives up the reference held by `self` without decrementing the count (the `Arc` is wrapped
+ /// in `ManuallyDrop`) and returns the inner pointer as an untyped pointer.
+ fn into_foreign(self) -> *const core::ffi::c_void {
+ ManuallyDrop::new(self).ptr.as_ptr() as _
+ }
+
+ unsafe fn borrow<'a>(ptr: *const core::ffi::c_void) -> ArcBorrow<'a, T> {
+ // By the safety requirement of this function, we know that `ptr` came from a previous call
+ // to `Arc::into_foreign`. That function returns the contents of a `NonNull`, so `ptr` is
+ // never null and the `unwrap` below does not panic.
+ let inner = NonNull::new(ptr as *mut ArcInner<T>).unwrap();
+
+ // SAFETY: The safety requirements of `from_foreign` ensure that the object remains alive
+ // for the lifetime of the returned value. Additionally, the safety requirements of
+ // `ForeignOwnable::borrow_mut` ensure that no new mutable references are created.
+ unsafe { ArcBorrow::new(inner) }
+ }
+
+ unsafe fn from_foreign(ptr: *const core::ffi::c_void) -> Self {
+ // SAFETY: By the safety requirement of this function, we know that `ptr` came from
+ // a previous call to `Arc::into_foreign`, which guarantees that `ptr` is valid and
+ // holds a reference count increment that is transferrable to us. For the same reason `ptr`
+ // is non-null, so the `unwrap` does not panic.
+ unsafe { Self::from_inner(NonNull::new(ptr as _).unwrap()) }
+ }
+}
+
+impl Arc<dyn Any + Send + Sync> {
+    /// Tries to turn the `Arc<dyn Any + Send + Sync>` into an [`Arc`] of the concrete type `T`.
+    ///
+    /// When the stored value is not a `T`, the original [`Arc`] is handed back in `Err`.
+    pub fn downcast<T>(self) -> core::result::Result<Arc<T>, Self>
+    where
+        T: Any + Send + Sync,
+    {
+        if !(*self).is::<T>() {
+            return Err(self);
+        }
+
+        // `self` must not be dropped: the reference it owns is moved into the new `Arc`.
+        let typed = ManuallyDrop::new(self).ptr.cast::<ArcInner<T>>();
+
+        // SAFETY: We have just checked that the type is correct, so we can cast the pointer.
+        Ok(unsafe { Arc::from_inner(typed) })
+    }
+}
+
+impl<T: ?Sized> Deref for Arc<T> {
+    type Target = T;
+
+    /// Returns a shared reference to the reference-counted value.
+    fn deref(&self) -> &Self::Target {
+        // SAFETY: By the type invariant, there is necessarily a reference to the object, so it is
+        // safe to dereference it.
+        let inner = unsafe { self.ptr.as_ref() };
+        &inner.data
+    }
+}
+
+impl<T: ?Sized> Clone for Arc<T> {
+    /// Increments the reference count and returns another [`Arc`] to the same object.
+    fn clone(&self) -> Self {
+        // SAFETY: By the type invariant, there is necessarily a reference to the object, so it is
+        // safe to dereference the pointer.
+        let inner = unsafe { self.ptr.as_ref() };
+
+        // INVARIANT: C `refcount_inc` saturates the refcount, so it cannot overflow to zero.
+        // SAFETY: By the type invariant, there is necessarily a reference to the object, so it is
+        // safe to increment the refcount.
+        unsafe { bindings::refcount_inc(inner.refcount.get()) };
+
+        // SAFETY: We just incremented the refcount. This increment is now owned by the new `Arc`.
+        unsafe { Self::from_inner(self.ptr) }
+    }
+}
+
+impl<T: ?Sized> Drop for Arc<T> {
+    /// Decrements the reference count and frees the allocation when it reaches zero.
+    fn drop(&mut self) {
+        // SAFETY: By the type invariant, there is necessarily a reference to the object. We cannot
+        // touch `refcount` after it's decremented to a non-zero value because another thread/CPU
+        // may concurrently decrement it to zero and free it. It is ok to have a raw pointer to
+        // freed/invalid memory as long as it is never dereferenced.
+        let refcount = unsafe { self.ptr.as_ref() }.refcount.get();
+
+        // INVARIANT: If the refcount reaches zero, there are no other instances of `Arc`, and
+        // this instance is being dropped, so the broken invariant is not observable.
+        // SAFETY: Also by the type invariant, we are allowed to decrement the refcount.
+        let is_zero = unsafe { bindings::refcount_dec_and_test(refcount) };
+        if is_zero {
+            // The count reached zero, we must free the memory. The rebuilt `Box` is passed to
+            // `drop` explicitly so that freeing is visibly the intent and the value returned by
+            // `Box::from_raw` is not silently discarded.
+            //
+            // SAFETY: The pointer was initialised from the result of `Box::leak`.
+            drop(unsafe { Box::from_raw(self.ptr.as_ptr()) });
+        }
+    }
+}
+
+impl<T: ?Sized> From<UniqueArc<T>> for Arc<T> {
+    /// Unwraps the [`Arc`] held inside the [`UniqueArc`]; this cannot fail.
+    fn from(item: UniqueArc<T>) -> Self {
+        let UniqueArc { inner } = item;
+        inner
+    }
+}
+
+impl<T: ?Sized> From<Pin<UniqueArc<T>>> for Arc<T> {
+    /// Unwraps the [`Arc`] held inside a pinned [`UniqueArc`].
+    fn from(item: Pin<UniqueArc<T>>) -> Self {
+        // SAFETY: The type invariants of `Arc` guarantee that the data is pinned.
+        let unique = unsafe { Pin::into_inner_unchecked(item) };
+        unique.inner
+    }
+}
+
+/// A borrowed reference to an [`Arc`] instance.
+///
+/// For cases when one doesn't ever need to increment the refcount on the allocation, it is simpler
+/// to use just `&T`, which we can trivially get from an `Arc<T>` instance.
+///
+/// However, when one may need to increment the refcount, it is preferable to use an `ArcBorrow<T>`
+/// over `&Arc<T>` because the latter results in a double-indirection: a pointer (shared reference)
+/// to a pointer (`Arc<T>`) to the object (`T`). An [`ArcBorrow`] eliminates this double
+/// indirection while still allowing one to increment the refcount and getting an `Arc<T>` when/if
+/// needed.
+///
+/// # Invariants
+///
+/// There are no mutable references to the underlying [`Arc`], and it remains valid for the
+/// lifetime of the [`ArcBorrow`] instance.
+///
+/// # Example
+///
+/// ```
+/// use kernel::sync::{Arc, ArcBorrow};
+///
+/// struct Example;
+///
+/// fn do_something(e: ArcBorrow<'_, Example>) -> Arc<Example> {
+/// e.into()
+/// }
+///
+/// let obj = Arc::try_new(Example)?;
+/// let cloned = do_something(obj.as_arc_borrow());
+///
+/// // Assert that both `obj` and `cloned` point to the same underlying object.
+/// assert!(core::ptr::eq(&*obj, &*cloned));
+/// ```
+///
+/// Using `ArcBorrow<T>` as the type of `self`:
+///
+/// ```
+/// use kernel::sync::{Arc, ArcBorrow};
+///
+/// struct Example {
+/// a: u32,
+/// b: u32,
+/// }
+///
+/// impl Example {
+/// fn use_reference(self: ArcBorrow<'_, Self>) {
+/// // ...
+/// }
+/// }
+///
+/// let obj = Arc::try_new(Example { a: 10, b: 20 })?;
+/// obj.as_arc_borrow().use_reference();
+/// ```
+pub struct ArcBorrow<'a, T: ?Sized + 'a> {
+ /// Pointer to the same `ArcInner` that the borrowed [`Arc`] points to.
+ inner: NonNull<ArcInner<T>>,
+ /// Zero-sized marker that makes the type carry the borrow lifetime `'a`.
+ _p: PhantomData<&'a ()>,
+}
+
+// This is to allow [`ArcBorrow`] (and variants) to be used as the type of `self`.
+impl<T: ?Sized> core::ops::Receiver for ArcBorrow<'_, T> {}
+
+// This is to allow `ArcBorrow<U>` to be dispatched on when `ArcBorrow<T>` can be coerced into
+// `ArcBorrow<U>`.
+impl<T: ?Sized + Unsize<U>, U: ?Sized> core::ops::DispatchFromDyn<ArcBorrow<'_, U>>
+ for ArcBorrow<'_, T>
+{
+}
+
+// Cloning an `ArcBorrow` copies the pointer and the marker only; the reference count is not
+// touched.
+impl<T: ?Sized> Clone for ArcBorrow<'_, T> {
+ fn clone(&self) -> Self {
+ *self
+ }
+}
+
+impl<T: ?Sized> Copy for ArcBorrow<'_, T> {}
+
+impl<T: ?Sized> ArcBorrow<'_, T> {
+    /// Builds an [`ArcBorrow`] around `inner`.
+    ///
+    /// # Safety
+    ///
+    /// For as long as the returned [`ArcBorrow`] instance lives, callers must ensure:
+    /// 1. That `inner` remains valid;
+    /// 2. That no mutable references to `inner` are created.
+    unsafe fn new(inner: NonNull<ArcInner<T>>) -> Self {
+        // INVARIANT: The safety requirements guarantee the invariants.
+        Self { inner, _p: PhantomData }
+    }
+}
+
+impl<T: ?Sized> From<ArcBorrow<'_, T>> for Arc<T> {
+    /// Creates a new owning [`Arc`] from the borrow, incrementing the reference count.
+    fn from(b: ArcBorrow<'_, T>) -> Self {
+        // SAFETY: The existence of `b` guarantees that the refcount is non-zero. `ManuallyDrop`
+        // guarantees that `drop` isn't called, so it's ok that the temporary `Arc` doesn't own the
+        // increment.
+        let unowned = ManuallyDrop::new(unsafe { Arc::from_inner(b.inner) });
+
+        // Cloning the temporary is what takes the new reference.
+        (*unowned).clone()
+    }
+}
+
+impl<T: ?Sized> Deref for ArcBorrow<'_, T> {
+    type Target = T;
+
+    /// Returns a shared reference to the borrowed value.
+    fn deref(&self) -> &Self::Target {
+        // SAFETY: By the type invariant, the underlying object is still alive with no mutable
+        // references to it, so it is safe to create a shared reference.
+        let inner = unsafe { self.inner.as_ref() };
+        &inner.data
+    }
+}
+
+/// A refcounted object that is known to have a refcount of 1.
+///
+/// It is mutable and can be converted to an [`Arc`] so that it can be shared.
+///
+/// # Invariants
+///
+/// `inner` always has a reference count of 1.
+///
+/// # Examples
+///
+/// In the following example, we make changes to the inner object before turning it into an
+/// `Arc<Test>` object (after which point, it cannot be mutated directly). Note that `x.into()`
+/// cannot fail.
+///
+/// ```
+/// use kernel::sync::{Arc, UniqueArc};
+///
+/// struct Example {
+/// a: u32,
+/// b: u32,
+/// }
+///
+/// fn test() -> Result<Arc<Example>> {
+/// let mut x = UniqueArc::try_new(Example { a: 10, b: 20 })?;
+/// x.a += 1;
+/// x.b += 1;
+/// Ok(x.into())
+/// }
+///
+/// # test().unwrap();
+/// ```
+///
+/// In the following example we first allocate memory for a ref-counted `Example` but we don't
+/// initialise it on allocation. We do initialise it later with a call to [`UniqueArc::write`],
+/// followed by a conversion to `Arc<Example>`. This is particularly useful when allocation happens
+/// in one context (e.g., sleepable) and initialisation in another (e.g., atomic):
+///
+/// ```
+/// use kernel::sync::{Arc, UniqueArc};
+///
+/// struct Example {
+/// a: u32,
+/// b: u32,
+/// }
+///
+/// fn test() -> Result<Arc<Example>> {
+/// let x = UniqueArc::try_new_uninit()?;
+/// Ok(x.write(Example { a: 10, b: 20 }).into())
+/// }
+///
+/// # test().unwrap();
+/// ```
+///
+/// In the last example below, the caller gets a pinned instance of `Example` while converting to
+/// `Arc<Example>`; this is useful in scenarios where one needs a pinned reference during
+/// initialisation, for example, when initialising fields that are wrapped in locks.
+///
+/// ```
+/// use kernel::sync::{Arc, UniqueArc};
+///
+/// struct Example {
+/// a: u32,
+/// b: u32,
+/// }
+///
+/// fn test() -> Result<Arc<Example>> {
+/// let mut pinned = Pin::from(UniqueArc::try_new(Example { a: 10, b: 20 })?);
+/// // We can modify `pinned` because it is `Unpin`.
+/// pinned.as_mut().a += 1;
+/// Ok(pinned.into())
+/// }
+///
+/// # test().unwrap();
+/// ```
+pub struct UniqueArc<T: ?Sized> {
+ /// The wrapped [`Arc`]; by the type invariant its reference count is 1.
+ inner: Arc<T>,
+}
+
+impl<T> UniqueArc<T> {
+    /// Tries to allocate a new [`UniqueArc`] instance holding `value`.
+    pub fn try_new(value: T) -> Result<Self> {
+        // INVARIANT: The newly-created object has a ref-count of 1.
+        let inner = Arc::try_new(value)?;
+        Ok(Self { inner })
+    }
+
+    /// Tries to allocate a new [`UniqueArc`] instance whose contents are not initialised yet.
+    pub fn try_new_uninit() -> Result<UniqueArc<MaybeUninit<T>>> {
+        // INVARIANT: The newly-created object has a ref-count of 1.
+        let inner = Arc::try_new(MaybeUninit::<T>::uninit())?;
+        Ok(UniqueArc { inner })
+    }
+}
+
+impl<T> UniqueArc<MaybeUninit<T>> {
+ /// Converts a `UniqueArc<MaybeUninit<T>>` into a `UniqueArc<T>` by writing a value into it.
+ pub fn write(mut self, value: T) -> UniqueArc<T> {
+ self.deref_mut().write(value);
+ // SAFETY: The contents were initialised by the `write` call just above.
+ unsafe { self.assume_init() }
+ }
+
+ /// Returns a UniqueArc<T>, assuming the MaybeUninit<T> has already been initialized.
+ ///
+ /// # Safety
+ /// The contents of the UniqueArc must have already been fully initialized.
+ pub unsafe fn assume_init(self) -> UniqueArc<T> {
+ // `ManuallyDrop` keeps `self` from being dropped, so its reference is not released here.
+ let inner = ManuallyDrop::new(self).inner.ptr;
+ UniqueArc {
+ // SAFETY: The new `Arc` is taking over `ptr` from `self.inner` (which won't be
+ // dropped). The types are compatible because `MaybeUninit<T>` is compatible with `T`.
+ inner: unsafe { Arc::from_inner(inner.cast()) },
+ }
+ }
+}
+
+impl<T: ?Sized> From<UniqueArc<T>> for Pin<UniqueArc<T>> {
+ /// Pins the [`UniqueArc`] without moving or reallocating the value it holds.
+ fn from(obj: UniqueArc<T>) -> Self {
+ // SAFETY: It is not possible to move/replace `T` inside a `Pin<UniqueArc<T>>` (unless `T`
+ // is `Unpin`), so it is ok to convert it to `Pin<UniqueArc<T>>`.
+ unsafe { Pin::new_unchecked(obj) }
+ }
+}
+
+impl<T: ?Sized> Deref for UniqueArc<T> {
+    type Target = T;
+
+    /// Returns a shared reference to the value, going through the wrapped [`Arc`].
+    fn deref(&self) -> &Self::Target {
+        Deref::deref(&self.inner)
+    }
+}
+
+impl<T: ?Sized> DerefMut for UniqueArc<T> {
+    /// Returns a mutable reference to the value.
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        // SAFETY: By the `Arc` type invariant, there is necessarily a reference to the object, so
+        // it is safe to dereference it. Additionally, we know there is only one reference when
+        // it's inside a `UniqueArc`, so it is safe to get a mutable reference.
+        let inner = unsafe { self.inner.ptr.as_mut() };
+        &mut inner.data
+    }
+}
+
+// The formatting impls below all delegate to the implementation of the pointed-to value.
+
+impl<T: fmt::Display + ?Sized> fmt::Display for UniqueArc<T> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        fmt::Display::fmt(&**self, f)
+    }
+}
+
+impl<T: fmt::Display + ?Sized> fmt::Display for Arc<T> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        fmt::Display::fmt(&**self, f)
+    }
+}
+
+impl<T: fmt::Debug + ?Sized> fmt::Debug for UniqueArc<T> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        fmt::Debug::fmt(&**self, f)
+    }
+}
+
+impl<T: fmt::Debug + ?Sized> fmt::Debug for Arc<T> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        fmt::Debug::fmt(&**self, f)
+    }
+}
diff --git a/rust/kernel/sync/condvar.rs b/rust/kernel/sync/condvar.rs
new file mode 100644
index 000000000000..4610f0bda650
--- /dev/null
+++ b/rust/kernel/sync/condvar.rs
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! A condition variable.
+//!
+//! This module allows Rust code to use the kernel's [`struct wait_queue_head`] as a condition
+//! variable.
+
+use super::{Guard, Lock, LockClassKey, LockInfo, NeedsLockClass};
+use crate::{bindings, str::CStr, types::Opaque};
+use core::{marker::PhantomPinned, pin::Pin};
+
+/// Safely initialises a [`CondVar`] with the given name, generating a new lock class.
+///
+/// `$condvar` is the condition variable to initialise and `$name` is a literal giving its name;
+/// both are forwarded unchanged to `init_with_lockdep!`.
+#[macro_export]
+macro_rules! condvar_init {
+ ($condvar:expr, $name:literal) => {
+ $crate::init_with_lockdep!($condvar, $name)
+ };
+}
+
+// TODO: `bindgen` is not generating this constant. Figure out why.
+//
+// Hand-written stand-in that `CondVar::free_waiters` ORs with `bindings::POLLHUP`.
+// NOTE(review): the value presumably has to match the C definition of `POLLFREE` -- verify
+// against the kernel headers.
+const POLLFREE: u32 = 0x4000;
+
+/// Exposes the kernel's [`struct wait_queue_head`] as a condition variable. It allows the caller to
+/// atomically release the given lock and go to sleep. It reacquires the lock when it wakes up. And
+/// it wakes up when notified by another thread (via [`CondVar::notify_one`] or
+/// [`CondVar::notify_all`]) or because the thread received a signal.
+///
+/// [`struct wait_queue_head`]: ../../../include/linux/wait.h
+pub struct CondVar {
+ /// The kernel wait queue head that waiters are queued on.
+ pub(crate) wait_list: Opaque<bindings::wait_queue_head>,
+
+ /// A condvar needs to be pinned because it contains a `struct list_head` that is
+ /// self-referential, so it cannot be safely moved once it is initialised.
+ _pin: PhantomPinned,
+}
+
+// SAFETY: `CondVar` only uses a `struct wait_queue_head`, which is safe to use on any thread.
+#[allow(clippy::non_send_fields_in_send_ty)]
+unsafe impl Send for CondVar {}
+
+// SAFETY: `CondVar` only uses a `struct wait_queue_head`, which is safe to use on multiple threads
+// concurrently.
+unsafe impl Sync for CondVar {}
+
+impl CondVar {
+    /// Constructs a new conditional variable.
+    ///
+    /// # Safety
+    ///
+    /// The caller must call `CondVar::init` before using the conditional variable.
+    pub const unsafe fn new() -> Self {
+        Self {
+            wait_list: Opaque::uninit(),
+            _pin: PhantomPinned,
+        }
+    }
+
+    /// Atomically releases the given lock (whose ownership is proven by the guard) and puts the
+    /// thread to sleep. It wakes up when notified by [`CondVar::notify_one`] or
+    /// [`CondVar::notify_all`], or when the thread receives a signal.
+    ///
+    /// The lock is held again when this function returns.
+    ///
+    /// Returns whether there is a signal pending.
+    #[must_use = "wait returns if a signal is pending, so the caller must check the return value"]
+    pub fn wait<L: Lock<I>, I: LockInfo>(&self, guard: &mut Guard<'_, L, I>) -> bool {
+        let lock = guard.lock;
+        let wait = Opaque::<bindings::wait_queue_entry>::uninit();
+
+        // SAFETY: `wait` points to valid memory.
+        unsafe { bindings::init_wait(wait.get()) };
+
+        // Queue this thread on the wait list before the lock is released below.
+        //
+        // SAFETY: Both `wait` and `wait_list` point to valid memory.
+        unsafe {
+            bindings::prepare_to_wait_exclusive(
+                self.wait_list.get(),
+                wait.get(),
+                bindings::TASK_INTERRUPTIBLE as _,
+            )
+        };
+
+        // SAFETY: The guard is evidence that the caller owns the lock.
+        unsafe { lock.unlock(&mut guard.context) };
+
+        // SAFETY: No arguments, switches to another thread.
+        unsafe { bindings::schedule() };
+
+        // Reacquire through `relock` rather than `lock_noguard`: `relock` receives the guard
+        // context from before the unlock, so lock types that need that state to reacquire are
+        // honoured. Its default implementation simply stores the result of `lock_noguard`.
+        lock.relock(&mut guard.context);
+
+        // SAFETY: Both `wait` and `wait_list` point to valid memory.
+        unsafe { bindings::finish_wait(self.wait_list.get(), wait.get()) };
+
+        // Replace when kernel::task is upstream
+        //Task::current().signal_pending()
+        // SAFETY: `get_current` returns the task that is executing this code, which remains valid
+        // for the duration of the call.
+        unsafe { bindings::signal_pending(bindings::get_current()) != 0 }
+    }
+
+    /// Calls the kernel function to notify the appropriate number of threads with the given flags.
+    ///
+    /// `count` and `flags` are passed straight to `__wake_up`; [`CondVar::notify_one`] uses a
+    /// count of 1 and [`CondVar::notify_all`] a count of 0.
+    fn notify(&self, count: i32, flags: u32) {
+        // SAFETY: `wait_list` points to valid memory.
+        unsafe {
+            bindings::__wake_up(
+                self.wait_list.get(),
+                bindings::TASK_NORMAL,
+                count,
+                flags as _,
+            )
+        };
+    }
+
+    /// Wakes a single waiter up, if any. This is not 'sticky' in the sense that if no thread is
+    /// waiting, the notification is lost completely (as opposed to automatically waking up the
+    /// next waiter).
+    pub fn notify_one(&self) {
+        self.notify(1, 0);
+    }
+
+    /// Wakes all waiters up, if any. This is not 'sticky' in the sense that if no thread is
+    /// waiting, the notification is lost completely (as opposed to automatically waking up the
+    /// next waiter).
+    pub fn notify_all(&self) {
+        self.notify(0, 0);
+    }
+
+    /// Wakes all waiters up. If they were added by `epoll`, they are also removed from the list of
+    /// waiters. This is useful when cleaning up a condition variable that may be waited on by
+    /// threads that use `epoll`.
+    pub fn free_waiters(&self) {
+        // NOTE(review): `wait` queues its entry with `prepare_to_wait_exclusive`, and a count of
+        // 1 is passed here whereas `notify_all` passes 0 -- confirm that every waiter is really
+        // woken up as the documentation above states.
+        self.notify(1, bindings::POLLHUP | POLLFREE);
+    }
+}
+
+impl NeedsLockClass for CondVar {
+ /// Initialises the wait queue head with `name` and the first lock class key; the second key is
+ /// not used.
+ fn init(
+ self: Pin<&mut Self>,
+ name: &'static CStr,
+ key: &'static LockClassKey,
+ _: &'static LockClassKey,
+ ) {
+ // SAFETY: `wait_list` points to valid memory, and both `name` and `key` are `'static`, so
+ // they remain valid for as long as the wait queue head may refer to them.
+ unsafe {
+ bindings::__init_waitqueue_head(self.wait_list.get(), name.as_char_ptr(), key.get())
+ };
+ }
+}
diff --git a/rust/kernel/sync/guard.rs b/rust/kernel/sync/guard.rs
new file mode 100644
index 000000000000..1546e2c7dfeb
--- /dev/null
+++ b/rust/kernel/sync/guard.rs
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! A generic lock guard and trait.
+//!
+//! This module contains a lock guard that can be used with any locking primitive that implements
+//! the ([`Lock`]) trait. It also contains the definition of the trait, which can be leveraged by
+//! other constructs to work on generic locking primitives.
+
+use super::{LockClassKey, NeedsLockClass};
+use crate::{
+ str::CStr,
+ types::{Bool, False, True},
+};
+use core::pin::Pin;
+
+/// Allows mutual exclusion primitives that implement the [`Lock`] trait to automatically unlock
+/// when a guard goes out of scope. It also provides a safe and convenient way to access the data
+/// protected by the lock.
+#[must_use = "the lock unlocks immediately when the guard is unused"]
+pub struct Guard<'a, L: Lock<I> + ?Sized, I: LockInfo = WriteLock> {
+ /// The lock that this guard holds.
+ pub(crate) lock: &'a L,
+ /// The lock-specific context; it is passed to [`Lock::unlock`] when the guard is dropped.
+ pub(crate) context: L::GuardContext,
+}
+
+// SAFETY: `Guard` is sync when the data protected by the lock is also sync. This is more
+// conservative than the default compiler implementation; more details can be found on
+// <https://github.com/rust-lang/rust/issues/41622> -- it refers to `MutexGuard` from the standard
+// library.
+unsafe impl<L, I> Sync for Guard<'_, L, I>
+where
+ L: Lock<I> + ?Sized,
+ L::Inner: Sync,
+ I: LockInfo,
+{
+}
+
+impl<L: Lock<I> + ?Sized, I: LockInfo> core::ops::Deref for Guard<'_, L, I> {
+    type Target = L::Inner;
+
+    /// Returns a shared reference to the data protected by the lock.
+    fn deref(&self) -> &Self::Target {
+        let data = self.lock.locked_data().get();
+
+        // SAFETY: The caller owns the lock, so it is safe to deref the protected data.
+        unsafe { &*data }
+    }
+}
+
+impl<L: Lock<I> + ?Sized, I: LockInfo<Writable = True>> core::ops::DerefMut for Guard<'_, L, I> {
+    /// Returns a mutable reference to the data protected by the lock.
+    ///
+    /// Only available for guards whose [`LockInfo`] marks the data as writable.
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        let data = self.lock.locked_data().get();
+
+        // SAFETY: The caller owns the lock, so it is safe to deref the protected data.
+        unsafe { &mut *data }
+    }
+}
+
+impl<L: Lock<I> + ?Sized, I: LockInfo> Drop for Guard<'_, L, I> {
+    /// Releases the lock held by this guard.
+    fn drop(&mut self) {
+        let Self { lock, context } = self;
+
+        // SAFETY: The caller owns the lock, so it is safe to unlock it.
+        unsafe { lock.unlock(context) };
+    }
+}
+
+impl<'a, L: Lock<I> + ?Sized, I: LockInfo> Guard<'a, L, I> {
+ /// Constructs a new lock guard for `lock`, storing `context` for the later unlock.
+ ///
+ /// Whether the guard gives mutable access to the data depends on the `LockInfo` parameter `I`.
+ ///
+ /// # Safety
+ ///
+ /// The caller must ensure that it owns the lock.
+ pub(crate) unsafe fn new(lock: &'a L, context: L::GuardContext) -> Self {
+ Self { lock, context }
+ }
+}
+
+/// Specifies properties of a lock.
+pub trait LockInfo {
+ /// Determines if the data protected by a lock is writable.
+ ///
+ /// [`Guard`] implements `DerefMut` only when this is [`True`].
+ type Writable: Bool;
+}
+
+/// A marker for locks that only allow reading.
+pub struct ReadLock;
+impl LockInfo for ReadLock {
+ type Writable = False;
+}
+
+/// A marker for locks that allow reading and writing.
+///
+/// This is the default marker of both [`Guard`] and [`Lock`].
+pub struct WriteLock;
+impl LockInfo for WriteLock {
+ type Writable = True;
+}
+
+/// A generic mutual exclusion primitive.
+///
+/// [`Guard`] is written such that any mutual exclusion primitive that can implement this trait can
+/// also benefit from having an automatic way to unlock itself.
+///
+/// # Safety
+///
+/// - Implementers of this trait with the [`WriteLock`] marker must ensure that only one thread/CPU
+/// may access the protected data once the lock is held, that is, between calls to `lock_noguard`
+/// and `unlock`.
+/// - Implementers of all other markers must ensure that a mutable reference to the protected data
+/// is not active in any thread/CPU because at least one shared reference is active between calls
+/// to `lock_noguard` and `unlock`.
+pub unsafe trait Lock<I: LockInfo = WriteLock> {
+ /// The type of the data protected by the lock.
+ type Inner: ?Sized;
+
+ /// The type of context, if any, that needs to be stored in the guard.
+ type GuardContext;
+
+ /// Acquires the lock, making the caller its owner.
+ ///
+ /// The returned context has to be handed back to [`Lock::unlock`] when the lock is released.
+ #[must_use]
+ fn lock_noguard(&self) -> Self::GuardContext;
+
+ /// Reacquires the lock, making the caller its owner.
+ ///
+ /// The guard context before the last unlock is passed in.
+ ///
+ /// Locks that don't require this state on relock can simply use the default implementation
+ /// that calls [`Lock::lock_noguard`].
+ fn relock(&self, ctx: &mut Self::GuardContext) {
+ *ctx = self.lock_noguard();
+ }
+
+ /// Releases the lock, giving up ownership of the lock.
+ ///
+ /// # Safety
+ ///
+ /// It must only be called by the current owner of the lock.
+ unsafe fn unlock(&self, context: &mut Self::GuardContext);
+
+ /// Returns the data protected by the lock.
+ ///
+ /// [`Guard`] dereferences to the contents of this cell.
+ fn locked_data(&self) -> &core::cell::UnsafeCell<Self::Inner>;
+}
+
+/// A creator of instances of a mutual exclusion (lock) primitive.
+pub trait LockFactory {
+ /// The parametrised type of the mutual exclusion primitive that can be created by this factory.
+ type LockedType<T>;
+
+ /// Constructs a new instance of the mutual exclusion primitive protecting `data`.
+ ///
+ /// # Safety
+ ///
+ /// The caller must call [`LockIniter::init_lock`] before using the lock.
+ unsafe fn new_lock<T>(data: T) -> Self::LockedType<T>;
+}
+
+/// A lock that can be initialised with a single lock class key.
+///
+/// Every implementer also gets [`NeedsLockClass`] through a blanket implementation.
+pub trait LockIniter {
+ /// Initialises the lock instance so that it can be safely used.
+ fn init_lock(self: Pin<&mut Self>, name: &'static CStr, key: &'static LockClassKey);
+}
+
+impl<L: LockIniter> NeedsLockClass for L {
+    /// Initialises the lock through [`LockIniter::init_lock`]; only the first key is used.
+    fn init(
+        self: Pin<&mut Self>,
+        name: &'static CStr,
+        key: &'static LockClassKey,
+        _unused_key: &'static LockClassKey,
+    ) {
+        L::init_lock(self, name, key);
+    }
+}
diff --git a/rust/kernel/sync/mutex.rs b/rust/kernel/sync/mutex.rs
new file mode 100644
index 000000000000..c40396c15453
--- /dev/null
+++ b/rust/kernel/sync/mutex.rs
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! A kernel mutex.
+//!
+//! This module allows Rust code to use the kernel's [`struct mutex`].
+
+use super::{Guard, Lock, LockClassKey, LockFactory, LockIniter, WriteLock};
+use crate::{bindings, str::CStr, types::Opaque};
+use core::{cell::UnsafeCell, marker::PhantomPinned, pin::Pin};
+
+/// Safely initialises a [`Mutex`] with the given name, generating a new lock class.
+///
+/// `$mutex` is the expression designating the mutex to initialise and `$name` is a literal name;
+/// both are forwarded unchanged to `init_with_lockdep!`.
+#[macro_export]
+macro_rules! mutex_init {
+ ($mutex:expr, $name:literal) => {
+ $crate::init_with_lockdep!($mutex, $name)
+ };
+}
+
+/// Exposes the kernel's [`struct mutex`]. When multiple threads attempt to lock the same mutex,
+/// only one at a time is allowed to progress; the others will block (sleep) until the mutex is
+/// unlocked, at which point another thread will be allowed to wake up and make progress.
+///
+/// A [`Mutex`] must first be initialised with a call to [`Mutex::init_lock`] before it can be
+/// used. The [`mutex_init`] macro is provided to automatically assign a new lock class to a mutex
+/// instance.
+///
+/// Since it may block, [`Mutex`] needs to be used with care in atomic contexts.
+///
+/// [`struct mutex`]: ../../../include/linux/mutex.h
+pub struct Mutex<T: ?Sized> {
+ /// The kernel `struct mutex` object. It is created uninitialised by [`Mutex::new`] and only
+ /// becomes usable once [`Mutex::init_lock`] has run.
+ mutex: Opaque<bindings::mutex>,
+
+ /// A mutex needs to be pinned because it contains a `struct list_head` that is
+ /// self-referential, so it cannot be safely moved once it is initialised.
+ _pin: PhantomPinned,
+
+ /// The data protected by the mutex.
+ data: UnsafeCell<T>,
+}
+
+// SAFETY: `Mutex` can be transferred across thread boundaries iff the data it protects can.
+// NOTE(review): the lint below is presumably silenced because of the raw C state kept in the
+// `mutex` field -- confirm.
+#[allow(clippy::non_send_fields_in_send_ty)]
+unsafe impl<T: ?Sized + Send> Send for Mutex<T> {}
+
+// SAFETY: `Mutex` serialises the interior mutability it provides, so it is `Sync` as long as the
+// data it protects is `Send`.
+// Note that `T: Sync` is deliberately not required by the bound below.
+unsafe impl<T: ?Sized + Send> Sync for Mutex<T> {}
+
+impl<T> Mutex<T> {
+    /// Creates a mutex wrapping `t`, leaving the underlying kernel `struct mutex` uninitialised.
+    ///
+    /// # Safety
+    ///
+    /// The returned mutex must not be used until [`Mutex::init_lock`] has been called on it.
+    pub const unsafe fn new(t: T) -> Self {
+        // The fields are independent of each other; they are listed in declaration order.
+        Self {
+            mutex: Opaque::uninit(),
+            _pin: PhantomPinned,
+            data: UnsafeCell::new(t),
+        }
+    }
+}
+
+impl<T: ?Sized> Mutex<T> {
+    /// Acquires the mutex and returns a guard through which the protected data is reached.
+    ///
+    /// At most one thread at a time can own the mutex, and therefore access the data.
+    pub fn lock(&self) -> Guard<'_, Self> {
+        let guard_ctx = self.lock_noguard();
+        // SAFETY: The call above acquired the mutex, so the current thread is its owner.
+        unsafe { Guard::new(self, guard_ctx) }
+    }
+}
+
+// The `T` of the implementing `Mutex<T>` is not used by the factory: the protected type of the
+// created lock is the `U` chosen by the caller of `new_lock`.
+impl<T> LockFactory for Mutex<T> {
+ type LockedType<U> = Mutex<U>;
+
+ unsafe fn new_lock<U>(data: U) -> Mutex<U> {
+ // SAFETY: The safety requirements of `new_lock` also require that `init_lock` be called.
+ unsafe { Mutex::new(data) }
+ }
+}
+
+impl<T> LockIniter for Mutex<T> {
+ fn init_lock(self: Pin<&mut Self>, name: &'static CStr, key: &'static LockClassKey) {
+ // SAFETY: `self.mutex` is storage owned by this (pinned) `Mutex`, and both `name` and `key`
+ // are `'static`, so every pointer passed to the C side remains valid for as long as the
+ // mutex exists.
+ unsafe { bindings::__mutex_init(self.mutex.get(), name.as_char_ptr(), key.get()) };
+ }
+}
+
+/// A guard context for locks that do not need to keep any state between lock and unlock.
+pub struct EmptyGuardContext;
+
+// SAFETY: The underlying kernel `struct mutex` object ensures mutual exclusion.
+unsafe impl<T: ?Sized> Lock for Mutex<T> {
+ type Inner = T;
+ // No state has to be carried from `lock_noguard` to `unlock`.
+ type GuardContext = EmptyGuardContext;
+
+ /// Acquires the kernel mutex without creating a guard.
+ fn lock_noguard(&self) -> EmptyGuardContext {
+ // SAFETY: `mutex` points to valid memory.
+ unsafe { bindings::mutex_lock(self.mutex.get()) };
+ EmptyGuardContext
+ }
+
+ /// Releases the kernel mutex; the context argument is ignored.
+ unsafe fn unlock(&self, _: &mut EmptyGuardContext) {
+ // SAFETY: The safety requirements of the function ensure that the mutex is owned by the
+ // caller.
+ unsafe { bindings::mutex_unlock(self.mutex.get()) };
+ }
+
+ /// Returns the cell holding the protected data.
+ fn locked_data(&self) -> &UnsafeCell<T> {
+ &self.data
+ }
+}
+
+/// A revocable mutex.
+///
+/// That is, a mutex to which access can be revoked at runtime. It is a specialisation of the more
+/// generic [`super::revocable::Revocable`].
+///
+/// # Examples
+///
+/// ```
+/// # use kernel::sync::RevocableMutex;
+/// # use kernel::revocable_init;
+/// # use core::pin::Pin;
+///
+/// struct Example {
+/// a: u32,
+/// b: u32,
+/// }
+///
+/// fn read_sum(v: &RevocableMutex<Example>) -> Option<u32> {
+/// let guard = v.try_write()?;
+/// Some(guard.a + guard.b)
+/// }
+///
+/// // SAFETY: We call `revocable_init` immediately below.
+/// let mut v = unsafe { RevocableMutex::new(Example { a: 10, b: 20 }) };
+/// // SAFETY: We never move out of `v`.
+/// let pinned = unsafe { Pin::new_unchecked(&mut v) };
+/// revocable_init!(pinned, "example::v");
+/// assert_eq!(read_sum(&v), Some(30));
+/// v.revoke();
+/// assert_eq!(read_sum(&v), None);
+/// ```
+// `Mutex<()>` only serves as the lock factory here; `Revocable` instantiates the actual lock over
+// its own inner state through `LockFactory::LockedType`.
+pub type RevocableMutex<T> = super::revocable::Revocable<Mutex<()>, T>;
+
+/// A guard for a revocable mutex.
+///
+/// The lock mode `I` defaults to [`WriteLock`].
+pub type RevocableMutexGuard<'a, T, I = WriteLock> =
+ super::revocable::RevocableGuard<'a, Mutex<()>, T, I>;
diff --git a/rust/kernel/sync/rcu.rs b/rust/kernel/sync/rcu.rs
new file mode 100644
index 000000000000..1a1c8ea49359
--- /dev/null
+++ b/rust/kernel/sync/rcu.rs
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! RCU support.
+//!
+//! C header: [`include/linux/rcupdate.h`](../../../../include/linux/rcupdate.h)
+
+use crate::bindings;
+use core::marker::PhantomData;
+
+/// Evidence that the RCU read side lock is held on the current thread/CPU.
+///
+/// The type is explicitly not `Send` because this property is per-thread/CPU.
+///
+/// # Invariants
+///
+/// The RCU read side lock is actually held while instances of this guard exist.
+pub struct Guard {
+ // Zero-sized marker: raw pointers are neither `Send` nor `Sync`, so holding a
+ // `PhantomData<*mut ()>` opts the guard out of both auto traits.
+ _not_send: PhantomData<*mut ()>,
+}
+
+impl Guard {
+    /// Takes the RCU read side lock and returns a guard that proves it is held.
+    pub fn new() -> Self {
+        // SAFETY: An FFI call with no additional requirements.
+        unsafe { bindings::rcu_read_lock() };
+
+        // INVARIANT: The lock taken just above is held from the moment the guard exists.
+        Self { _not_send: PhantomData }
+    }
+
+    /// Releases the RCU read side lock explicitly.
+    ///
+    /// The body is empty on purpose: taking `self` by value drops the guard on return.
+    pub fn unlock(self) {}
+}
+
+impl Default for Guard {
+    /// Same as [`Guard::new`]: the RCU read side lock is acquired.
+    fn default() -> Self {
+        Guard::new()
+    }
+}
+
+impl Drop for Guard {
+ /// Releases the RCU read side lock that was acquired when the guard was created.
+ fn drop(&mut self) {
+ // SAFETY: By the type invariants, the rcu read side is locked, so it is ok to unlock it.
+ unsafe { bindings::rcu_read_unlock() };
+ }
+}
+
+/// Acquires the RCU read side lock.
+///
+/// This is a free-function shorthand for [`Guard::new`]; the lock is held for as long as the
+/// returned [`Guard`] is alive.
+pub fn read_lock() -> Guard {
+ Guard::new()
+}
diff --git a/rust/kernel/sync/revocable.rs b/rust/kernel/sync/revocable.rs
new file mode 100644
index 000000000000..db716182a2a8
--- /dev/null
+++ b/rust/kernel/sync/revocable.rs
@@ -0,0 +1,246 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Synchronisation primitives where access to their contents can be revoked at runtime.
+
+use crate::{
+ str::CStr,
+ sync::{Guard, Lock, LockClassKey, LockFactory, LockInfo, NeedsLockClass, ReadLock, WriteLock},
+ types::True,
+};
+use core::{
+ mem::MaybeUninit,
+ ops::{Deref, DerefMut},
+ pin::Pin,
+};
+
+/// The state within the revocable synchronisation primitive.
+///
+/// A plain `Option<T>` is not used because the contents are implicitly pinned and therefore have
+/// to be dropped in place.
+///
+/// # Invariants
+///
+/// The `is_available` field determines if `data` is initialised.
+pub struct Inner<T> {
+ // `true` while `data` holds a live value; cleared when the value is dropped.
+ is_available: bool,
+ // Storage for the wrapped value; only initialised while `is_available` is `true`.
+ data: MaybeUninit<T>,
+}
+
+impl<T> Inner<T> {
+    /// Wraps `data` in a state that is marked as available.
+    fn new(data: T) -> Self {
+        let data = MaybeUninit::new(data);
+        // INVARIANT: `is_available` is `true` and `data` is initialised, so the two agree.
+        Self {
+            is_available: true,
+            data,
+        }
+    }
+
+    /// Drops the wrapped value where it lives and marks the state as no longer available.
+    ///
+    /// Calling this again once the value has been dropped does nothing.
+    fn drop_in_place(&mut self) {
+        if self.is_available {
+            // INVARIANT: The flag is cleared here and `data` is dropped right below, so the
+            // state matches again by the time this function returns.
+            self.is_available = false;
+
+            // SAFETY: By the type invariants, `data` is valid because `is_available` was true
+            // on entry.
+            unsafe { self.data.assume_init_drop() };
+        }
+    }
+}
+
+impl<T> Drop for Inner<T> {
+ fn drop(&mut self) {
+ // `MaybeUninit` never drops its contents on its own, so drop the value explicitly; this is
+ // a no-op if the value has already been dropped.
+ self.drop_in_place();
+ }
+}
+
+/// Revocable synchronisation primitive.
+///
+/// That is, it wraps synchronisation primitives so that access to their contents can be revoked at
+/// runtime, rendering them inaccessible.
+///
+/// Once access is revoked and all concurrent users complete (i.e., all existing instances of
+/// [`RevocableGuard`] are dropped), the wrapped object is also dropped.
+///
+/// For better ergonomics, we advise the use of specialisations of this struct, for example,
+/// [`super::RevocableMutex`] and [`super::RevocableRwSemaphore`]. Callers that do not need to
+/// sleep while holding on to a guard should use [`crate::revocable::Revocable`] instead, which is
+/// more efficient as it uses RCU to keep objects alive.
+///
+/// # Examples
+///
+/// ```
+/// # use kernel::sync::{Mutex, Revocable};
+/// # use kernel::revocable_init;
+/// # use core::pin::Pin;
+///
+/// struct Example {
+/// a: u32,
+/// b: u32,
+/// }
+///
+/// fn add_two(v: &Revocable<Mutex<()>, Example>) -> Option<u32> {
+/// let mut guard = v.try_write()?;
+/// guard.a += 2;
+/// guard.b += 2;
+/// Some(guard.a + guard.b)
+/// }
+///
+/// // SAFETY: We call `revocable_init` immediately below.
+/// let mut v = unsafe { Revocable::<Mutex<()>, Example>::new(Example { a: 10, b: 20 }) };
+/// // SAFETY: We never move out of `v`.
+/// let pinned = unsafe { Pin::new_unchecked(&mut v) };
+/// revocable_init!(pinned, "example::v");
+/// assert_eq!(add_two(&v), Some(34));
+/// v.revoke();
+/// assert_eq!(add_two(&v), None);
+/// ```
+pub struct Revocable<F: LockFactory, T> {
+ /// The lock created by `F`, protecting the revocable state (the wrapped object plus its
+ /// availability flag).
+ inner: F::LockedType<Inner<T>>,
+}
+
+/// Safely initialises a [`Revocable`] instance with the given name, generating a new lock class.
+///
+/// Both arguments are handed over unchanged to `init_with_lockdep!`.
+#[macro_export]
+macro_rules! revocable_init {
+    ($revocable:expr, $name:literal) => {
+        $crate::init_with_lockdep!($revocable, $name)
+    };
+}
+
+impl<F: LockFactory, T> Revocable<F, T> {
+    /// Wraps `data` in a new revocable instance backed by a lock created by `F`.
+    ///
+    /// # Safety
+    ///
+    /// The caller must call [`Revocable::init`] before using the revocable synch primitive.
+    pub unsafe fn new(data: T) -> Self {
+        let state = Inner::new(data);
+
+        // SAFETY: The safety requirements of this function require that `Revocable::init` be
+        // called before the returned object can be used, and lock initialisation is performed
+        // from `Revocable::init`.
+        let inner = unsafe { F::new_lock(state) };
+
+        Self { inner }
+    }
+}
+
+// Initialising a `Revocable` amounts to initialising the lock it wraps; this is only available
+// when that lock type itself implements `NeedsLockClass`.
+impl<F: LockFactory, T> NeedsLockClass for Revocable<F, T>
+where
+ F::LockedType<Inner<T>>: NeedsLockClass,
+{
+ fn init(
+ self: Pin<&mut Self>,
+ name: &'static CStr,
+ key1: &'static LockClassKey,
+ key2: &'static LockClassKey,
+ ) {
+ // SAFETY: `inner` is pinned when `self` is.
+ let inner = unsafe { self.map_unchecked_mut(|r| &mut r.inner) };
+ // Forward the name and both keys untouched to the wrapped lock.
+ inner.init(name, key1, key2);
+ }
+}
+
+impl<F: LockFactory, T> Revocable<F, T>
+where
+    F::LockedType<Inner<T>>: Lock<Inner = Inner<T>>,
+{
+    /// Revokes access to and drops the wrapped object.
+    ///
+    /// The lock is acquired first, so revocation and dropping only happen after ongoing
+    /// accessors complete.
+    pub fn revoke(&self) {
+        let mut state = self.lock();
+        state.drop_in_place();
+    }
+
+    /// Tries to lock the wrapped (revocable) object in write (exclusive) mode.
+    ///
+    /// Returns `None` if the object has been revoked and is therefore no longer accessible.
+    ///
+    /// Otherwise returns a guard that gives access to the object; the object is guaranteed to
+    /// remain accessible while the guard is alive. Callers are allowed to sleep while holding
+    /// on to the returned guard.
+    pub fn try_write(&self) -> Option<RevocableGuard<'_, F, T, WriteLock>> {
+        let state = self.lock();
+        if state.is_available {
+            Some(RevocableGuard::new(state))
+        } else {
+            // `state` goes out of scope here, so the lock guard does not outlive this call.
+            None
+        }
+    }
+
+    /// Acquires the inner lock, regardless of whether the object has been revoked.
+    fn lock(&self) -> Guard<'_, F::LockedType<Inner<T>>> {
+        let guard_ctx = self.inner.lock_noguard();
+        // SAFETY: The lock was acquired in the call above.
+        unsafe { Guard::new(&self.inner, guard_ctx) }
+    }
+}
+
+impl<F: LockFactory, T> Revocable<F, T>
+where
+    F::LockedType<Inner<T>>: Lock<ReadLock, Inner = Inner<T>>,
+{
+    /// Tries to lock the wrapped (revocable) object in read (shared) mode.
+    ///
+    /// Returns `None` if the object has been revoked and is therefore no longer accessible.
+    ///
+    /// Otherwise returns a guard that gives access to the object; the object is guaranteed to
+    /// remain accessible while the guard is alive. Callers are allowed to sleep while holding
+    /// on to the returned guard.
+    pub fn try_read(&self) -> Option<RevocableGuard<'_, F, T, ReadLock>> {
+        let guard_ctx = self.inner.lock_noguard();
+        // SAFETY: The lock was acquired in the call above.
+        let state = unsafe { Guard::new(&self.inner, guard_ctx) };
+        if state.is_available {
+            Some(RevocableGuard::new(state))
+        } else {
+            // `state` goes out of scope here, so the lock guard does not outlive this call.
+            None
+        }
+    }
+}
+
+/// A guard that allows access to a revocable object and keeps it alive.
+///
+/// `I` is the mode (for example [`ReadLock`] or [`WriteLock`]) in which the underlying lock is
+/// held.
+pub struct RevocableGuard<'a, F: LockFactory, T, I: LockInfo>
+where
+ F::LockedType<Inner<T>>: Lock<I, Inner = Inner<T>>,
+{
+ // The guard of the underlying lock, which protects the revocable state.
+ guard: Guard<'a, F::LockedType<Inner<T>>, I>,
+}
+
+impl<'a, F: LockFactory, T, I: LockInfo> RevocableGuard<'a, F, T, I>
+where
+ F::LockedType<Inner<T>>: Lock<I, Inner = Inner<T>>,
+{
+ // Private constructor. The `Deref`/`DerefMut` impls of this type treat the wrapped data as
+ // initialised, so callers must only pass a guard whose state has `is_available == true`.
+ fn new(guard: Guard<'a, F::LockedType<Inner<T>>, I>) -> Self {
+ Self { guard }
+ }
+}
+
+// Only available for lock modes that are writable.
+impl<F: LockFactory, T, I: LockInfo<Writable = True>> RevocableGuard<'_, F, T, I>
+where
+ F::LockedType<Inner<T>>: Lock<I, Inner = Inner<T>>,
+{
+ /// Returns a pinned mutable reference to the wrapped object.
+ pub fn as_pinned_mut(&mut self) -> Pin<&mut T> {
+ // SAFETY: Revocable mutexes must be pinned, so we choose to always project the data as
+ // pinned as well (i.e., we guarantee we never move it).
+ // Note that `&mut *self` is coerced to `&mut T` through this guard's `DerefMut` impl, as
+ // required by the return type.
+ unsafe { Pin::new_unchecked(&mut *self) }
+ }
+}
+
+impl<F: LockFactory, T, I: LockInfo> Deref for RevocableGuard<'_, F, T, I>
+where
+ F::LockedType<Inner<T>>: Lock<I, Inner = Inner<T>>,
+{
+ type Target = T;
+
+ fn deref(&self) -> &Self::Target {
+ // SAFETY: Guards are only created by `try_read`/`try_write` after observing
+ // `is_available == true` with the lock held, and revocation needs the lock as well, so by
+ // the invariants of `Inner` the data is initialised for as long as `self.guard` is alive.
+ unsafe { &*self.guard.data.as_ptr() }
+ }
+}
+
+impl<F: LockFactory, T, I: LockInfo<Writable = True>> DerefMut for RevocableGuard<'_, F, T, I>
+where
+ F::LockedType<Inner<T>>: Lock<I, Inner = Inner<T>>,
+{
+ fn deref_mut(&mut self) -> &mut Self::Target {
+ // SAFETY: Guards are only created by `try_read`/`try_write` after observing
+ // `is_available == true` with the lock held, and revocation needs the lock as well, so by
+ // the invariants of `Inner` the data is initialised for as long as `self.guard` is alive.
+ unsafe { &mut *self.guard.data.as_mut_ptr() }
+ }
+}
diff --git a/rust/kernel/sync/smutex.rs b/rust/kernel/sync/smutex.rs
new file mode 100644
index 000000000000..6cf92260ace6
--- /dev/null
+++ b/rust/kernel/sync/smutex.rs
@@ -0,0 +1,290 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! A simple mutex implementation.
+//!
+//! Differently from [`super::Mutex`], this implementation does not require pinning, so the
+//! ergonomics are much improved, though the implementation is not as feature-rich as the C-based
+//! one. The main advantage is that it doesn't impose unsafe blocks on callers.
+//!
+//! The mutex is made up of 2 words in addition to the data it protects. The first one is accessed
+//! concurrently by threads trying to acquire and release the mutex, it contains a "stack" of
+//! waiters and a "locked" bit; the second one is only accessible by the thread holding the mutex,
+//! it contains a queue of waiters. Waiters are moved from the stack to the queue when the mutex is
+//! next unlocked while the stack is non-empty and the queue is empty. A single waiter is popped
+//! from the wait queue when the owner of the mutex unlocks it.
+//!
+//! The initial state of the mutex is `<locked=0, stack=[], queue=[]>`, meaning that it isn't
+//! locked and both the waiter stack and queue are empty.
+//!
+//! A lock operation transitions the mutex to state `<locked=1, stack=[], queue=[]>`.
+//!
+//! An unlock operation transitions the mutex back to the initial state, however, an attempt to
+//! lock the mutex while it's already locked results in a waiter being created (on the stack) and
+//! pushed onto the stack, so the state is `<locked=1, stack=[W1], queue=[]>`.
+//!
+//! Another thread trying to lock the mutex results in another waiter being pushed onto the stack,
+//! so the state becomes `<locked=1, stack=[W2, W1], queue=[]>`.
+//!
+//! In such states (queue is empty but stack is non-empty), the unlock operation is performed in
+//! three steps:
+//! 1. The stack is popped (but the mutex remains locked), so the state is:
+//! `<locked=1, stack=[], queue=[]>`
+//! 2. The stack is turned into a queue by reversing it, so the state is:
+//! `<locked=1, stack=[], queue=[W1, W2]>`
+//! 3. Finally, the lock is released, and the first waiter is awakened, so the state is:
+//! `<locked=0, stack=[], queue=[W2]>`
+//!
+//! The mutex remains accessible to any threads attempting to lock it in any of the intermediate
+//! states above. For example, while it is locked, other threads may add waiters to the stack
+//! (which is ok because we want to release the ones on the queue first); another example is that
+//! another thread may acquire the mutex before waiter W1 in the example above, this makes the
+//! mutex unfair but this is desirable because the thread is running already and may in fact
+//! release the lock before W1 manages to get scheduled -- it also mitigates the lock convoy
+//! problem when the releasing thread wants to immediately acquire the lock again: it will be
+//! allowed to do so (as long as W1 doesn't get to it first).
+//!
+//! When the waiter queue is non-empty, unlocking the mutex always results in the first waiter being
+//! popped from the queue and awakened.
+
+use super::{mutex::EmptyGuardContext, Guard, Lock, LockClassKey, LockFactory, LockIniter};
+use crate::{bindings, str::CStr, types::Opaque};
+use core::sync::atomic::{AtomicUsize, Ordering};
+use core::{cell::UnsafeCell, pin::Pin};
+
+/// The value that is OR'd into the [`Mutex::waiter_stack`] when the mutex is locked.
+///
+/// The remaining bits of that word hold a pointer to the waiter at the top of the stack, so the
+/// scheme relies on waiter pointers having their least-significant bit clear.
+const LOCKED: usize = 1;
+
+/// A simple mutex.
+///
+/// This is a mutual-exclusion primitive. It guarantees that only one thread at a time may access the
+/// data it protects. When multiple threads attempt to lock the same mutex, only one at a time is
+/// allowed to progress, the others will block (sleep) until the mutex is unlocked, at which point
+/// another thread will be allowed to wake up and make progress.
+///
+/// # Examples
+///
+/// ```
+/// # use kernel::{Result, sync::Arc, sync::smutex::Mutex};
+///
+/// struct Example {
+/// a: u32,
+/// b: u32,
+/// }
+///
+/// static EXAMPLE: Mutex<Example> = Mutex::new(Example { a: 10, b: 20 });
+///
+/// fn inc_a(example: &Mutex<Example>) {
+/// let mut guard = example.lock();
+/// guard.a += 1;
+/// }
+///
+/// fn sum(example: &Mutex<Example>) -> u32 {
+/// let guard = example.lock();
+/// guard.a + guard.b
+/// }
+///
+/// fn try_new(a: u32, b: u32) -> Result<Arc<Mutex<Example>>> {
+/// Arc::try_new(Mutex::new(Example { a, b }))
+/// }
+///
+/// assert_eq!(EXAMPLE.lock().a, 10);
+/// assert_eq!(sum(&EXAMPLE), 30);
+///
+/// inc_a(&EXAMPLE);
+///
+/// assert_eq!(EXAMPLE.lock().a, 11);
+/// assert_eq!(sum(&EXAMPLE), 31);
+///
+/// # try_new(42, 43);
+/// ```
+pub struct Mutex<T: ?Sized> {
+ /// A stack of waiters.
+ ///
+ /// It is accessed atomically by threads locking/unlocking the mutex. Additionally, the
+ /// least-significant bit is used to indicate whether the mutex is locked or not.
+ waiter_stack: AtomicUsize,
+
+ /// A queue of waiters.
+ ///
+ /// This is only accessible to the holder of the mutex. When the owner of the mutex is
+ /// unlocking it, it will move waiters from the stack to the queue when the queue is empty and
+ /// the stack non-empty. A null pointer means that the queue is empty.
+ waiter_queue: UnsafeCell<*mut Waiter>,
+
+ /// The data protected by the mutex.
+ data: UnsafeCell<T>,
+}
+
+// SAFETY: `Mutex` can be transferred across thread boundaries iff the data it protects can.
+// NOTE(review): the lint below is presumably silenced because of the raw `*mut Waiter` kept in
+// `waiter_queue` -- confirm.
+#[allow(clippy::non_send_fields_in_send_ty)]
+unsafe impl<T: ?Sized + Send> Send for Mutex<T> {}
+
+// SAFETY: `Mutex` serialises the interior mutability it provides, so it is `Sync` as long as the
+// data it protects is `Send`.
+// Note that `T: Sync` is deliberately not required by the bound below.
+unsafe impl<T: ?Sized + Send> Sync for Mutex<T> {}
+
+impl<T> Mutex<T> {
+    /// Creates a new, unlocked mutex protecting `data`.
+    ///
+    /// Being a `const fn`, it can also be used to initialise statics.
+    pub const fn new(data: T) -> Self {
+        // Initial state: not locked (zero word), empty waiter stack and empty waiter queue.
+        Self {
+            data: UnsafeCell::new(data),
+            waiter_queue: UnsafeCell::new(core::ptr::null_mut()),
+            waiter_stack: AtomicUsize::new(0),
+        }
+    }
+}
+
+impl<T: ?Sized> Mutex<T> {
+    /// Acquires the mutex and returns a guard through which the protected data is reached.
+    ///
+    /// At most one thread at a time can own the mutex, and therefore access the data.
+    pub fn lock(&self) -> Guard<'_, Self> {
+        let guard_ctx = self.lock_noguard();
+        // SAFETY: The call above acquired the mutex, so the current thread is its owner.
+        unsafe { Guard::new(self, guard_ctx) }
+    }
+}
+
+// The `T` of the implementing `Mutex<T>` is not used by the factory: the protected type of the
+// created lock is the `U` chosen by the caller of `new_lock`.
+impl<T> LockFactory for Mutex<T> {
+ type LockedType<U> = Mutex<U>;
+
+ // `unsafe` only because the trait signature demands it: the body is a plain safe constructor
+ // call.
+ unsafe fn new_lock<U>(data: U) -> Mutex<U> {
+ Mutex::new(data)
+ }
+}
+
+impl<T> LockIniter for Mutex<T> {
+ // Intentionally a no-op: the name and the lock class key are ignored, as this mutex is fully
+ // constructed by `Mutex::new`.
+ fn init_lock(self: Pin<&mut Self>, _name: &'static CStr, _key: &'static LockClassKey) {}
+}
+
+// SAFETY: The mutex implementation ensures mutual exclusion.
+unsafe impl<T: ?Sized> Lock for Mutex<T> {
+ type Inner = T;
+ // No state has to be carried from `lock_noguard` to `unlock`.
+ type GuardContext = EmptyGuardContext;
+
+ /// Acquires the mutex, sleeping on a stack-allocated waiter while another thread owns it.
+ fn lock_noguard(&self) -> EmptyGuardContext {
+ loop {
+ // Try the fast path: the caller owns the mutex if we manage to set the `LOCKED` bit.
+ //
+ // The `acquire` order matches with one of the `release` ones in `unlock`.
+ if self.waiter_stack.fetch_or(LOCKED, Ordering::Acquire) & LOCKED == 0 {
+ return EmptyGuardContext;
+ }
+
+ // Slow path: we'll likely need to wait, so initialise a local waiter struct.
+ let mut waiter = Waiter {
+ completion: Opaque::uninit(),
+ next: core::ptr::null_mut(),
+ };
+
+ // SAFETY: The completion object was just allocated on the stack and is valid for
+ // writes.
+ unsafe { bindings::init_completion(waiter.completion.get()) };
+
+ // Try to enqueue the waiter by pushing it onto the waiter stack. We want to do it
+ // only while the mutex is locked by another thread.
+ loop {
+ // We use relaxed here because we're just reading the value we'll CAS later (which
+ // has a stronger ordering on success).
+ let mut v = self.waiter_stack.load(Ordering::Relaxed);
+ if v & LOCKED == 0 {
+ // The mutex was released by another thread, so try to acquire it.
+ //
+ // The `acquire` order matches with one of the `release` ones in `unlock`.
+ v = self.waiter_stack.fetch_or(LOCKED, Ordering::Acquire);
+ if v & LOCKED == 0 {
+ return EmptyGuardContext;
+ }
+ }
+
+ // Link the new waiter in front of the current top of the stack (the pointer bits of
+ // `v`).
+ waiter.next = (v & !LOCKED) as _;
+
+ // The `release` order matches with `acquire` in `unlock` when the stack is swapped
+ // out. We use release order here to ensure that the other thread can see our
+ // waiter fully initialised.
+ if self
+ .waiter_stack
+ .compare_exchange(
+ v,
+ (&mut waiter as *mut _ as usize) | LOCKED,
+ Ordering::Release,
+ Ordering::Relaxed,
+ )
+ .is_ok()
+ {
+ break;
+ }
+ }
+
+ // Wait for the owner of the lock to wake this thread up. Once woken, the outer loop
+ // runs again and this thread competes for the mutex from scratch.
+ //
+ // SAFETY: Completion object was previously initialised with `init_completion` and
+ // remains valid.
+ unsafe { bindings::wait_for_completion(waiter.completion.get()) };
+ }
+ }
+
+ /// Releases the mutex and, if there are waiters, wakes up the one at the head of the queue.
+ unsafe fn unlock(&self, _: &mut EmptyGuardContext) {
+ // SAFETY: The caller owns the mutex, so it is safe to manipulate the local wait queue.
+ let mut waiter = unsafe { *self.waiter_queue.get() };
+ loop {
+ // If we have a non-empty local queue of waiters, pop the first one, release the mutex,
+ // and wake it up (the popped waiter).
+ if !waiter.is_null() {
+ // SAFETY: The caller owns the mutex, so it is safe to manipulate the local wait
+ // queue.
+ unsafe { *self.waiter_queue.get() = (*waiter).next };
+
+ // The `release` order matches with one of the `acquire` ones in `lock_noguard`.
+ self.waiter_stack.fetch_and(!LOCKED, Ordering::Release);
+
+ // Wake up the first waiter.
+ //
+ // SAFETY: The completion object was initialised before being added to the wait
+ // stack, and its owner stays blocked in `wait_for_completion` until it is completed
+ // here, so it is still valid for writes.
+ unsafe { bindings::complete_all((*waiter).completion.get()) };
+ return;
+ }
+
+ // Try the fast path when there are no local waiters.
+ //
+ // The `release` order matches with one of the `acquire` ones in `lock_noguard`.
+ if self
+ .waiter_stack
+ .compare_exchange(LOCKED, 0, Ordering::Release, Ordering::Relaxed)
+ .is_ok()
+ {
+ return;
+ }
+
+ // We don't have a local queue, so pull the whole stack off, reverse it, and use it as a
+ // local queue. Since we're manipulating this queue, we need to keep ownership of the
+ // mutex.
+ //
+ // The `acquire` order matches with the `release` one in `lock_noguard` where a waiter
+ // is pushed onto the stack. It ensures that we see the fully-initialised waiter.
+ let mut stack =
+ (self.waiter_stack.swap(LOCKED, Ordering::Acquire) & !LOCKED) as *mut Waiter;
+ // In-place list reversal; `waiter` is null on entry and ends up pointing at the element
+ // that was at the bottom of the stack.
+ while !stack.is_null() {
+ // SAFETY: The caller still owns the mutex, so it is safe to manipulate the
+ // elements of the wait stack, which will soon become the wait queue.
+ let next = unsafe { (*stack).next };
+
+ // SAFETY: Same as above.
+ unsafe { (*stack).next = waiter };
+
+ waiter = stack;
+ stack = next;
+ }
+ }
+ }
+
+ /// Returns the cell holding the protected data.
+ fn locked_data(&self) -> &UnsafeCell<T> {
+ &self.data
+ }
+}
+
+/// A thread waiting for the mutex.
+///
+/// Instances live on the stack of the waiting thread and are linked into the waiter stack/queue
+/// of the mutex through raw pointers.
+struct Waiter {
+ /// Completed by the unlocking thread to wake the waiting thread up.
+ completion: Opaque<bindings::completion>,
+ /// The next waiter in the stack or queue; null at the end of the list.
+ next: *mut Waiter,
+}
diff --git a/rust/kernel/time.rs b/rust/kernel/time.rs
new file mode 100644
index 000000000000..881f45492268
--- /dev/null
+++ b/rust/kernel/time.rs
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! Timekeeping functions.
+//!
+//! C header: [`include/linux/ktime.h`](../../../../include/linux/ktime.h)
+//! C header: [`include/linux/timekeeping.h`](../../../../include/linux/timekeeping.h)
+
+use crate::bindings;
+use core::time::Duration;
+
+/// Returns the kernel time elapsed since boot, excluding time spent sleeping, as a [`Duration`].
+///
+/// # Panics
+///
+/// Panics if the value returned by the C side cannot be converted to a `u64` number of
+/// nanoseconds (for example, if it is negative).
+pub fn ktime_get() -> Duration {
+ // SAFETY: FFI call that takes no arguments. NOTE(review): presumably it has no further
+ // requirements on the calling context -- confirm.
+ Duration::from_nanos(unsafe { bindings::ktime_get() }.try_into().unwrap())
+}
+
+/// Returns the kernel time elapsed since boot, including time spent sleeping, as a [`Duration`].
+///
+/// # Panics
+///
+/// Panics if the value returned by the C side cannot be converted to a `u64` number of
+/// nanoseconds (for example, if it is negative).
+pub fn ktime_get_boottime() -> Duration {
+ Duration::from_nanos(
+ // SAFETY: FFI call whose only argument is a constant offset selector. NOTE(review):
+ // presumably it has no further requirements on the calling context -- confirm.
+ unsafe { bindings::ktime_get_with_offset(bindings::tk_offsets_TK_OFFS_BOOT) }
+ .try_into()
+ .unwrap(),
+ )
+}
diff --git a/rust/kernel/types.rs b/rust/kernel/types.rs
index e84e51ec9716..6c49adf161c3 100644
--- a/rust/kernel/types.rs
+++ b/rust/kernel/types.rs
@@ -2,7 +2,220 @@
//! Kernel types.
-use core::{cell::UnsafeCell, mem::MaybeUninit};
+use alloc::boxed::Box;
+use core::{
+ cell::UnsafeCell,
+ mem::MaybeUninit,
+ ops::{Deref, DerefMut},
+};
+
+/// Used to transfer ownership to and from foreign (non-Rust) languages.
+///
+/// Ownership is transferred from Rust to a foreign language by calling [`Self::into_foreign`] and
+/// later may be transferred back to Rust by calling [`Self::from_foreign`].
+///
+/// This trait is meant to be used in cases when Rust objects are stored in C objects and
+/// eventually "freed" back to Rust.
+pub trait ForeignOwnable: Sized {
+ /// Type of values borrowed between calls to [`ForeignOwnable::into_foreign`] and
+ /// [`ForeignOwnable::from_foreign`].
+ type Borrowed<'a>;
+
+ /// Converts a Rust-owned object to a foreign-owned one.
+ ///
+ /// The foreign representation is a pointer to void.
+ fn into_foreign(self) -> *const core::ffi::c_void;
+
+ /// Borrows a foreign-owned object.
+ ///
+ /// # Safety
+ ///
+ /// `ptr` must have been returned by a previous call to [`ForeignOwnable::into_foreign`] for
+ /// which a previous matching [`ForeignOwnable::from_foreign`] hasn't been called yet.
+ /// Additionally, all instances (if any) of values returned by [`ForeignOwnable::borrow_mut`]
+ /// for this object must have been dropped.
+ unsafe fn borrow<'a>(ptr: *const core::ffi::c_void) -> Self::Borrowed<'a>;
+
+ /// Mutably borrows a foreign-owned object.
+ ///
+ /// The default implementation temporarily takes ownership back with
+ /// [`ForeignOwnable::from_foreign`] and wraps the object in a [`ScopeGuard`] that calls
+ /// [`ForeignOwnable::into_foreign`] again when it is dropped.
+ ///
+ /// # Safety
+ ///
+ /// `ptr` must have been returned by a previous call to [`ForeignOwnable::into_foreign`] for
+ /// which a previous matching [`ForeignOwnable::from_foreign`] hasn't been called yet.
+ /// Additionally, all instances (if any) of values returned by [`ForeignOwnable::borrow`] and
+ /// [`ForeignOwnable::borrow_mut`] for this object must have been dropped.
+ unsafe fn borrow_mut(ptr: *const core::ffi::c_void) -> ScopeGuard<Self, fn(Self)> {
+ // SAFETY: The safety requirements ensure that `ptr` came from a previous call to
+ // `into_foreign`.
+ ScopeGuard::new_with_data(unsafe { Self::from_foreign(ptr) }, |d| {
+ // Hand ownership back to the foreign side; the pointer returned here is discarded.
+ d.into_foreign();
+ })
+ }
+
+ /// Converts a foreign-owned object back to a Rust-owned one.
+ ///
+ /// # Safety
+ ///
+ /// `ptr` must have been returned by a previous call to [`ForeignOwnable::into_foreign`] for
+ /// which a previous matching [`ForeignOwnable::from_foreign`] hasn't been called yet.
+ /// Additionally, all instances (if any) of values returned by [`ForeignOwnable::borrow`] and
+ /// [`ForeignOwnable::borrow_mut`] for this object must have been dropped.
+ unsafe fn from_foreign(ptr: *const core::ffi::c_void) -> Self;
+}
+
+// A `Box<T>` is handed to the foreign side as the raw pointer to its heap allocation, and is
+// borrowed back as a shared reference.
+impl<T: 'static> ForeignOwnable for Box<T> {
+ type Borrowed<'a> = &'a T;
+
+ fn into_foreign(self) -> *const core::ffi::c_void {
+ // The box is not freed here: ownership of the allocation moves into the raw pointer.
+ Box::into_raw(self) as _
+ }
+
+ unsafe fn borrow<'a>(ptr: *const core::ffi::c_void) -> &'a T {
+ // SAFETY: The safety requirements for this function ensure that the object is still alive,
+ // so it is safe to dereference the raw pointer.
+ // The safety requirements of `from_foreign` also ensure that the object remains alive for
+ // the lifetime of the returned value.
+ unsafe { &*ptr.cast() }
+ }
+
+ unsafe fn from_foreign(ptr: *const core::ffi::c_void) -> Self {
+ // SAFETY: The safety requirements of this function ensure that `ptr` comes from a previous
+ // call to `Self::into_foreign`.
+ unsafe { Box::from_raw(ptr as _) }
+ }
+}
+
+// The unit type carries no data, so nothing is actually stored on the foreign side.
+impl ForeignOwnable for () {
+ type Borrowed<'a> = ();
+
+ fn into_foreign(self) -> *const core::ffi::c_void {
+ // A dangling (non-null) placeholder pointer: neither `borrow` nor `from_foreign` below
+ // ever reads the pointer they are given.
+ core::ptr::NonNull::dangling().as_ptr()
+ }
+
+ unsafe fn borrow<'a>(_: *const core::ffi::c_void) -> Self::Borrowed<'a> {}
+
+ unsafe fn from_foreign(_: *const core::ffi::c_void) -> Self {}
+}
+
+/// Runs a cleanup function/closure when dropped.
+///
+/// The [`ScopeGuard::dismiss`] function prevents the cleanup function from running.
+///
+/// # Examples
+///
+/// In the example below, we have multiple exit paths and we want to log regardless of which one is
+/// taken:
+/// ```
+/// # use kernel::ScopeGuard;
+/// fn example1(arg: bool) {
+/// let _log = ScopeGuard::new(|| pr_info!("example1 completed\n"));
+///
+/// if arg {
+/// return;
+/// }
+///
+/// pr_info!("Do something...\n");
+/// }
+///
+/// # example1(false);
+/// # example1(true);
+/// ```
+///
+/// In the example below, we want to log the same message on all early exits but a different one on
+/// the main exit path:
+/// ```
+/// # use kernel::ScopeGuard;
+/// fn example2(arg: bool) {
+/// let log = ScopeGuard::new(|| pr_info!("example2 returned early\n"));
+///
+/// if arg {
+/// return;
+/// }
+///
+/// // (Other early returns...)
+///
+/// log.dismiss();
+/// pr_info!("example2 no early return\n");
+/// }
+///
+/// # example2(false);
+/// # example2(true);
+/// ```
+///
+/// In the example below, we need a mutable object (the vector) to be accessible within the log
+/// function, so we wrap it in the [`ScopeGuard`]:
+/// ```
+/// # use kernel::ScopeGuard;
+/// fn example3(arg: bool) -> Result {
+/// let mut vec =
+/// ScopeGuard::new_with_data(Vec::new(), |v| pr_info!("vec had {} elements\n", v.len()));
+///
+/// vec.try_push(10u8)?;
+/// if arg {
+/// return Ok(());
+/// }
+/// vec.try_push(20u8)?;
+/// Ok(())
+/// }
+///
+/// # assert_eq!(example3(false), Ok(()));
+/// # assert_eq!(example3(true), Ok(()));
+/// ```
+///
+/// # Invariants
+///
+/// The value stored in the struct is nearly always `Some(_)`, except between
+/// [`ScopeGuard::dismiss`] and [`ScopeGuard::drop`]: in this case, it will be `None` as the value
+/// will have been returned to the caller. Since [`ScopeGuard::dismiss`] consumes the guard,
+/// callers won't be able to use it anymore.
+///
+/// The stored tuple holds the guarded data together with the cleanup function that receives it.
+pub struct ScopeGuard<T, F: FnOnce(T)>(Option<(T, F)>);
+
+impl<T, F: FnOnce(T)> ScopeGuard<T, F> {
+    /// Builds a guard around `data` that hands it to `cleanup_func` when the guard is dropped.
+    pub fn new_with_data(data: T, cleanup_func: F) -> Self {
+        let armed = Some((data, cleanup_func));
+        // INVARIANT: The struct is being initialised with `Some(_)`.
+        Self(armed)
+    }
+
+    /// Disarms the guard: the cleanup function will not run, and the guarded data is returned.
+    pub fn dismiss(mut self) -> T {
+        // INVARIANT: This is the exception case in the invariant; it is not visible to callers
+        // because this function consumes `self`.
+        let (data, _cleanup) = self.0.take().unwrap();
+        data
+    }
+}
+
+impl ScopeGuard<(), fn(())> {
+ /// Creates a new guarded object with the given cleanup function.
+ ///
+ /// The returned guard wraps no data; `cleanup` simply runs when the guard is dropped, unless
+ /// the guard is dismissed first.
+ pub fn new(cleanup: impl FnOnce()) -> ScopeGuard<(), impl FnOnce(())> {
+ // Adapt the argument-less `cleanup` to the `FnOnce(T)` shape expected by `new_with_data`,
+ // with `T = ()`.
+ ScopeGuard::new_with_data((), move |_| cleanup())
+ }
+}
+
+impl<T, F: FnOnce(T)> Deref for ScopeGuard<T, F> {
+    type Target = T;
+
+    /// Gives shared access to the guarded data.
+    fn deref(&self) -> &T {
+        // By the type invariants the option is `Some(_)` here, so `unwrap` cannot fail.
+        let (data, _) = self.0.as_ref().unwrap();
+        data
+    }
+}
+
+impl<T, F: FnOnce(T)> DerefMut for ScopeGuard<T, F> {
+    /// Gives exclusive access to the guarded data.
+    fn deref_mut(&mut self) -> &mut T {
+        // By the type invariants the option is `Some(_)` here, so `unwrap` cannot fail.
+        let (data, _) = self.0.as_mut().unwrap();
+        data
+    }
+}
+
+impl<T, F: FnOnce(T)> Drop for ScopeGuard<T, F> {
+    fn drop(&mut self) {
+        // Nothing is left to run if the guard was dismissed; otherwise the cleanup function
+        // receives the guarded data.
+        let pending = self.0.take();
+        if let Some((data, cleanup)) = pending {
+            cleanup(data)
+        }
+    }
+}
/// Stores an opaque value.
///
@@ -35,3 +248,84 @@ pub enum Either<L, R> {
/// Constructs an instance of [`Either`] containing a value of type `R`.
Right(R),
}
+
+/// A trait for boolean types.
+///
+/// This is meant to be used in type states to allow boolean constraints in implementation blocks.
+/// In the example below, the implementation containing `MyType::set_value` could _not_ be
+/// constrained to type states containing `Writable = true` if `Writable` were a constant instead
+/// of a type.
+///
+/// # Safety
+///
+/// No additional implementations of [`Bool`] should be provided, as [`True`] and [`False`] are
+/// already provided.
+///
+/// # Examples
+///
+/// ```
+/// # use kernel::{Bool, False, True};
+/// use core::marker::PhantomData;
+///
+/// // Type state specifies whether the type is writable.
+/// trait MyTypeState {
+/// type Writable: Bool;
+/// }
+///
+/// // In state S1, the type is writable.
+/// struct S1;
+/// impl MyTypeState for S1 {
+/// type Writable = True;
+/// }
+///
+/// // In state S2, the type is not writable.
+/// struct S2;
+/// impl MyTypeState for S2 {
+/// type Writable = False;
+/// }
+///
+/// struct MyType<T: MyTypeState> {
+/// value: u32,
+/// _p: PhantomData<T>,
+/// }
+///
+/// impl<T: MyTypeState> MyType<T> {
+/// fn new(value: u32) -> Self {
+/// Self {
+/// value,
+/// _p: PhantomData,
+/// }
+/// }
+/// }
+///
+/// // This implementation block only applies if the type state is writable.
+/// impl<T> MyType<T>
+/// where
+/// T: MyTypeState<Writable = True>,
+/// {
+/// fn set_value(&mut self, v: u32) {
+/// self.value = v;
+/// }
+/// }
+///
+/// let mut x = MyType::<S1>::new(10);
+/// let mut y = MyType::<S2>::new(20);
+///
+/// x.set_value(30);
+///
+/// // The code below fails to compile because `S2` is not writable.
+/// // y.set_value(40);
+/// ```
+pub unsafe trait Bool {}
+
+/// Represents the `true` value for types with [`Bool`] bound.
+pub struct True;
+
+// SAFETY: This is one of the only two implementations of `Bool`.
+unsafe impl Bool for True {}
+
+/// Represents the `false` value for types with [`Bool`] bound.
+pub struct False;
+
+// SAFETY: This is one of the only two implementations of `Bool`.
+unsafe impl Bool for False {}
diff --git a/rust/kernel/user_ptr.rs b/rust/kernel/user_ptr.rs
new file mode 100644
index 000000000000..084535675c4a
--- /dev/null
+++ b/rust/kernel/user_ptr.rs
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! User pointers.
+//!
+//! C header: [`include/linux/uaccess.h`](../../../../include/linux/uaccess.h)
+
+use crate::{
+ bindings,
+ error::code::*,
+ error::Result,
+ io_buffer::{IoBufferReader, IoBufferWriter},
+};
+use alloc::vec::Vec;
+
+/// A reference to an area in userspace memory, which can be either
+/// read-only or read-write.
+///
+/// All methods on this struct are safe: invalid pointers return
+/// `EFAULT`. Concurrent access, *including data races to/from userspace
+/// memory*, is permitted, because fundamentally another userspace
+/// thread/process could always be modifying memory at the same time
+/// (in the same way that userspace Rust's [`std::io`] permits data races
+/// with the contents of files on disk). In the presence of a race, the
+/// exact byte values read/written are unspecified but the operation is
+/// well-defined. Kernelspace code should validate its copy of data
+/// after completing a read, and not expect that multiple reads of the
+/// same address will return the same value.
+///
+/// All APIs enforce the invariant that a given byte of memory from userspace
+/// may only be read once. By preventing double-fetches we avoid TOCTOU
+/// vulnerabilities. This is accomplished by taking `self` by value to prevent
+/// obtaining multiple readers on a given [`UserSlicePtr`], and the readers
+/// only permitting forward reads.
+///
+/// Constructing a [`UserSlicePtr`] performs no checks on the provided
+/// address and length; it can safely be constructed inside a kernel thread
+/// with no current userspace process. Reads and writes wrap the kernel APIs
+/// `copy_from_user` and `copy_to_user`, which check the memory map of the
+/// current process and enforce that the address range is within the user
+/// range (no additional calls to `access_ok` are needed).
+///
+/// [`std::io`]: https://doc.rust-lang.org/std/io/index.html
+pub struct UserSlicePtr(*mut core::ffi::c_void, usize);
+
+impl UserSlicePtr {
+    /// Builds a user slice out of a raw userspace pointer and a byte length.
+    ///
+    /// # Safety
+    ///
+    /// Callers must be careful to avoid time-of-check-time-of-use
+    /// (TOCTOU) issues. The simplest way is to create a single instance of
+    /// [`UserSlicePtr`] per user memory block as it reads each byte at
+    /// most once.
+    pub unsafe fn new(ptr: *mut core::ffi::c_void, length: usize) -> Self {
+        Self(ptr, length)
+    }
+
+    /// Reads the whole user slice into a freshly allocated vector.
+    ///
+    /// Returns `EFAULT` if the address does not currently point to
+    /// mapped, readable memory.
+    pub fn read_all(self) -> Result<Vec<u8>> {
+        self.reader().read_all()
+    }
+
+    /// Converts this slice into a [`UserSlicePtrReader`] covering the same range.
+    pub fn reader(self) -> UserSlicePtrReader {
+        let Self(ptr, len) = self;
+        UserSlicePtrReader(ptr, len)
+    }
+
+    /// Copies `data` into the user slice.
+    ///
+    /// Returns `EFAULT` if the address does not currently point to
+    /// mapped, writable memory (in which case some data from before the
+    /// fault may be written), or `data` is larger than the user slice
+    /// (in which case no data is written).
+    pub fn write_all(self, data: &[u8]) -> Result {
+        self.writer().write_slice(data)
+    }
+
+    /// Converts this slice into a [`UserSlicePtrWriter`] covering the same range.
+    pub fn writer(self) -> UserSlicePtrWriter {
+        let Self(ptr, len) = self;
+        UserSlicePtrWriter(ptr, len)
+    }
+
+    /// Splits this slice into a [`UserSlicePtrReader`] and a [`UserSlicePtrWriter`], both
+    /// covering the same range.
+    pub fn reader_writer(self) -> (UserSlicePtrReader, UserSlicePtrWriter) {
+        let Self(ptr, len) = self;
+        (UserSlicePtrReader(ptr, len), UserSlicePtrWriter(ptr, len))
+    }
+}
+
+/// A reader for [`UserSlicePtr`].
+///
+/// Used to incrementally read from the user slice.
+///
+/// Field `0` is the userspace address of the next byte to read and field `1` is the number of
+/// bytes still available; both are updated after every successful read.
+pub struct UserSlicePtrReader(*mut core::ffi::c_void, usize);
+
+impl IoBufferReader for UserSlicePtrReader {
+ /// Returns the number of bytes left to be read from this reader.
+ ///
+ /// Note that even reading less than this number of bytes may fail.
+ fn len(&self) -> usize {
+ self.1
+ }
+
+ /// Reads raw data from the user slice into a raw kernel buffer.
+ ///
+ /// Returns `EFAULT` if `len` exceeds the remaining length (or `u32::MAX`), or if
+ /// `copy_from_user` reports a non-zero result. On failure the reader position is left
+ /// unchanged.
+ ///
+ /// # Safety
+ ///
+ /// The output buffer must be valid.
+ unsafe fn read_raw(&mut self, out: *mut u8, len: usize) -> Result {
+ // Refuse requests that run past the end of the slice before touching user memory.
+ if len > self.1 || len > u32::MAX as usize {
+ return Err(EFAULT);
+ }
+ // SAFETY: The caller guarantees that `out` is a valid output buffer (see the safety
+ // requirements of this function). The userspace side is validated by `copy_from_user`
+ // itself.
+ let res = unsafe { bindings::copy_from_user(out as _, self.0, len as _) };
+ if res != 0 {
+ return Err(EFAULT);
+ }
+ // Since this is not a pointer to a valid object in our program,
+ // we cannot use `add`, which has C-style rules for defined
+ // behavior.
+ self.0 = self.0.wrapping_add(len);
+ self.1 -= len;
+ Ok(())
+ }
+}
+
+/// A writer for [`UserSlicePtr`].
+///
+/// Used to incrementally write into the user slice.
+///
+/// Field `0` is the userspace address of the next byte to write and field `1` is the number of
+/// bytes still available; both are updated as data is written or cleared.
+pub struct UserSlicePtrWriter(*mut core::ffi::c_void, usize);
+
+impl IoBufferWriter for UserSlicePtrWriter {
+ /// Returns the number of bytes left to be written through this writer.
+ fn len(&self) -> usize {
+ self.1
+ }
+
+ /// Clears up to `len` bytes of the user slice using `clear_user`.
+ ///
+ /// If `len` is larger than the remaining space, only the remaining space is cleared and
+ /// `EFAULT` is returned. `EFAULT` is also returned when `clear_user` reports a non-zero
+ /// result; that result is treated as the count of bytes that were not cleared, and the
+ /// writer only advances past the bytes that were.
+ fn clear(&mut self, mut len: usize) -> Result {
+ let mut ret = Ok(());
+ // Clamp the request to the slice, but remember that it was too large.
+ if len > self.1 {
+ ret = Err(EFAULT);
+ len = self.1;
+ }
+
+ // SAFETY: The buffer will be validated by `clear_user`. We ensure that `len` is within
+ // bounds in the check above.
+ let left = unsafe { bindings::clear_user(self.0, len as _) } as usize;
+ if left != 0 {
+ ret = Err(EFAULT);
+ len -= left;
+ }
+
+ // Advance past the bytes accounted for above. `wrapping_add` is used because this is a
+ // userspace address, not a pointer to an object in our program.
+ self.0 = self.0.wrapping_add(len);
+ self.1 -= len;
+ ret
+ }
+
+ /// Writes raw data from a kernel buffer into the user slice.
+ ///
+ /// Returns `EFAULT` if `len` exceeds the remaining length (or `u32::MAX`), or if
+ /// `copy_to_user` reports a non-zero result. On failure the writer position is left
+ /// unchanged.
+ ///
+ /// # Safety
+ ///
+ /// NOTE(review): no contract is stated in this file; presumably `data` must be valid for
+ /// reads of `len` bytes -- confirm against the `IoBufferWriter` trait documentation.
+ unsafe fn write_raw(&mut self, data: *const u8, len: usize) -> Result {
+ if len > self.1 || len > u32::MAX as usize {
+ return Err(EFAULT);
+ }
+ // SAFETY: The kernel-side buffer `data` is supplied by the caller of this unsafe
+ // function; the userspace side is validated by `copy_to_user` itself.
+ let res = unsafe { bindings::copy_to_user(self.0, data as _, len as _) };
+ if res != 0 {
+ return Err(EFAULT);
+ }
+ // Since this is not a pointer to a valid object in our program,
+ // we cannot use `add`, which has C-style rules for defined
+ // behavior.
+ self.0 = self.0.wrapping_add(len);
+ self.1 -= len;
+ Ok(())
+ }
+}
diff --git a/rust/kernel/xarray.rs b/rust/kernel/xarray.rs
new file mode 100644
index 000000000000..4f09e8e613da
--- /dev/null
+++ b/rust/kernel/xarray.rs
@@ -0,0 +1,299 @@
+// SPDX-License-Identifier: GPL-2.0
+
+//! XArray abstraction.
+//!
+//! C header: [`include/linux/xarray.h`](../../include/linux/xarray.h)
+
+use crate::{
+ bindings,
+ error::{Error, Result},
+ types::{ForeignOwnable, Opaque, ScopeGuard},
+};
+use core::{marker::PhantomData, ops::Deref};
+
+/// Flags passed to `XArray::new` to configure the `XArray`.
+type Flags = bindings::gfp_t;
+
+/// Flag values passed to `XArray::new` to configure the `XArray`.
+pub mod flags {
+ /// Use IRQ-safe locking
+ pub const LOCK_IRQ: super::Flags = bindings::BINDINGS_XA_FLAGS_LOCK_IRQ;
+ /// Use softirq-safe locking
+ pub const LOCK_BH: super::Flags = bindings::BINDINGS_XA_FLAGS_LOCK_BH;
+ /// Track which entries are free (distinct from None)
+ pub const TRACK_FREE: super::Flags = bindings::BINDINGS_XA_FLAGS_TRACK_FREE;
+ /// Initialize array index 0 as busy
+ pub const ZERO_BUSY: super::Flags = bindings::BINDINGS_XA_FLAGS_ZERO_BUSY;
+ /// Use GFP_ACCOUNT for internal memory allocations
+ pub const ACCOUNT: super::Flags = bindings::BINDINGS_XA_FLAGS_ACCOUNT;
+ /// Create an allocating `XArray` starting at index 0
+ pub const ALLOC: super::Flags = bindings::BINDINGS_XA_FLAGS_ALLOC;
+ /// Create an allocating `XArray` starting at index 1
+ pub const ALLOC1: super::Flags = bindings::BINDINGS_XA_FLAGS_ALLOC1;
+}
+
+/// Wrapper for a value owned by the `XArray` which holds the `XArray` lock until dropped.
+///
+/// Field `0` is the foreign pointer loaded from the array and field `1` is the underlying C
+/// `xarray`, which is unlocked again when the guard is dropped.
+///
+/// # Invariants
+///
+/// The `*mut T` is always non-NULL and owned by the referenced `XArray`
+pub struct Guard<'a, T: ForeignOwnable>(*mut T, &'a Opaque<bindings::xarray>);
+
+impl<'a, T: ForeignOwnable> Guard<'a, T> {
+ /// Borrow the underlying value wrapped by the `Guard`.
+ ///
+ /// Returns a `T::Borrowed` type for the owned `ForeignOwnable` type. The borrow is tied to
+ /// the lifetime of the borrow of the guard itself (`'b`), which cannot outlive `'a`.
+ pub fn borrow<'b>(&'b self) -> T::Borrowed<'b>
+ where
+ 'a: 'b,
+ {
+ // SAFETY: The value is owned by the `XArray`, the lifetime it is borrowed for must not
+ // outlive the `XArray` itself, nor the Guard that holds the lock ensuring the value
+ // remains in the `XArray`.
+ unsafe { T::borrow(self.0 as _) }
+ }
+}
+
+// Convenience impl for `ForeignOwnable` types whose `Borrowed`
+// form implements Deref.
+impl<'a, T: ForeignOwnable> Deref for Guard<'a, T>
+where
+ T::Borrowed<'static>: Deref,
+{
+ type Target = <T::Borrowed<'static> as Deref>::Target;
+
+ /// Dereferences straight through the borrowed form of the stored value.
+ fn deref(&self) -> &Self::Target {
+ // SAFETY: See the `borrow()` method. The dereferenced `T::Borrowed` value
+ // must share the same lifetime, so we can return a reference to it.
+ // TODO: Is this really sound?
+ // NOTE(review): the temporary `T::Borrowed` is dropped at the end of this expression and
+ // the reference is re-created from a raw pointer, so this relies on the deref target
+ // not living inside the temporary itself -- confirm for every `ForeignOwnable` used.
+ unsafe { &*(T::borrow(self.0 as _).deref() as *const _) }
+ }
+}
+
+impl<'a, T: ForeignOwnable> Drop for Guard<'a, T> {
+ /// Releases the `XArray` lock that was taken when this guard was created.
+ fn drop(&mut self) {
+ // SAFETY: The XArray we have a reference to owns the C xarray object.
+ unsafe { bindings::xa_unlock(self.1.get()) };
+ }
+}
+
+/// Represents a reserved slot in an `XArray`, which does not yet have a value but has an assigned
+/// index and may not be allocated by any other user. If the Reservation is dropped without
+/// being filled, the entry is marked as available again.
+///
+/// Users must ensure that reserved slots are not filled by other mechanisms, or otherwise their
+/// contents may be dropped and replaced (which will print a warning).
+pub struct Reservation<'a, T: ForeignOwnable>(&'a XArray<T>, usize, PhantomData<T>);
+
+impl<'a, T: ForeignOwnable> Reservation<'a, T> {
+    /// Fills the reserved slot with `value` and returns the index it was stored at.
+    ///
+    /// If storing fails, the error is returned and the reservation is released as usual.
+    pub fn store(self, value: T) -> Result<usize> {
+        let index = self.1;
+        let slot_was_occupied = self.0.replace(index, value)?.is_some();
+        if slot_was_occupied {
+            crate::pr_err!("XArray: Reservation stored but the entry already had data!\n");
+            // Consider it a success anyway, not much we can do
+        }
+        // The slot now holds `value`; skip `Drop` so that the entry is not removed again.
+        core::mem::forget(self);
+        Ok(index)
+    }
+
+    /// Returns the index that was reserved.
+    pub fn index(&self) -> usize {
+        self.1
+    }
+}
+
+impl<'a, T: ForeignOwnable> Drop for Reservation<'a, T> {
+    /// Gives the reserved index back to the array, complaining if something was stored there.
+    fn drop(&mut self) {
+        let was_occupied = self.0.remove(self.1).is_some();
+        if was_occupied {
+            crate::pr_err!("XArray: Reservation dropped but the entry was not empty!\n");
+        }
+    }
+}
+
+/// An array which efficiently maps sparse integer indices to owned objects.
+///
+/// This is similar to a `Vec<Option<T>>`, but more efficient when there are holes in the
+/// index space, and can be efficiently grown.
+///
+/// This structure is expected to often be used with an inner type that can be efficiently
+/// cloned, such as an `Arc<T>`.
+pub struct XArray<T: ForeignOwnable> {
+ // The underlying C `xarray`.
+ xa: Opaque<bindings::xarray>,
+ // Marks the type of the values stored (as foreign pointers) in `xa`.
+ _p: PhantomData<T>,
+}
+
+impl<T: ForeignOwnable> XArray<T> {
+    /// Creates a new `XArray` with the given flags.
+    pub fn new(flags: Flags) -> Result<XArray<T>> {
+        let xa = Opaque::uninit();
+
+        // SAFETY: We have just created `xa`. This data structure does not require
+        // pinning.
+        unsafe { bindings::xa_init_flags(xa.get(), flags) };
+
+        // INVARIANT: Initialize the `XArray` with a valid `xa`.
+        Ok(XArray {
+            xa,
+            _p: PhantomData,
+        })
+    }
+
+    /// Replaces an entry with a new value, returning the old value (if any).
+    ///
+    /// On failure (index conversion or store error) `value` is dropped and the error is
+    /// returned.
+    pub fn replace(&self, index: usize, value: T) -> Result<Option<T>> {
+        let new = value.into_foreign();
+        // Until the store has succeeded we still own `new`; this guard turns it back into a
+        // `T` (dropping it) on every early-exit path.
+        // SAFETY (closure body): `new` was produced by `into_foreign` above and has not been
+        // handed over to the array when the guard runs.
+        let guard = ScopeGuard::new(|| unsafe {
+            T::from_foreign(new);
+        });
+
+        // SAFETY: `self.xa` is always valid by the type invariant.
+        let old = unsafe {
+            bindings::xa_store(
+                self.xa.get(),
+                index.try_into()?,
+                new as *mut _,
+                bindings::GFP_KERNEL,
+            )
+        };
+
+        // SAFETY: `old` is the value that was just returned by `xa_store`.
+        let err = unsafe { bindings::xa_err(old) };
+        if err != 0 {
+            // `guard` is dropped here and reclaims `new`.
+            Err(Error::from_kernel_errno(err))
+        } else if old.is_null() {
+            // The array now owns `new`.
+            guard.dismiss();
+            Ok(None)
+        } else {
+            // The array now owns `new`, and we own the previous entry.
+            guard.dismiss();
+            // SAFETY: `old` is a non-NULL, non-error entry returned by `xa_store`; entries are
+            // only ever stored from `into_foreign` pointers of type `T`.
+            Ok(Some(unsafe { T::from_foreign(old) }))
+        }
+    }
+
+    /// Replaces an entry with a new value, dropping the old value (if any).
+    pub fn set(&self, index: usize, value: T) -> Result {
+        self.replace(index, value)?;
+        Ok(())
+    }
+
+    /// Looks up and returns a reference to an entry in the array, returning a `Guard` if it
+    /// exists.
+    ///
+    /// This guard blocks all other actions on the `XArray`. Callers are expected to drop the
+    /// `Guard` eagerly to avoid blocking other users, such as by taking a clone of the value.
+    pub fn get(&self, index: usize) -> Option<Guard<'_, T>> {
+        // Convert the index *before* taking the lock, so that a failed conversion cannot
+        // return early with the lock still held.
+        let index: core::ffi::c_ulong = index.try_into().ok()?;
+
+        // SAFETY: `self.xa` is always valid by the type invariant.
+        let p = unsafe {
+            bindings::xa_lock(self.xa.get());
+            bindings::xa_load(self.xa.get(), index)
+        };
+
+        if p.is_null() {
+            // Nothing stored at `index`: release the lock taken above.
+            // SAFETY: `self.xa` is always valid by the type invariant, and the lock is held.
+            unsafe { bindings::xa_unlock(self.xa.get()) };
+            None
+        } else {
+            // INVARIANT: `p` is non-NULL and owned by `self`; the guard unlocks on drop.
+            Some(Guard(p as _, &self.xa))
+        }
+    }
+
+    /// Removes and returns an entry, returning it if it existed.
+    pub fn remove(&self, index: usize) -> Option<T> {
+        // SAFETY: `self.xa` is always valid by the type invariant.
+        let p = unsafe { bindings::xa_erase(self.xa.get(), index.try_into().ok()?) };
+        if p.is_null() {
+            None
+        } else {
+            // SAFETY: Non-NULL entries are only ever stored from `into_foreign` pointers of
+            // type `T`, and the entry has just been removed from the array.
+            Some(unsafe { T::from_foreign(p) })
+        }
+    }
+
+    /// Allocate a new index in the array, optionally storing a new value into it, with
+    /// configurable bounds for the index range to allocate from.
+    ///
+    /// If `value` is `None`, then the index is reserved from further allocation but remains
+    /// free for storing a value into it.
+    pub fn alloc_limits(&self, value: Option<T>, min: u32, max: u32) -> Result<usize> {
+        let new = value.map_or(core::ptr::null(), |a| a.into_foreign());
+        let mut id: u32 = 0;
+
+        // SAFETY: `self.xa` is always valid by the type invariant. If this succeeds, it
+        // takes ownership of the passed `T` (if any). If it fails, we must drop the
+        // `T` again.
+        let ret = unsafe {
+            bindings::xa_alloc(
+                self.xa.get(),
+                &mut id,
+                new as *mut _,
+                bindings::xa_limit { min, max },
+                bindings::GFP_KERNEL,
+            )
+        };
+
+        if ret < 0 {
+            // Make sure to drop the value we failed to store
+            if !new.is_null() {
+                // SAFETY: If `new` is not NULL, it came from the `ForeignOwnable` we got
+                // from the caller.
+                unsafe { T::from_foreign(new) };
+            }
+            Err(Error::from_kernel_errno(ret))
+        } else {
+            Ok(id as usize)
+        }
+    }
+
+    /// Allocate a new index in the array, optionally storing a new value into it.
+    ///
+    /// If `value` is `None`, then the index is reserved from further allocation but remains
+    /// free for storing a value into it.
+    pub fn alloc(&self, value: Option<T>) -> Result<usize> {
+        self.alloc_limits(value, 0, u32::MAX)
+    }
+
+    /// Reserve a new index in the array within configurable bounds for the index.
+    ///
+    /// Returns a `Reservation` object, which can then be used to store a value at this index or
+    /// otherwise free it for reuse.
+    pub fn reserve_limits(&self, min: u32, max: u32) -> Result<Reservation<'_, T>> {
+        Ok(Reservation(
+            self,
+            self.alloc_limits(None, min, max)?,
+            PhantomData,
+        ))
+    }
+
+    /// Reserve a new index in the array.
+    ///
+    /// Returns a `Reservation` object, which can then be used to store a value at this index or
+    /// otherwise free it for reuse.
+    pub fn reserve(&self) -> Result<Reservation<'_, T>> {
+        Ok(Reservation(self, self.alloc(None)?, PhantomData))
+    }
+}
+
+impl<T: ForeignOwnable> Drop for XArray<T> {
+ /// Drops every value still stored in the array, then destroys the array itself.
+ fn drop(&mut self) {
+ // SAFETY: `self.xa` is valid by the type invariant, and as we have the only reference to
+ // the `XArray` we can safely iterate its contents and drop everything.
+ unsafe {
+ // `index` is updated by `xa_find`/`xa_find_after` to the position of the entry found.
+ let mut index: core::ffi::c_ulong = 0;
+ let mut entry = bindings::xa_find(
+ self.xa.get(),
+ &mut index,
+ core::ffi::c_ulong::MAX,
+ bindings::BINDINGS_XA_PRESENT,
+ );
+ while !entry.is_null() {
+ // Convert the foreign pointer back into a `T`, which is dropped immediately.
+ T::from_foreign(entry);
+ entry = bindings::xa_find_after(
+ self.xa.get(),
+ &mut index,
+ core::ffi::c_ulong::MAX,
+ bindings::BINDINGS_XA_PRESENT,
+ );
+ }
+
+ // All values have been reclaimed above; release the array's own storage.
+ bindings::xa_destroy(self.xa.get());
+ }
+ }
+}
+
+// SAFETY: XArray is thread-safe and all mutation operations are internally locked.
+unsafe impl<T: Send + ForeignOwnable> Send for XArray<T> {}
+// SAFETY: As above: XArray is thread-safe and all mutation operations are internally locked.
+// NOTE(review): `remove()` and `replace()` take `&self` and hand out owned `T` values, so a
+// shared `&XArray<T>` can be used to move a `T` to another thread -- confirm whether this impl
+// should additionally require `T: Send`.
+unsafe impl<T: Sync + ForeignOwnable> Sync for XArray<T> {}
diff --git a/rust/macros/concat_idents.rs b/rust/macros/concat_idents.rs
index 7e4b450f3a50..d6614b900aa2 100644
--- a/rust/macros/concat_idents.rs
+++ b/rust/macros/concat_idents.rs
@@ -14,10 +14,28 @@ fn expect_ident(it: &mut token_stream::IntoIter) -> Ident {
pub(crate) fn concat_idents(ts: TokenStream) -> TokenStream {
let mut it = ts.into_iter();
- let a = expect_ident(&mut it);
- assert_eq!(expect_punct(&mut it), ',');
+ let mut out = TokenStream::new();
+ let a = loop {
+ let ident = expect_ident(&mut it);
+ let punct = expect_punct(&mut it);
+ match punct.as_char() {
+ ',' => break ident,
+ ':' => {
+ let punct2 = expect_punct(&mut it);
+ assert_eq!(punct2.as_char(), ':');
+ out.extend([
+ TokenTree::Ident(ident),
+ TokenTree::Punct(punct),
+ TokenTree::Punct(punct2),
+ ]);
+ }
+ _ => panic!("Expected , or ::"),
+ }
+ };
+
let b = expect_ident(&mut it);
assert!(it.next().is_none(), "only two idents can be concatenated");
let res = Ident::new(&format!("{a}{b}"), b.span());
- TokenStream::from_iter([TokenTree::Ident(res)])
+ out.extend([TokenTree::Ident(res)]);
+ out
}
diff --git a/rust/macros/helpers.rs b/rust/macros/helpers.rs
index cf7ad950dc1e..517f0bbfcf79 100644
--- a/rust/macros/helpers.rs
+++ b/rust/macros/helpers.rs
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
-use proc_macro::{token_stream, TokenTree};
+use proc_macro::{token_stream, Group, Punct, TokenTree};
pub(crate) fn try_ident(it: &mut token_stream::IntoIter) -> Option<String> {
if let Some(TokenTree::Ident(ident)) = it.next() {
@@ -38,9 +38,9 @@ pub(crate) fn expect_ident(it: &mut token_stream::IntoIter) -> String {
try_ident(it).expect("Expected Ident")
}
-pub(crate) fn expect_punct(it: &mut token_stream::IntoIter) -> char {
+pub(crate) fn expect_punct(it: &mut token_stream::IntoIter) -> Punct {
if let TokenTree::Punct(punct) = it.next().expect("Reached end of token stream for Punct") {
- punct.as_char()
+ punct
} else {
panic!("Expected Punct");
}
@@ -56,8 +56,36 @@ pub(crate) fn expect_string_ascii(it: &mut token_stream::IntoIter) -> String {
string
}
+/// Consumes the next token as a literal and returns its text, panicking otherwise.
+pub(crate) fn expect_literal(it: &mut token_stream::IntoIter) -> String {
+    match try_literal(it) {
+        Some(literal) => literal,
+        None => panic!("Expected Literal"),
+    }
+}
+
+/// Consumes the next token and returns it as a `Group`.
+///
+/// Panics if the stream is exhausted or the next token is not a group.
+pub(crate) fn expect_group(it: &mut token_stream::IntoIter) -> Group {
+    match it.next() {
+        Some(TokenTree::Group(group)) => group,
+        Some(_) => panic!("Expected Group"),
+        None => panic!("Reached end of token stream for Group"),
+    }
+}
+
pub(crate) fn expect_end(it: &mut token_stream::IntoIter) {
if it.next().is_some() {
panic!("Expected end");
}
}
+
+/// Parses a `<expected_name>: <literal>,` entry and returns the literal's text.
+///
+/// Panics if the key, the `:` separator, the literal or the trailing `,` is missing.
+pub(crate) fn get_literal(it: &mut token_stream::IntoIter, expected_name: &str) -> String {
+    let key = expect_ident(it);
+    assert_eq!(key, expected_name);
+    let separator = expect_punct(it);
+    assert_eq!(separator, ':');
+    let value = expect_literal(it);
+    let terminator = expect_punct(it);
+    assert_eq!(terminator, ',');
+    value
+}
+
+/// Parses a `<expected_name>: <string>,` entry and returns the string's contents.
+///
+/// Panics if the key, the `:` separator, the string or the trailing `,` is missing.
+pub(crate) fn get_string(it: &mut token_stream::IntoIter, expected_name: &str) -> String {
+    let key = expect_ident(it);
+    assert_eq!(key, expected_name);
+    let separator = expect_punct(it);
+    assert_eq!(separator, ':');
+    let value = expect_string(it);
+    let terminator = expect_punct(it);
+    assert_eq!(terminator, ',');
+    value
+}
diff --git a/rust/macros/lib.rs b/rust/macros/lib.rs
index c1d385e345b9..3ab9bae4ab52 100644
--- a/rust/macros/lib.rs
+++ b/rust/macros/lib.rs
@@ -5,6 +5,7 @@
mod concat_idents;
mod helpers;
mod module;
+mod versions;
mod vtable;
use proc_macro::TokenStream;
@@ -73,6 +74,12 @@ pub fn module(ts: TokenStream) -> TokenStream {
module::module(ts)
}
+/// Declares multiple variants of a structure or impl code
+#[proc_macro_attribute]
+pub fn versions(attr: TokenStream, item: TokenStream) -> TokenStream {
+ versions::versions(attr, item)
+}
+
/// Declares or implements a vtable trait.
///
/// Linux's use of pure vtables is very close to Rust traits, but they differ
diff --git a/rust/macros/module.rs b/rust/macros/module.rs
index a7e363c2b044..1f30435d7ad4 100644
--- a/rust/macros/module.rs
+++ b/rust/macros/module.rs
@@ -1,9 +1,59 @@
// SPDX-License-Identifier: GPL-2.0
use crate::helpers::*;
-use proc_macro::{token_stream, Literal, TokenStream, TokenTree};
+use proc_macro::{token_stream, Delimiter, Group, Literal, TokenStream, TokenTree};
use std::fmt::Write;
+/// The declared type of a module parameter, as written in the `params` section.
+#[derive(Clone, PartialEq)]
+enum ParamType {
+ /// A plain type given by a single identifier (e.g. `bool`, `u32`, `str`).
+ Ident(String),
+ /// An `ArrayParam<vals, max_length>` type: `vals` is the element type name and
+ /// `max_length` the length literal given in the declaration.
+ Array { vals: String, max_length: usize },
+}
+
+/// Parses the `<vals, max_length>` part that follows an `ArrayParam` identifier.
+///
+/// Panics if the tokens do not have that shape or if the length is not a valid `usize`.
+fn expect_array_fields(it: &mut token_stream::IntoIter) -> ParamType {
+    assert_eq!(expect_punct(it), '<');
+    let vals = expect_ident(it);
+    assert_eq!(expect_punct(it), ',');
+    let max_length = expect_literal(it)
+        .parse::<usize>()
+        .expect("Expected usize length");
+    assert_eq!(expect_punct(it), '>');
+    ParamType::Array { vals, max_length }
+}
+
+/// Parses a parameter type: either a plain identifier or `ArrayParam<vals, max_length>`.
+///
+/// Panics if the stream is exhausted or the next token is not an identifier.
+fn expect_type(it: &mut token_stream::IntoIter) -> ParamType {
+    let token = it
+        .next()
+        .expect("Reached end of token stream for param type");
+    match token {
+        TokenTree::Ident(ident) => {
+            let name = ident.to_string();
+            if name == "ArrayParam" {
+                expect_array_fields(it)
+            } else {
+                ParamType::Ident(name)
+            }
+        }
+        _ => panic!("Expected Param Type"),
+    }
+}
+
+/// Parses a bracketed, comma-separated list of ASCII string literals (`["a", "b"]`).
+///
+/// Panics if the next token is not a `[...]` group, if a string is not ASCII, or if two
+/// strings are not separated by `,`.
+fn expect_string_array(it: &mut token_stream::IntoIter) -> Vec<String> {
+ let group = expect_group(it);
+ assert_eq!(group.delimiter(), Delimiter::Bracket);
+ let mut values = Vec::new();
+ let mut it = group.stream().into_iter();
+
+ // The loop ends as soon as `try_string` yields `None`.
+ // NOTE(review): that presumably also covers a non-string token, which would then be
+ // ignored silently rather than rejected -- confirm against `try_string`.
+ while let Some(val) = try_string(&mut it) {
+ assert!(val.is_ascii(), "Expected ASCII string");
+ values.push(val);
+ match it.next() {
+ Some(TokenTree::Punct(punct)) => assert_eq!(punct.as_char(), ','),
+ None => break,
+ _ => panic!("Expected ',' or end of array"),
+ }
+ }
+ values
+}
+
struct ModInfoBuilder<'a> {
module: &'a str,
counter: usize,
@@ -69,6 +119,113 @@ impl<'a> ModInfoBuilder<'a> {
self.emit_only_builtin(field, content);
self.emit_only_loadable(field, content);
}
+
+ /// Emits a `field` modinfo entry whose payload is `param:content`.
+ fn emit_param(&mut self, field: &str, param: &str, content: &str) {
+     let entry = format!("{}:{}", param, content);
+     self.emit(field, &entry);
+ }
+}
+
+/// Returns `true` if `perms` parses as an integer with none of the write bits (`0o222`) set.
+///
+/// `perms` may carry a `0x`, `0o` or `0b` prefix; otherwise it is read as decimal. Text that
+/// does not parse as a `u32` is conservatively reported as not read-only.
+fn permissions_are_readonly(perms: &str) -> bool {
+    let (radix, digits) = [("0x", 16), ("0o", 8), ("0b", 2)]
+        .iter()
+        .find_map(|&(prefix, radix)| perms.strip_prefix(prefix).map(|rest| (radix, rest)))
+        .unwrap_or((10, perms));
+
+    u32::from_str_radix(digits, radix).map_or(false, |mode| mode & 0o222 == 0)
+}
+
+/// Maps a primitive parameter type name to the path of its `kernel::module_param` ops static.
+///
+/// Panics with `Unrecognized type ...` for any other type name.
+fn param_ops_path(param_type: &str) -> &'static str {
+    const OPS_BY_TYPE: &[(&str, &str)] = &[
+        ("bool", "kernel::module_param::PARAM_OPS_BOOL"),
+        ("i8", "kernel::module_param::PARAM_OPS_I8"),
+        ("u8", "kernel::module_param::PARAM_OPS_U8"),
+        ("i16", "kernel::module_param::PARAM_OPS_I16"),
+        ("u16", "kernel::module_param::PARAM_OPS_U16"),
+        ("i32", "kernel::module_param::PARAM_OPS_I32"),
+        ("u32", "kernel::module_param::PARAM_OPS_U32"),
+        ("i64", "kernel::module_param::PARAM_OPS_I64"),
+        ("u64", "kernel::module_param::PARAM_OPS_U64"),
+        ("isize", "kernel::module_param::PARAM_OPS_ISIZE"),
+        ("usize", "kernel::module_param::PARAM_OPS_USIZE"),
+        ("str", "kernel::module_param::PARAM_OPS_STR"),
+    ];
+
+    OPS_BY_TYPE
+        .iter()
+        .find(|(name, _)| *name == param_type)
+        .map(|(_, path)| *path)
+        .unwrap_or_else(|| panic!("Unrecognized type {}", param_type))
+}
+
+/// Picks the token parser used for a default value of the given primitive type.
+///
+/// `bool` defaults are read as identifiers, `str` defaults as string literals (wrapped in a
+/// `StringParam::Ref` expression), and everything else as a plain literal.
+#[allow(clippy::type_complexity)]
+fn try_simple_param_val(
+    param_type: &str,
+) -> Box<dyn Fn(&mut token_stream::IntoIter) -> Option<String>> {
+    match param_type {
+        "bool" => Box::new(try_ident),
+        "str" => Box::new(|param_it| {
+            let text = try_string(param_it)?;
+            Some(format!(
+                "kernel::module_param::StringParam::Ref(b\"{}\")",
+                text
+            ))
+        }),
+        _ => Box::new(try_literal),
+    }
+}
+
+/// Parses the `default: <value>,` entry of a parameter and returns the Rust expression text
+/// used to initialise the parameter's backing static.
+///
+/// For array parameters the value is a bracketed list, which is turned into an
+/// `ArrayParam::create(&[...])` expression.
+fn get_default(param_type: &ParamType, param_it: &mut token_stream::IntoIter) -> String {
+    // Scalars and array elements are parsed by the same per-type value parser.
+    let element_type = match param_type {
+        ParamType::Ident(name) => name,
+        ParamType::Array { vals, .. } => vals,
+    };
+    let try_param_val = try_simple_param_val(element_type);
+
+    assert_eq!(expect_ident(param_it), "default");
+    assert_eq!(expect_punct(param_it), ':');
+
+    let default = match param_type {
+        ParamType::Ident(_) => try_param_val(param_it).expect("Expected default param value"),
+        ParamType::Array { .. } => {
+            let group = expect_group(param_it);
+            assert_eq!(group.delimiter(), Delimiter::Bracket);
+
+            let mut elements = Vec::new();
+            let mut it = group.stream().into_iter();
+            while let Some(element) = try_param_val(&mut it) {
+                elements.push(element);
+                match it.next() {
+                    Some(TokenTree::Punct(punct)) => assert_eq!(punct.as_char(), ','),
+                    None => break,
+                    _ => panic!("Expected ',' or end of array default values"),
+                }
+            }
+
+            format!(
+                "kernel::module_param::ArrayParam::create(&[{}])",
+                elements.join(",")
+            )
+        }
+    };
+
+    assert_eq!(expect_punct(param_it), ',');
+    default
+}
+
+/// Builds the identifier of the generated ops static for `ArrayParam<vals, max_length>`.
+fn generated_array_ops_name(vals: &str, max_length: usize) -> String {
+    format!("__generated_array_ops_{}_{}", vals, max_length)
+}
#[derive(Debug, Default)]
@@ -78,15 +235,23 @@ struct ModuleInfo {
name: String,
author: Option<String>,
description: Option<String>,
- alias: Option<String>,
+ alias: Option<Vec<String>>,
+ params: Option<Group>,
}
impl ModuleInfo {
fn parse(it: &mut token_stream::IntoIter) -> Self {
let mut info = ModuleInfo::default();
- const EXPECTED_KEYS: &[&str] =
- &["type", "name", "author", "description", "license", "alias"];
+ const EXPECTED_KEYS: &[&str] = &[
+ "type",
+ "name",
+ "author",
+ "description",
+ "license",
+ "alias",
+ "params",
+ ];
const REQUIRED_KEYS: &[&str] = &["type", "name", "license"];
let mut seen_keys = Vec::new();
@@ -104,7 +269,7 @@ impl ModuleInfo {
);
}
- assert_eq!(expect_punct(it), ':');
+ assert_eq!(expect_punct(it).as_char(), ':');
match key.as_str() {
"type" => info.type_ = expect_ident(it),
@@ -112,14 +277,15 @@ impl ModuleInfo {
"author" => info.author = Some(expect_string(it)),
"description" => info.description = Some(expect_string(it)),
"license" => info.license = expect_string_ascii(it),
- "alias" => info.alias = Some(expect_string_ascii(it)),
+ "alias" => info.alias = Some(expect_string_array(it)),
+ "params" => info.params = Some(expect_group(it)),
_ => panic!(
"Unknown key \"{}\". Valid keys are: {:?}.",
key, EXPECTED_KEYS
),
}
- assert_eq!(expect_punct(it), ',');
+ assert_eq!(expect_punct(it).as_char(), ',');
seen_keys.push(key);
}
@@ -163,8 +329,10 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream {
modinfo.emit("description", &description);
}
modinfo.emit("license", &info.license);
- if let Some(alias) = info.alias {
- modinfo.emit("alias", &alias);
+ if let Some(aliases) = info.alias {
+ for alias in aliases {
+ modinfo.emit("alias", &alias);
+ }
}
// Built-in modules also export the `file` modinfo string.
@@ -172,6 +340,195 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream {
std::env::var("RUST_MODFILE").expect("Unable to fetch RUST_MODFILE environmental variable");
modinfo.emit_only_builtin("file", &file);
+ let mut array_types_to_generate = Vec::new();
+ if let Some(params) = info.params {
+ assert_eq!(params.delimiter(), Delimiter::Brace);
+
+ let mut it = params.stream().into_iter();
+
+ loop {
+ let param_name = match it.next() {
+ Some(TokenTree::Ident(ident)) => ident.to_string(),
+ Some(_) => panic!("Expected Ident or end"),
+ None => break,
+ };
+
+ assert_eq!(expect_punct(&mut it), ':');
+ let param_type = expect_type(&mut it);
+ let group = expect_group(&mut it);
+ assert_eq!(expect_punct(&mut it), ',');
+
+ assert_eq!(group.delimiter(), Delimiter::Brace);
+
+ let mut param_it = group.stream().into_iter();
+ let param_default = get_default(&param_type, &mut param_it);
+ let param_permissions = get_literal(&mut param_it, "permissions");
+ let param_description = get_string(&mut param_it, "description");
+ expect_end(&mut param_it);
+
+ // TODO: More primitive types.
+ // TODO: Other kinds: unsafes, etc.
+ let (param_kernel_type, ops): (String, _) = match param_type {
+ ParamType::Ident(ref param_type) => (
+ param_type.to_string(),
+ param_ops_path(param_type).to_string(),
+ ),
+ ParamType::Array {
+ ref vals,
+ max_length,
+ } => {
+ array_types_to_generate.push((vals.clone(), max_length));
+ (
+ format!("__rust_array_param_{}_{}", vals, max_length),
+ generated_array_ops_name(vals, max_length),
+ )
+ }
+ };
+
+ modinfo.emit_param("parmtype", &param_name, &param_kernel_type);
+ modinfo.emit_param("parm", &param_name, &param_description);
+ let param_type_internal = match param_type {
+ ParamType::Ident(ref param_type) => match param_type.as_ref() {
+ "str" => "kernel::module_param::StringParam".to_string(),
+ other => other.to_string(),
+ },
+ ParamType::Array {
+ ref vals,
+ max_length,
+ } => format!(
+ "kernel::module_param::ArrayParam<{vals}, {max_length}>",
+ vals = vals,
+ max_length = max_length
+ ),
+ };
+ let read_func = if permissions_are_readonly(&param_permissions) {
+ format!(
+ "
+ fn read(&self)
+ -> &<{param_type_internal} as kernel::module_param::ModuleParam>::Value {{
+ // SAFETY: Parameters do not need to be locked because they are
+ // read only or sysfs is not enabled.
+ unsafe {{
+ <{param_type_internal} as kernel::module_param::ModuleParam>::value(
+ &__{name}_{param_name}_value
+ )
+ }}
+ }}
+ ",
+ name = info.name,
+ param_name = param_name,
+ param_type_internal = param_type_internal,
+ )
+ } else {
+ format!(
+ "
+ fn read<'lck>(&self, lock: &'lck kernel::KParamGuard)
+ -> &'lck <{param_type_internal} as kernel::module_param::ModuleParam>::Value {{
+ // SAFETY: Parameters are locked by `KParamGuard`.
+ unsafe {{
+ <{param_type_internal} as kernel::module_param::ModuleParam>::value(
+ &__{name}_{param_name}_value
+ )
+ }}
+ }}
+ ",
+ name = info.name,
+ param_name = param_name,
+ param_type_internal = param_type_internal,
+ )
+ };
+ let kparam = format!(
+ "
+ kernel::bindings::kernel_param__bindgen_ty_1 {{
+ arg: unsafe {{ &__{name}_{param_name}_value }}
+ as *const _ as *mut core::ffi::c_void,
+ }},
+ ",
+ name = info.name,
+ param_name = param_name,
+ );
+ write!(
+ modinfo.buffer,
+ "
+ static mut __{name}_{param_name}_value: {param_type_internal} = {param_default};
+
+ struct __{name}_{param_name};
+
+ impl __{name}_{param_name} {{ {read_func} }}
+
+ const {param_name}: __{name}_{param_name} = __{name}_{param_name};
+
+ // Note: the C macro that generates the static structs for the `__param` section
+ // asks for them to be `aligned(sizeof(void *))`. However, that was put in place
+ // in 2003 in commit 38d5b085d2a0 (\"[PATCH] Fix over-alignment problem on x86-64\")
+ // to undo GCC over-alignment of static structs of >32 bytes. It seems that is
+ // not the case anymore, so we simplify to a transparent representation here
+ // in the expectation that it is not needed anymore.
+ // TODO: Revisit this to confirm the above comment and remove it if it happened.
+ #[repr(transparent)]
+ struct __{name}_{param_name}_RacyKernelParam(kernel::bindings::kernel_param);
+
+ unsafe impl Sync for __{name}_{param_name}_RacyKernelParam {{
+ }}
+
+ #[cfg(not(MODULE))]
+ const __{name}_{param_name}_name: *const core::ffi::c_char =
+ b\"{name}.{param_name}\\0\" as *const _ as *const core::ffi::c_char;
+
+ #[cfg(MODULE)]
+ const __{name}_{param_name}_name: *const core::ffi::c_char =
+ b\"{param_name}\\0\" as *const _ as *const core::ffi::c_char;
+
+ #[link_section = \"__param\"]
+ #[used]
+ static __{name}_{param_name}_struct: __{name}_{param_name}_RacyKernelParam =
+ __{name}_{param_name}_RacyKernelParam(kernel::bindings::kernel_param {{
+ name: __{name}_{param_name}_name,
+ // SAFETY: `__this_module` is constructed by the kernel at load time
+ // and will not be freed until the module is unloaded.
+ #[cfg(MODULE)]
+ mod_: unsafe {{ &kernel::bindings::__this_module as *const _ as *mut _ }},
+ #[cfg(not(MODULE))]
+ mod_: core::ptr::null_mut(),
+ ops: unsafe {{ &{ops} }} as *const kernel::bindings::kernel_param_ops,
+ perm: {permissions},
+ level: -1,
+ flags: 0,
+ __bindgen_anon_1: {kparam}
+ }});
+ ",
+ name = info.name,
+ param_type_internal = param_type_internal,
+ read_func = read_func,
+ param_default = param_default,
+ param_name = param_name,
+ ops = ops,
+ permissions = param_permissions,
+ kparam = kparam,
+ )
+ .unwrap();
+ }
+ }
+
+ let mut generated_array_types = String::new();
+
+ for (vals, max_length) in array_types_to_generate {
+ let ops_name = generated_array_ops_name(&vals, max_length);
+ write!(
+ generated_array_types,
+ "
+ kernel::make_param_ops!(
+ {ops_name},
+ kernel::module_param::ArrayParam<{vals}, {{ {max_length} }}>
+ );
+ ",
+ ops_name = ops_name,
+ vals = vals,
+ max_length = max_length,
+ )
+ .unwrap();
+ }
+
format!(
"
/// The module name.
@@ -250,7 +607,7 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream {
}}
fn __init() -> core::ffi::c_int {{
- match <{type_} as kernel::Module>::init(&THIS_MODULE) {{
+ match <{type_} as kernel::Module>::init(kernel::c_str!(\"{name}\"), &THIS_MODULE) {{
Ok(m) => {{
unsafe {{
__MOD = Some(m);
@@ -271,10 +628,13 @@ pub(crate) fn module(ts: TokenStream) -> TokenStream {
}}
{modinfo}
+
+ {generated_array_types}
",
type_ = info.type_,
name = info.name,
modinfo = modinfo.buffer,
+ generated_array_types = generated_array_types,
initcall_section = ".initcall6.init"
)
.parse()
diff --git a/rust/macros/versions.rs b/rust/macros/versions.rs
new file mode 100644
index 000000000000..6dfdfd7d25c5
--- /dev/null
+++ b/rust/macros/versions.rs
@@ -0,0 +1,289 @@
+use proc_macro::{Group, Ident, Punct, Spacing, Span, TokenStream, TokenTree};
+
+//use crate::helpers::expect_punct;
+
+/// Consumes the next token from `it` and returns it as a [`Group`].
+///
+/// Panics with "Expected Group" if the iterator is exhausted or the next token is anything
+/// other than a group.
+fn expect_group(it: &mut impl Iterator<Item = TokenTree>) -> Group {
+    match it.next() {
+        Some(TokenTree::Group(group)) => group,
+        _ => panic!("Expected Group"),
+    }
+}
+
+/// Consumes the next token from `it`, which must be a punctuation token, and returns its
+/// textual form.
+///
+/// Panics with "Expected Punct" if the iterator is exhausted or the next token is not a
+/// [`TokenTree::Punct`].
+fn expect_punct(it: &mut impl Iterator<Item = TokenTree>) -> String {
+    match it.next() {
+        Some(TokenTree::Punct(punct)) => punct.to_string(),
+        _ => panic!("Expected Punct"),
+    }
+}
+
+/// Discards tokens from `it` up to and including the first punctuation character that is
+/// contained in `delimiter` and seen outside of any `<` ... `>` pair.
+///
+/// Nesting is tracked by counting `<` and `>` punctuation only, so those two characters can
+/// never act as delimiters themselves. If no delimiter is found the iterator is drained.
+fn drop_until_punct(it: &mut impl Iterator<Item = TokenTree>, delimiter: &str) {
+    let mut nesting: isize = 0;
+    for token in it {
+        // Only punctuation can open/close angle brackets or terminate the skipped region.
+        let ch = match token {
+            TokenTree::Punct(punct) => punct.as_char(),
+            _ => continue,
+        };
+        match ch {
+            '<' => nesting += 1,
+            '>' => nesting -= 1,
+            _ if nesting == 0 && delimiter.contains(ch) => break,
+            _ => {}
+        }
+    }
+}
+
+/// Static description of one family of versions that `versions()` can expand an item for.
+struct VersionConfig {
+ /// Names of the version dimensions; these are the identifiers accepted on the left-hand
+ /// side of a comparison inside `#[ver(...)]`.
+ fields: &'static [&'static str],
+ /// For each entry of `fields` (same index), the accepted values. Conditions compare values
+ /// by their position in this list, so the order here defines `<` / `>`.
+ enums: &'static [&'static [&'static str]],
+ /// The combinations to generate. Each entry holds one value per field, in `fields` order;
+ /// the values joined together form the suffix that replaces `::ver`.
+ versions: &'static [&'static [&'static str]],
+}
+
+/// Version table selected when the macro's attribute argument is `AGX`.
+// NOTE(review): `G` / `V` presumably denote GPU generation and firmware version — confirm.
+static AGX_VERSIONS: VersionConfig = VersionConfig {
+ fields: &["G", "V"],
+ enums: &[&["G13", "G14"], &["V12_3", "V12_4", "V13_0B4", "V13_2"]],
+ versions: &[
+ &["G13", "V12_3"],
+ &["G14", "V12_4"],
+ // `V13_0B4` stays a valid value for comparisons, but no variant is generated for it.
+ // &["G13", "V13_0B4"],
+ // &["G14", "V13_0B4"],
+ &["G13", "V13_2"],
+ &["G14", "V13_2"],
+ ],
+};
+
+/// Evaluates the condition of a `#[ver(...)]` attribute against one concrete version.
+///
+/// * `config` - the version family the condition is written against.
+/// * `ver` - for every entry of `config.fields`, the index of this version's value within the
+///   matching `config.enums` list.
+/// * `it` - the tokens of the condition.
+///
+/// A condition is a chain of terms joined by `&&` or `||`. A term is either a group (whose
+/// contents are evaluated recursively) or a comparison `FIELD op VALUE`, where `op` is one of
+/// `==`, `!=`, `>`, `>=`, `<`, `<=` and values are ordered by their position in `config.enums`.
+///
+/// There is no operator precedence: everything to the right of a `&&` / `||` forms a single
+/// operand, so `a && b || c` is evaluated as `a && (b || c)`. The right-hand side is evaluated
+/// lazily, so it is not parsed at all when the left-hand side already decides the result.
+///
+/// Panics on a malformed condition (missing token, unknown field, value or operator).
+fn check_version(
+    config: &VersionConfig,
+    ver: &[usize],
+    it: &mut impl Iterator<Item = TokenTree>,
+) -> bool {
+    let first = it.next().expect("Expected version condition");
+    let val: bool = match &first {
+        TokenTree::Group(group) => check_version(config, ver, &mut group.stream().into_iter()),
+        TokenTree::Ident(ident) => {
+            let field = ident.to_string();
+            let key = config
+                .fields
+                .iter()
+                .position(|&r| r == field)
+                .unwrap_or_else(|| panic!("Unknown field {}", ident));
+            let mut operator = expect_punct(it);
+            let mut rhs_token = it
+                .next()
+                .expect("Expected value after comparison operator");
+            // Two-character operators such as `>=` arrive as two separate punct tokens.
+            if let TokenTree::Punct(punct) = &rhs_token {
+                operator.push(punct.as_char());
+                rhs_token = it
+                    .next()
+                    .expect("Expected value after comparison operator");
+            }
+            let rhs_name = match &rhs_token {
+                TokenTree::Ident(rhs_ident) => rhs_ident.to_string(),
+                // Report the offending token itself rather than the left-hand field name.
+                _ => panic!("Unexpected token {}", rhs_token),
+            };
+
+            let rhs = config.enums[key]
+                .iter()
+                .position(|&r| r == rhs_name)
+                .unwrap_or_else(|| panic!("Unknown value for {}:{}", ident, rhs_name));
+            let lhs = ver[key];
+
+            match operator.as_str() {
+                "==" => lhs == rhs,
+                "!=" => lhs != rhs,
+                ">" => lhs > rhs,
+                ">=" => lhs >= rhs,
+                "<" => lhs < rhs,
+                "<=" => lhs <= rhs,
+                _ => panic!("Unknown operator {}", operator),
+            }
+        }
+        _ => {
+            panic!("Unknown token {}", first)
+        }
+    };
+
+    // An optional `&&` / `||` followed by the rest of the condition.
+    match it.next() {
+        Some(TokenTree::Punct(punct)) => {
+            // Both characters of the boolean operator must match (`&&` or `||`).
+            let right = expect_punct(it);
+            if right != punct.to_string() {
+                panic!("Unexpected op {}{}", punct, right);
+            }
+            match punct.as_char() {
+                '&' => val && check_version(config, ver, it),
+                '|' => val || check_version(config, ver, it),
+                _ => panic!("Unexpected op {}{}", punct, right),
+            }
+        }
+        Some(a) => panic!("Unexpected op {}", a),
+        None => val,
+    }
+}
+
+/// Specialises a token sequence for one concrete version.
+///
+/// Walks `tree`, descending into every group, and returns the rewritten tokens:
+///
+/// * A `#[ver(<condition>)]` attribute is always removed. If its condition (evaluated by
+///   `check_version` against `ver`) is false, the tokens it applies to are removed too: with
+///   `is_struct` set, everything up to and including the next `,`; otherwise the following
+///   token alone if it is a group, or else everything up to and including the next `,` or `;`.
+/// * Any other attribute is copied through unchanged (its contents are not inspected).
+/// * `Name::ver` is replaced by a single identifier consisting of `Name` followed by `tag`.
+///
+/// Panics if a `#` is not followed by a group, or if `::ver` does not follow an identifier.
+fn filter_versions(
+    config: &VersionConfig,
+    tag: &str,
+    ver: &[usize],
+    tree: impl IntoIterator<Item = TokenTree>,
+    is_struct: bool,
+) -> Vec<TokenTree> {
+    let mut out = Vec::<TokenTree>::new();
+    let mut it = tree.into_iter();
+
+    while let Some(token) = it.next() {
+        // Token that still has to be emitted (recursing into it if it is a group) after the
+        // special cases below have been handled.
+        let mut tail: Option<TokenTree> = None;
+        match &token {
+            TokenTree::Punct(punct) if punct.as_char() == '#' => {
+                let group = expect_group(&mut it);
+                let mut grp_it = group.stream().into_iter();
+                let attr = grp_it.next().expect("Expected attribute name");
+                match attr {
+                    TokenTree::Ident(ident) if ident.to_string() == "ver" => {
+                        // The attribute itself is never emitted; on a failed condition the
+                        // annotated tokens are skipped as well.
+                        if !check_version(config, ver, &mut grp_it) {
+                            if is_struct {
+                                drop_until_punct(&mut it, ",");
+                            } else {
+                                let first = it.next().expect("Expected tokens after #[ver(...)]");
+                                if !matches!(first, TokenTree::Group(_)) {
+                                    drop_until_punct(&mut it, ",;");
+                                }
+                            }
+                        }
+                    }
+                    _ => {
+                        out.push(token.clone());
+                        out.push(TokenTree::Group(group));
+                    }
+                }
+                continue;
+            }
+            // Only a colon that is glued to the following punctuation can start a `::`. A
+            // lone `:` (as in `field: ::path::Type`) must not swallow the path separator
+            // that follows it, so it takes the default route below.
+            TokenTree::Punct(punct)
+                if punct.as_char() == ':' && punct.spacing() == Spacing::Joint =>
+            {
+                match it.next() {
+                    Some(TokenTree::Punct(second)) if second.as_char() == ':' => {
+                        match it.next() {
+                            Some(TokenTree::Ident(idtag)) if idtag.to_string() == "ver" => {
+                                let ident = match out.pop() {
+                                    Some(TokenTree::Ident(ident)) => ident,
+                                    a => panic!("::ver not following ident: {:?}", a),
+                                };
+                                let name = ident.to_string() + tag;
+                                let new_ident = Ident::new(name.as_str(), ident.span());
+                                out.push(TokenTree::Ident(new_ident));
+                                continue;
+                            }
+                            after => {
+                                // Ordinary path separator: re-emit both colons as they were
+                                // (keeping the second colon's own spacing and span).
+                                out.push(token.clone());
+                                out.push(TokenTree::Punct(second));
+                                tail = after;
+                            }
+                        }
+                    }
+                    Some(a) => {
+                        out.push(token.clone());
+                        tail = Some(a);
+                    }
+                    None => {
+                        out.push(token.clone());
+                        continue;
+                    }
+                }
+            }
+            _ => {
+                tail = Some(token);
+            }
+        }
+        match tail {
+            Some(TokenTree::Group(group)) => {
+                // Rebuild the group around its filtered contents, keeping delimiter and span.
+                let new_body = filter_versions(config, tag, ver, group.stream(), is_struct);
+                let mut stream = TokenStream::new();
+                stream.extend(new_body);
+                let mut filtered_group = Group::new(group.delimiter(), stream);
+                filtered_group.set_span(group.span());
+                out.push(TokenTree::Group(filtered_group));
+            }
+            Some(other) => {
+                out.push(other);
+            }
+            None => {}
+        }
+    }
+
+    out
+}
+
+/// Expands `item` once for every entry in the `versions` list of the configuration named by
+/// `attr`, and returns all expansions concatenated.
+///
+/// The leading tokens of `item` are scanned up to the first `struct`, `enum`, `union`, `const`,
+/// `type`, `impl` or `fn` keyword (attributes are copied through as `#` + group pairs). For the
+/// first five keywords the declared name gets `::ver` appended, so that `filter_versions`
+/// turns it into a per-version name. Everything except `const` is filtered in "struct" mode.
+///
+/// Panics if `attr` does not name a known configuration (currently only `AGX`).
+pub(crate) fn versions(attr: TokenStream, item: TokenStream) -> TokenStream {
+    let config = match attr.to_string().as_str() {
+        "AGX" => &AGX_VERSIONS,
+        _ => panic!("Unknown version group {}", attr),
+    };
+
+    let mut it = item.into_iter();
+    let mut body: Vec<TokenTree> = Vec::new();
+    let mut is_struct = false;
+
+    while let Some(token) = it.next() {
+        // Reduce the token to the text that decides how it is handled; tokens that are neither
+        // an identifier nor `#` map to the empty string and are copied verbatim.
+        let keyword = match &token {
+            TokenTree::Ident(ident) => ident.to_string(),
+            TokenTree::Punct(punct) if punct.to_string() == "#" => String::from("#"),
+            _ => String::new(),
+        };
+        match keyword.as_str() {
+            "#" => {
+                // Attribute: copy the `#` together with whatever follows it.
+                body.push(token);
+                body.push(it.next().unwrap());
+            }
+            "struct" | "enum" | "union" | "const" | "type" => {
+                is_struct = keyword != "const";
+                body.push(token);
+                body.push(it.next().unwrap());
+                // `Name::ver` is not valid in a definition, so the user writes the plain name
+                // and the marker is appended here on their behalf.
+                body.push(TokenTree::Punct(Punct::new(':', Spacing::Joint)));
+                body.push(TokenTree::Punct(Punct::new(':', Spacing::Alone)));
+                body.push(TokenTree::Ident(Ident::new("ver", Span::call_site())));
+                break;
+            }
+            "impl" | "fn" => {
+                body.push(token);
+                break;
+            }
+            _ => body.push(token),
+        }
+    }
+
+    // Whatever follows the keyword is taken over unchanged.
+    body.extend(it);
+
+    let mut out = TokenStream::new();
+    for ver in config.versions {
+        let tag = ver.concat();
+        // Translate each value name into its index within the corresponding enum list.
+        let ver_num: Vec<usize> = ver
+            .iter()
+            .enumerate()
+            .map(|(i, comp)| config.enums[i].iter().position(|r| r == comp).unwrap())
+            .collect();
+        out.extend(filter_versions(
+            config,
+            &tag,
+            &ver_num,
+            body.clone(),
+            is_struct,
+        ));
+    }
+
+    out
+}
diff --git a/samples/rust/rust_print.rs b/samples/rust/rust_print.rs
index 8b39d9cef6d1..165a8d7b1c07 100644
--- a/samples/rust/rust_print.rs
+++ b/samples/rust/rust_print.rs
@@ -15,6 +15,30 @@ module! {
struct RustPrint;
+/// Demonstrates printing values held in `Arc` and `UniqueArc` with the printing macros.
+///
+/// Any error returned by the `try_new` constructors is propagated to the caller via `?`;
+/// otherwise `Ok(())` is returned.
+fn arc_print() -> Result {
+ use kernel::sync::*;
+
+ let a = Arc::try_new(1)?;
+ let b = UniqueArc::try_new("hello, world")?;
+
+ // Prints the value of data in `a`.
+ pr_info!("{}", a);
+
+ // Uses ":?" to print debug fmt of `b`.
+ pr_info!("{:?}", b);
+
+ // Converts the `UniqueArc` into an `Arc`, shadowing the earlier integer `a`.
+ let a: Arc<&str> = b.into();
+ let c = a.clone();
+
+ // Uses `dbg` to print, will move `c`.
+ dbg!(c);
+
+ // Prints debug fmt with pretty-print "#" and number-in-hex "x".
+ pr_info!("{:#x?}", a);
+
+ Ok(())
+}
+
impl kernel::Module for RustPrint {
fn init(_module: &'static ThisModule) -> Result<Self> {
pr_info!("Rust printing macros sample (init)\n");
@@ -43,6 +67,8 @@ impl kernel::Module for RustPrint {
pr_cont!(" is {}", "continued");
pr_cont!(" with {}\n", "args");
+ arc_print()?;
+
Ok(RustPrint)
}
}
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index a0d5c6cca76d..d43fbd12570a 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -277,7 +277,7 @@ $(obj)/%.lst: $(src)/%.c FORCE
# Compile Rust sources (.rs)
# ---------------------------------------------------------------------------
-rust_allowed_features := core_ffi_c
+rust_allowed_features := allocator_api,const_refs_to_cell,new_uninit
rust_common_cmd = \
RUST_MODFILE=$(modfile) $(RUSTC_OR_CLIPPY) $(rust_flags) \
diff --git a/scripts/generate_rust_target.rs b/scripts/generate_rust_target.rs
index 3c6cbe2b278d..fe0e4ba54492 100644
--- a/scripts/generate_rust_target.rs
+++ b/scripts/generate_rust_target.rs
@@ -148,7 +148,18 @@ fn main() {
let mut ts = TargetSpec::new();
// `llvm-target`s are taken from `scripts/Makefile.clang`.
- if cfg.has("X86_64") {
+ if cfg.has("ARM64") {
+ ts.push("arch", "aarch64");
+ ts.push(
+ "data-layout",
+ "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128",
+ );
+ ts.push("disable-redzone", true);
+ ts.push("features", "+strict-align,+neon,+fp-armv8");
+ ts.push("llvm-target", "aarch64-linux-gnu");
+ ts.push("max-atomic-width", 128);
+ ts.push("target-pointer-width", "64");
+ } else if cfg.has("X86_64") {
ts.push("arch", "x86_64");
ts.push(
"data-layout",
diff --git a/scripts/min-tool-version.sh b/scripts/min-tool-version.sh
index a814f1efb39d..a368a3e56bca 100755
--- a/scripts/min-tool-version.sh
+++ b/scripts/min-tool-version.sh
@@ -31,7 +31,7 @@ llvm)
fi
;;
rustc)
- echo 1.62.0
+ echo 1.66.0
;;
bindgen)
echo 0.56.0