diff options
Diffstat (limited to 'rust/alloc')
-rw-r--r-- | rust/alloc/borrow.rs | 2 | ||||
-rw-r--r-- | rust/alloc/boxed.rs | 2 | ||||
-rw-r--r-- | rust/alloc/boxed/thin.rs | 219 | ||||
-rw-r--r-- | rust/alloc/ffi/c_str.rs | 1203 | ||||
-rw-r--r-- | rust/alloc/ffi/mod.rs | 93 | ||||
-rw-r--r-- | rust/alloc/fmt.rs | 614 | ||||
-rw-r--r-- | rust/alloc/lib.rs | 7 | ||||
-rw-r--r-- | rust/alloc/macros.rs | 128 | ||||
-rw-r--r-- | rust/alloc/raw_vec.rs | 44 | ||||
-rw-r--r-- | rust/alloc/slice.rs | 93 | ||||
-rw-r--r-- | rust/alloc/str.rs | 641 | ||||
-rw-r--r-- | rust/alloc/string.rs | 2944 | ||||
-rw-r--r-- | rust/alloc/vec/into_iter.rs | 1 | ||||
-rw-r--r-- | rust/alloc/vec/mod.rs | 286 | ||||
-rw-r--r-- | rust/alloc/vec/set_len_on_drop.rs | 30 | ||||
-rw-r--r-- | rust/alloc/vec/spec_extend.rs | 174 |
16 files changed, 6465 insertions, 16 deletions
diff --git a/rust/alloc/borrow.rs b/rust/alloc/borrow.rs index dde4957200d4..ca8e3dfa7004 100644 --- a/rust/alloc/borrow.rs +++ b/rust/alloc/borrow.rs @@ -13,7 +13,7 @@ use core::ops::{Add, AddAssign}; #[stable(feature = "rust1", since = "1.0.0")] pub use core::borrow::{Borrow, BorrowMut}; -use core::fmt; +use crate::fmt; #[cfg(not(no_global_oom_handling))] use crate::string::String; diff --git a/rust/alloc/boxed.rs b/rust/alloc/boxed.rs index dcfe87b14f3a..8fd296421dec 100644 --- a/rust/alloc/boxed.rs +++ b/rust/alloc/boxed.rs @@ -165,11 +165,9 @@ use crate::str::from_boxed_utf8_unchecked; #[cfg(not(no_global_oom_handling))] use crate::vec::Vec; -#[cfg(not(no_thin))] #[unstable(feature = "thin_box", issue = "92791")] pub use thin::ThinBox; -#[cfg(not(no_thin))] mod thin; /// A pointer type for heap allocation. diff --git a/rust/alloc/boxed/thin.rs b/rust/alloc/boxed/thin.rs new file mode 100644 index 000000000000..9135203114fc --- /dev/null +++ b/rust/alloc/boxed/thin.rs @@ -0,0 +1,219 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// Based on +// https://github.com/matthieu-m/rfc2580/blob/b58d1d3cba0d4b5e859d3617ea2d0943aaa31329/examples/thin.rs +// by matthieu-m +use crate::alloc::{self, Layout, LayoutError}; +use core::fmt::{self, Debug, Display, Formatter}; +use core::marker::PhantomData; +#[cfg(not(no_global_oom_handling))] +use core::marker::Unsize; +use core::mem; +use core::ops::{Deref, DerefMut}; +use core::ptr::Pointee; +use core::ptr::{self, NonNull}; + +/// ThinBox. +/// +/// A thin pointer for heap allocation, regardless of T. +/// +/// # Examples +/// +/// ``` +/// #![feature(thin_box)] +/// use std::boxed::ThinBox; +/// +/// let five = ThinBox::new(5); +/// let thin_slice = ThinBox::<[i32]>::new_unsize([1, 2, 3, 4]); +/// +/// use std::mem::{size_of, size_of_val}; +/// let size_of_ptr = size_of::<*const ()>(); +/// assert_eq!(size_of_ptr, size_of_val(&five)); +/// assert_eq!(size_of_ptr, size_of_val(&thin_slice)); +/// ``` +#[unstable(feature = "thin_box", issue = "92791")] +pub struct ThinBox<T: ?Sized> { + ptr: WithHeader<<T as Pointee>::Metadata>, + _marker: PhantomData<T>, +} + +#[unstable(feature = "thin_box", issue = "92791")] +impl<T> ThinBox<T> { + /// Moves a type to the heap with its `Metadata` stored in the heap allocation instead of on + /// the stack. + /// + /// # Examples + /// + /// ``` + /// #![feature(thin_box)] + /// use std::boxed::ThinBox; + /// + /// let five = ThinBox::new(5); + /// ``` + #[cfg(not(no_global_oom_handling))] + pub fn new(value: T) -> Self { + let meta = ptr::metadata(&value); + let ptr = WithHeader::new(meta, value); + ThinBox { ptr, _marker: PhantomData } + } +} + +#[unstable(feature = "thin_box", issue = "92791")] +impl<Dyn: ?Sized> ThinBox<Dyn> { + /// Moves a type to the heap with its `Metadata` stored in the heap allocation instead of on + /// the stack. + /// + /// # Examples + /// + /// ``` + /// #![feature(thin_box)] + /// use std::boxed::ThinBox; + /// + /// let thin_slice = ThinBox::<[i32]>::new_unsize([1, 2, 3, 4]); + /// ``` + #[cfg(not(no_global_oom_handling))] + pub fn new_unsize<T>(value: T) -> Self + where + T: Unsize<Dyn>, + { + let meta = ptr::metadata(&value as &Dyn); + let ptr = WithHeader::new(meta, value); + ThinBox { ptr, _marker: PhantomData } + } +} + +#[unstable(feature = "thin_box", issue = "92791")] +impl<T: ?Sized + Debug> Debug for ThinBox<T> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + Debug::fmt(self.deref(), f) + } +} + +#[unstable(feature = "thin_box", issue = "92791")] +impl<T: ?Sized + Display> Display for ThinBox<T> { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + Display::fmt(self.deref(), f) + } +} + +#[unstable(feature = "thin_box", issue = "92791")] +impl<T: ?Sized> Deref for ThinBox<T> { + type Target = T; + + fn deref(&self) -> &T { + let value = self.data(); + let metadata = self.meta(); + let pointer = ptr::from_raw_parts(value as *const (), metadata); + unsafe { &*pointer } + } +} + +#[unstable(feature = "thin_box", issue = "92791")] +impl<T: ?Sized> DerefMut for ThinBox<T> { + fn deref_mut(&mut self) -> &mut T { + let value = self.data(); + let metadata = self.meta(); + let pointer = ptr::from_raw_parts_mut::<T>(value as *mut (), metadata); + unsafe { &mut *pointer } + } +} + +#[unstable(feature = "thin_box", issue = "92791")] +impl<T: ?Sized> Drop for ThinBox<T> { + fn drop(&mut self) { + unsafe { + let value = self.deref_mut(); + let value = value as *mut T; + self.ptr.drop::<T>(value); + } + } +} + +#[unstable(feature = "thin_box", issue = "92791")] +impl<T: ?Sized> ThinBox<T> { + fn meta(&self) -> <T as Pointee>::Metadata { + // Safety: + // - NonNull and valid. + unsafe { *self.ptr.header() } + } + + fn data(&self) -> *mut u8 { + self.ptr.value() + } +} + +/// A pointer to type-erased data, guaranteed to have a header `H` before the pointed-to location. +struct WithHeader<H>(NonNull<u8>, PhantomData<H>); + +impl<H> WithHeader<H> { + #[cfg(not(no_global_oom_handling))] + fn new<T>(header: H, value: T) -> WithHeader<H> { + let value_layout = Layout::new::<T>(); + let Ok((layout, value_offset)) = Self::alloc_layout(value_layout) else { + // We pass an empty layout here because we do not know which layout caused the + // arithmetic overflow in `Layout::extend` and `handle_alloc_error` takes `Layout` as + // its argument rather than `Result<Layout, LayoutError>`, also this function has been + // stable since 1.28 ._. + // + // On the other hand, look at this gorgeous turbofish! + alloc::handle_alloc_error(Layout::new::<()>()); + }; + + unsafe { + let ptr = alloc::alloc(layout); + + if ptr.is_null() { + alloc::handle_alloc_error(layout); + } + // Safety: + // - The size is at least `aligned_header_size`. + let ptr = ptr.add(value_offset) as *mut _; + + let ptr = NonNull::new_unchecked(ptr); + + let result = WithHeader(ptr, PhantomData); + ptr::write(result.header(), header); + ptr::write(result.value().cast(), value); + + result + } + } + + // Safety: + // - Assumes that `value` can be dereferenced. + unsafe fn drop<T: ?Sized>(&self, value: *mut T) { + unsafe { + // SAFETY: Layout must have been computable if we're in drop + let (layout, value_offset) = + Self::alloc_layout(Layout::for_value_raw(value)).unwrap_unchecked(); + + ptr::drop_in_place::<T>(value); + // We only drop the value because the Pointee trait requires that the metadata is copy + // aka trivially droppable + alloc::dealloc(self.0.as_ptr().sub(value_offset), layout); + } + } + + fn header(&self) -> *mut H { + // Safety: + // - At least `size_of::<H>()` bytes are allocated ahead of the pointer. + // - We know that H will be aligned because the middle pointer is aligned to the greater + // of the alignment of the header and the data and the header size includes the padding + // needed to align the header. Subtracting the header size from the aligned data pointer + // will always result in an aligned header pointer, it just may not point to the + // beginning of the allocation. + unsafe { self.0.as_ptr().sub(Self::header_size()) as *mut H } + } + + fn value(&self) -> *mut u8 { + self.0.as_ptr() + } + + const fn header_size() -> usize { + mem::size_of::<H>() + } + + fn alloc_layout(value_layout: Layout) -> Result<(Layout, usize), LayoutError> { + Layout::new::<H>().extend(value_layout) + } +} diff --git a/rust/alloc/ffi/c_str.rs b/rust/alloc/ffi/c_str.rs new file mode 100644 index 000000000000..5e2f4073771a --- /dev/null +++ b/rust/alloc/ffi/c_str.rs @@ -0,0 +1,1203 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +#[cfg(test)] +mod tests; + +use crate::borrow::{Cow, ToOwned}; +use crate::boxed::Box; +use crate::rc::Rc; +use crate::slice::hack::into_vec; +use crate::string::String; +use crate::vec::Vec; +use core::borrow::Borrow; +use core::ffi::{c_char, CStr}; +use core::fmt; +use core::mem; +use core::num::NonZeroU8; +use core::ops; +use core::ptr; +use core::slice; +use core::slice::memchr; +use core::str::{self, Utf8Error}; + +#[cfg(target_has_atomic = "ptr")] +use crate::sync::Arc; + +/// A type representing an owned, C-compatible, nul-terminated string with no nul bytes in the +/// middle. +/// +/// This type serves the purpose of being able to safely generate a +/// C-compatible string from a Rust byte slice or vector. An instance of this +/// type is a static guarantee that the underlying bytes contain no interior 0 +/// bytes ("nul characters") and that the final byte is 0 ("nul terminator"). +/// +/// `CString` is to <code>&[CStr]</code> as [`String`] is to <code>&[str]</code>: the former +/// in each pair are owned strings; the latter are borrowed +/// references. +/// +/// # Creating a `CString` +/// +/// A `CString` is created from either a byte slice or a byte vector, +/// or anything that implements <code>[Into]<[Vec]<[u8]>></code> (for +/// example, you can build a `CString` straight out of a [`String`] or +/// a <code>&[str]</code>, since both implement that trait). +/// +/// The [`CString::new`] method will actually check that the provided <code>&[[u8]]</code> +/// does not have 0 bytes in the middle, and return an error if it +/// finds one. +/// +/// # Extracting a raw pointer to the whole C string +/// +/// `CString` implements an [`as_ptr`][`CStr::as_ptr`] method through the [`Deref`] +/// trait. This method will give you a `*const c_char` which you can +/// feed directly to extern functions that expect a nul-terminated +/// string, like C's `strdup()`. Notice that [`as_ptr`][`CStr::as_ptr`] returns a +/// read-only pointer; if the C code writes to it, that causes +/// undefined behavior. +/// +/// # Extracting a slice of the whole C string +/// +/// Alternatively, you can obtain a <code>&[[u8]]</code> slice from a +/// `CString` with the [`CString::as_bytes`] method. Slices produced in this +/// way do *not* contain the trailing nul terminator. This is useful +/// when you will be calling an extern function that takes a `*const +/// u8` argument which is not necessarily nul-terminated, plus another +/// argument with the length of the string — like C's `strndup()`. +/// You can of course get the slice's length with its +/// [`len`][slice::len] method. +/// +/// If you need a <code>&[[u8]]</code> slice *with* the nul terminator, you +/// can use [`CString::as_bytes_with_nul`] instead. +/// +/// Once you have the kind of slice you need (with or without a nul +/// terminator), you can call the slice's own +/// [`as_ptr`][slice::as_ptr] method to get a read-only raw pointer to pass to +/// extern functions. See the documentation for that function for a +/// discussion on ensuring the lifetime of the raw pointer. +/// +/// [str]: prim@str "str" +/// [`Deref`]: ops::Deref +/// +/// # Examples +/// +/// ```ignore (extern-declaration) +/// # fn main() { +/// use std::ffi::CString; +/// use std::os::raw::c_char; +/// +/// extern "C" { +/// fn my_printer(s: *const c_char); +/// } +/// +/// // We are certain that our string doesn't have 0 bytes in the middle, +/// // so we can .expect() +/// let c_to_print = CString::new("Hello, world!").expect("CString::new failed"); +/// unsafe { +/// my_printer(c_to_print.as_ptr()); +/// } +/// # } +/// ``` +/// +/// # Safety +/// +/// `CString` is intended for working with traditional C-style strings +/// (a sequence of non-nul bytes terminated by a single nul byte); the +/// primary use case for these kinds of strings is interoperating with C-like +/// code. Often you will need to transfer ownership to/from that external +/// code. It is strongly recommended that you thoroughly read through the +/// documentation of `CString` before use, as improper ownership management +/// of `CString` instances can lead to invalid memory accesses, memory leaks, +/// and other memory errors. +#[derive(PartialEq, PartialOrd, Eq, Ord, Hash, Clone)] +#[cfg_attr(not(test), rustc_diagnostic_item = "cstring_type")] +#[unstable(feature = "alloc_c_string", issue = "94079")] +pub struct CString { + // Invariant 1: the slice ends with a zero byte and has a length of at least one. + // Invariant 2: the slice contains only one zero byte. + // Improper usage of unsafe function can break Invariant 2, but not Invariant 1. + inner: Box<[u8]>, +} + +/// An error indicating that an interior nul byte was found. +/// +/// While Rust strings may contain nul bytes in the middle, C strings +/// can't, as that byte would effectively truncate the string. +/// +/// This error is created by the [`new`][`CString::new`] method on +/// [`CString`]. See its documentation for more. +/// +/// # Examples +/// +/// ``` +/// use std::ffi::{CString, NulError}; +/// +/// let _: NulError = CString::new(b"f\0oo".to_vec()).unwrap_err(); +/// ``` +#[derive(Clone, PartialEq, Eq, Debug)] +#[unstable(feature = "alloc_c_string", issue = "94079")] +pub struct NulError(usize, Vec<u8>); + +#[derive(Clone, PartialEq, Eq, Debug)] +enum FromBytesWithNulErrorKind { + InteriorNul(usize), + NotNulTerminated, +} + +/// An error indicating that a nul byte was not in the expected position. +/// +/// The vector used to create a [`CString`] must have one and only one nul byte, +/// positioned at the end. +/// +/// This error is created by the [`CString::from_vec_with_nul`] method. +/// See its documentation for more. +/// +/// # Examples +/// +/// ``` +/// use std::ffi::{CString, FromVecWithNulError}; +/// +/// let _: FromVecWithNulError = CString::from_vec_with_nul(b"f\0oo".to_vec()).unwrap_err(); +/// ``` +#[derive(Clone, PartialEq, Eq, Debug)] +#[unstable(feature = "alloc_c_string", issue = "94079")] +pub struct FromVecWithNulError { + error_kind: FromBytesWithNulErrorKind, + bytes: Vec<u8>, +} + +#[stable(feature = "cstring_from_vec_with_nul", since = "1.58.0")] +impl FromVecWithNulError { + /// Returns a slice of [`u8`]s bytes that were attempted to convert to a [`CString`]. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use std::ffi::CString; + /// + /// // Some invalid bytes in a vector + /// let bytes = b"f\0oo".to_vec(); + /// + /// let value = CString::from_vec_with_nul(bytes.clone()); + /// + /// assert_eq!(&bytes[..], value.unwrap_err().as_bytes()); + /// ``` + #[must_use] + #[stable(feature = "cstring_from_vec_with_nul", since = "1.58.0")] + pub fn as_bytes(&self) -> &[u8] { + &self.bytes[..] + } + + /// Returns the bytes that were attempted to convert to a [`CString`]. + /// + /// This method is carefully constructed to avoid allocation. It will + /// consume the error, moving out the bytes, so that a copy of the bytes + /// does not need to be made. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use std::ffi::CString; + /// + /// // Some invalid bytes in a vector + /// let bytes = b"f\0oo".to_vec(); + /// + /// let value = CString::from_vec_with_nul(bytes.clone()); + /// + /// assert_eq!(bytes, value.unwrap_err().into_bytes()); + /// ``` + #[must_use = "`self` will be dropped if the result is not used"] + #[stable(feature = "cstring_from_vec_with_nul", since = "1.58.0")] + pub fn into_bytes(self) -> Vec<u8> { + self.bytes + } +} + +/// An error indicating invalid UTF-8 when converting a [`CString`] into a [`String`]. +/// +/// `CString` is just a wrapper over a buffer of bytes with a nul terminator; +/// [`CString::into_string`] performs UTF-8 validation on those bytes and may +/// return this error. +/// +/// This `struct` is created by [`CString::into_string()`]. See +/// its documentation for more. +#[derive(Clone, PartialEq, Eq, Debug)] +#[unstable(feature = "alloc_c_string", issue = "94079")] +pub struct IntoStringError { + inner: CString, + error: Utf8Error, +} + +impl CString { + /// Creates a new C-compatible string from a container of bytes. + /// + /// This function will consume the provided data and use the + /// underlying bytes to construct a new string, ensuring that + /// there is a trailing 0 byte. This trailing 0 byte will be + /// appended by this function; the provided data should *not* + /// contain any 0 bytes in it. + /// + /// # Examples + /// + /// ```ignore (extern-declaration) + /// use std::ffi::CString; + /// use std::os::raw::c_char; + /// + /// extern "C" { fn puts(s: *const c_char); } + /// + /// let to_print = CString::new("Hello!").expect("CString::new failed"); + /// unsafe { + /// puts(to_print.as_ptr()); + /// } + /// ``` + /// + /// # Errors + /// + /// This function will return an error if the supplied bytes contain an + /// internal 0 byte. The [`NulError`] returned will contain the bytes as well as + /// the position of the nul byte. + #[stable(feature = "rust1", since = "1.0.0")] + pub fn new<T: Into<Vec<u8>>>(t: T) -> Result<CString, NulError> { + trait SpecNewImpl { + fn spec_new_impl(self) -> Result<CString, NulError>; + } + + impl<T: Into<Vec<u8>>> SpecNewImpl for T { + default fn spec_new_impl(self) -> Result<CString, NulError> { + let bytes: Vec<u8> = self.into(); + match memchr::memchr(0, &bytes) { + Some(i) => Err(NulError(i, bytes)), + None => Ok(unsafe { CString::_from_vec_unchecked(bytes) }), + } + } + } + + // Specialization for avoiding reallocation + #[inline(always)] // Without that it is not inlined into specializations + fn spec_new_impl_bytes(bytes: &[u8]) -> Result<CString, NulError> { + // We cannot have such large slice that we would overflow here + // but using `checked_add` allows LLVM to assume that capacity never overflows + // and generate twice shorter code. + // `saturating_add` doesn't help for some reason. + let capacity = bytes.len().checked_add(1).unwrap(); + + // Allocate before validation to avoid duplication of allocation code. + // We still need to allocate and copy memory even if we get an error. + let mut buffer = Vec::with_capacity(capacity); + buffer.extend(bytes); + + // Check memory of self instead of new buffer. + // This allows better optimizations if lto enabled. + match memchr::memchr(0, bytes) { + Some(i) => Err(NulError(i, buffer)), + None => Ok(unsafe { CString::_from_vec_unchecked(buffer) }), + } + } + + impl SpecNewImpl for &'_ [u8] { + fn spec_new_impl(self) -> Result<CString, NulError> { + spec_new_impl_bytes(self) + } + } + + impl SpecNewImpl for &'_ str { + fn spec_new_impl(self) -> Result<CString, NulError> { + spec_new_impl_bytes(self.as_bytes()) + } + } + + impl SpecNewImpl for &'_ mut [u8] { + fn spec_new_impl(self) -> Result<CString, NulError> { + spec_new_impl_bytes(self) + } + } + + t.spec_new_impl() + } + + /// Creates a C-compatible string by consuming a byte vector, + /// without checking for interior 0 bytes. + /// + /// Trailing 0 byte will be appended by this function. + /// + /// This method is equivalent to [`CString::new`] except that no runtime + /// assertion is made that `v` contains no 0 bytes, and it requires an + /// actual byte vector, not anything that can be converted to one with Into. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::CString; + /// + /// let raw = b"foo".to_vec(); + /// unsafe { + /// let c_string = CString::from_vec_unchecked(raw); + /// } + /// ``` + #[must_use] + #[stable(feature = "rust1", since = "1.0.0")] + pub unsafe fn from_vec_unchecked(v: Vec<u8>) -> Self { + debug_assert!(memchr::memchr(0, &v).is_none()); + unsafe { Self::_from_vec_unchecked(v) } + } + + unsafe fn _from_vec_unchecked(mut v: Vec<u8>) -> Self { + v.reserve_exact(1); + v.push(0); + Self { inner: v.into_boxed_slice() } + } + + /// Retakes ownership of a `CString` that was transferred to C via + /// [`CString::into_raw`]. + /// + /// Additionally, the length of the string will be recalculated from the pointer. + /// + /// # Safety + /// + /// This should only ever be called with a pointer that was earlier + /// obtained by calling [`CString::into_raw`]. Other usage (e.g., trying to take + /// ownership of a string that was allocated by foreign code) is likely to lead + /// to undefined behavior or allocator corruption. + /// + /// It should be noted that the length isn't just "recomputed," but that + /// the recomputed length must match the original length from the + /// [`CString::into_raw`] call. This means the [`CString::into_raw`]/`from_raw` + /// methods should not be used when passing the string to C functions that can + /// modify the string's length. + /// + /// > **Note:** If you need to borrow a string that was allocated by + /// > foreign code, use [`CStr`]. If you need to take ownership of + /// > a string that was allocated by foreign code, you will need to + /// > make your own provisions for freeing it appropriately, likely + /// > with the foreign code's API to do that. + /// + /// # Examples + /// + /// Creates a `CString`, pass ownership to an `extern` function (via raw pointer), then retake + /// ownership with `from_raw`: + /// + /// ```ignore (extern-declaration) + /// use std::ffi::CString; + /// use std::os::raw::c_char; + /// + /// extern "C" { + /// fn some_extern_function(s: *mut c_char); + /// } + /// + /// let c_string = CString::new("Hello!").expect("CString::new failed"); + /// let raw = c_string.into_raw(); + /// unsafe { + /// some_extern_function(raw); + /// let c_string = CString::from_raw(raw); + /// } + /// ``` + #[must_use = "call `drop(from_raw(ptr))` if you intend to drop the `CString`"] + #[stable(feature = "cstr_memory", since = "1.4.0")] + pub unsafe fn from_raw(ptr: *mut c_char) -> CString { + // SAFETY: This is called with a pointer that was obtained from a call + // to `CString::into_raw` and the length has not been modified. As such, + // we know there is a NUL byte (and only one) at the end and that the + // information about the size of the allocation is correct on Rust's + // side. + unsafe { + extern "C" { + /// Provided by libc or compiler_builtins. + fn strlen(s: *const c_char) -> usize; + } + let len = strlen(ptr) + 1; // Including the NUL byte + let slice = slice::from_raw_parts_mut(ptr, len as usize); + CString { inner: Box::from_raw(slice as *mut [c_char] as *mut [u8]) } + } + } + + /// Consumes the `CString` and transfers ownership of the string to a C caller. + /// + /// The pointer which this function returns must be returned to Rust and reconstituted using + /// [`CString::from_raw`] to be properly deallocated. Specifically, one + /// should *not* use the standard C `free()` function to deallocate + /// this string. + /// + /// Failure to call [`CString::from_raw`] will lead to a memory leak. + /// + /// The C side must **not** modify the length of the string (by writing a + /// `null` somewhere inside the string or removing the final one) before + /// it makes it back into Rust using [`CString::from_raw`]. See the safety section + /// in [`CString::from_raw`]. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::CString; + /// + /// let c_string = CString::new("foo").expect("CString::new failed"); + /// + /// let ptr = c_string.into_raw(); + /// + /// unsafe { + /// assert_eq!(b'f', *ptr as u8); + /// assert_eq!(b'o', *ptr.offset(1) as u8); + /// assert_eq!(b'o', *ptr.offset(2) as u8); + /// assert_eq!(b'\0', *ptr.offset(3) as u8); + /// + /// // retake pointer to free memory + /// let _ = CString::from_raw(ptr); + /// } + /// ``` + #[inline] + #[must_use = "`self` will be dropped if the result is not used"] + #[stable(feature = "cstr_memory", since = "1.4.0")] + pub fn into_raw(self) -> *mut c_char { + Box::into_raw(self.into_inner()) as *mut c_char + } + + /// Converts the `CString` into a [`String`] if it contains valid UTF-8 data. + /// + /// On failure, ownership of the original `CString` is returned. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::CString; + /// + /// let valid_utf8 = vec![b'f', b'o', b'o']; + /// let cstring = CString::new(valid_utf8).expect("CString::new failed"); + /// assert_eq!(cstring.into_string().expect("into_string() call failed"), "foo"); + /// + /// let invalid_utf8 = vec![b'f', 0xff, b'o', b'o']; + /// let cstring = CString::new(invalid_utf8).expect("CString::new failed"); + /// let err = cstring.into_string().err().expect("into_string().err() failed"); + /// assert_eq!(err.utf8_error().valid_up_to(), 1); + /// ``` + #[stable(feature = "cstring_into", since = "1.7.0")] + pub fn into_string(self) -> Result<String, IntoStringError> { + String::from_utf8(self.into_bytes()).map_err(|e| IntoStringError { + error: e.utf8_error(), + inner: unsafe { Self::_from_vec_unchecked(e.into_bytes()) }, + }) + } + + /// Consumes the `CString` and returns the underlying byte buffer. + /// + /// The returned buffer does **not** contain the trailing nul + /// terminator, and it is guaranteed to not have any interior nul + /// bytes. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::CString; + /// + /// let c_string = CString::new("foo").expect("CString::new failed"); + /// let bytes = c_string.into_bytes(); + /// assert_eq!(bytes, vec![b'f', b'o', b'o']); + /// ``` + #[must_use = "`self` will be dropped if the result is not used"] + #[stable(feature = "cstring_into", since = "1.7.0")] + pub fn into_bytes(self) -> Vec<u8> { + let mut vec = into_vec(self.into_inner()); + let _nul = vec.pop(); + debug_assert_eq!(_nul, Some(0u8)); + vec + } + + /// Equivalent to [`CString::into_bytes()`] except that the + /// returned vector includes the trailing nul terminator. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::CString; + /// + /// let c_string = CString::new("foo").expect("CString::new failed"); + /// let bytes = c_string.into_bytes_with_nul(); + /// assert_eq!(bytes, vec![b'f', b'o', b'o', b'\0']); + /// ``` + #[must_use = "`self` will be dropped if the result is not used"] + #[stable(feature = "cstring_into", since = "1.7.0")] + pub fn into_bytes_with_nul(self) -> Vec<u8> { + into_vec(self.into_inner()) + } + + /// Returns the contents of this `CString` as a slice of bytes. + /// + /// The returned slice does **not** contain the trailing nul + /// terminator, and it is guaranteed to not have any interior nul + /// bytes. If you need the nul terminator, use + /// [`CString::as_bytes_with_nul`] instead. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::CString; + /// + /// let c_string = CString::new("foo").expect("CString::new failed"); + /// let bytes = c_string.as_bytes(); + /// assert_eq!(bytes, &[b'f', b'o', b'o']); + /// ``` + #[inline] + #[must_use] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn as_bytes(&self) -> &[u8] { + // SAFETY: CString has a length at least 1 + unsafe { self.inner.get_unchecked(..self.inner.len() - 1) } + } + + /// Equivalent to [`CString::as_bytes()`] except that the + /// returned slice includes the trailing nul terminator. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::CString; + /// + /// let c_string = CString::new("foo").expect("CString::new failed"); + /// let bytes = c_string.as_bytes_with_nul(); + /// assert_eq!(bytes, &[b'f', b'o', b'o', b'\0']); + /// ``` + #[inline] + #[must_use] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn as_bytes_with_nul(&self) -> &[u8] { + &self.inner + } + + /// Extracts a [`CStr`] slice containing the entire string. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::{CString, CStr}; + /// + /// let c_string = CString::new(b"foo".to_vec()).expect("CString::new failed"); + /// let cstr = c_string.as_c_str(); + /// assert_eq!(cstr, + /// CStr::from_bytes_with_nul(b"foo\0").expect("CStr::from_bytes_with_nul failed")); + /// ``` + #[inline] + #[must_use] + #[stable(feature = "as_c_str", since = "1.20.0")] + pub fn as_c_str(&self) -> &CStr { + &*self + } + + /// Converts this `CString` into a boxed [`CStr`]. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::{CString, CStr}; + /// + /// let c_string = CString::new(b"foo".to_vec()).expect("CString::new failed"); + /// let boxed = c_string.into_boxed_c_str(); + /// assert_eq!(&*boxed, + /// CStr::from_bytes_with_nul(b"foo\0").expect("CStr::from_bytes_with_nul failed")); + /// ``` + #[must_use = "`self` will be dropped if the result is not used"] + #[stable(feature = "into_boxed_c_str", since = "1.20.0")] + pub fn into_boxed_c_str(self) -> Box<CStr> { + unsafe { Box::from_raw(Box::into_raw(self.into_inner()) as *mut CStr) } + } + + /// Bypass "move out of struct which implements [`Drop`] trait" restriction. + #[inline] + fn into_inner(self) -> Box<[u8]> { + // Rationale: `mem::forget(self)` invalidates the previous call to `ptr::read(&self.inner)` + // so we use `ManuallyDrop` to ensure `self` is not dropped. + // Then we can return the box directly without invalidating it. + // See https://github.com/rust-lang/rust/issues/62553. + let this = mem::ManuallyDrop::new(self); + unsafe { ptr::read(&this.inner) } + } + + /// Converts a <code>[Vec]<[u8]></code> to a [`CString`] without checking the + /// invariants on the given [`Vec`]. + /// + /// # Safety + /// + /// The given [`Vec`] **must** have one nul byte as its last element. + /// This means it cannot be empty nor have any other nul byte anywhere else. + /// + /// # Example + /// + /// ``` + /// use std::ffi::CString; + /// assert_eq!( + /// unsafe { CString::from_vec_with_nul_unchecked(b"abc\0".to_vec()) }, + /// unsafe { CString::from_vec_unchecked(b"abc".to_vec()) } + /// ); + /// ``` + #[must_use] + #[stable(feature = "cstring_from_vec_with_nul", since = "1.58.0")] + pub unsafe fn from_vec_with_nul_unchecked(v: Vec<u8>) -> Self { + debug_assert!(memchr::memchr(0, &v).unwrap() + 1 == v.len()); + unsafe { Self::_from_vec_with_nul_unchecked(v) } + } + + unsafe fn _from_vec_with_nul_unchecked(v: Vec<u8>) -> Self { + Self { inner: v.into_boxed_slice() } + } + + /// Attempts to converts a <code>[Vec]<[u8]></code> to a [`CString`]. + /// + /// Runtime checks are present to ensure there is only one nul byte in the + /// [`Vec`], its last element. + /// + /// # Errors + /// + /// If a nul byte is present and not the last element or no nul bytes + /// is present, an error will be returned. + /// + /// # Examples + /// + /// A successful conversion will produce the same result as [`CString::new`] + /// when called without the ending nul byte. + /// + /// ``` + /// use std::ffi::CString; + /// assert_eq!( + /// CString::from_vec_with_nul(b"abc\0".to_vec()) + /// .expect("CString::from_vec_with_nul failed"), + /// CString::new(b"abc".to_vec()).expect("CString::new failed") + /// ); + /// ``` + /// + /// An incorrectly formatted [`Vec`] will produce an error. + /// + /// ``` + /// use std::ffi::{CString, FromVecWithNulError}; + /// // Interior nul byte + /// let _: FromVecWithNulError = CString::from_vec_with_nul(b"a\0bc".to_vec()).unwrap_err(); + /// // No nul byte + /// let _: FromVecWithNulError = CString::from_vec_with_nul(b"abc".to_vec()).unwrap_err(); + /// ``` + #[stable(feature = "cstring_from_vec_with_nul", since = "1.58.0")] + pub fn from_vec_with_nul(v: Vec<u8>) -> Result<Self, FromVecWithNulError> { + let nul_pos = memchr::memchr(0, &v); + match nul_pos { + Some(nul_pos) if nul_pos + 1 == v.len() => { + // SAFETY: We know there is only one nul byte, at the end + // of the vec. + Ok(unsafe { Self::_from_vec_with_nul_unchecked(v) }) + } + Some(nul_pos) => Err(FromVecWithNulError { + error_kind: FromBytesWithNulErrorKind::InteriorNul(nul_pos), + bytes: v, + }), + None => Err(FromVecWithNulError { + error_kind: FromBytesWithNulErrorKind::NotNulTerminated, + bytes: v, + }), + } + } +} + +// Turns this `CString` into an empty string to prevent +// memory-unsafe code from working by accident. Inline +// to prevent LLVM from optimizing it away in debug builds. +#[stable(feature = "cstring_drop", since = "1.13.0")] +impl Drop for CString { + #[inline] + fn drop(&mut self) { + unsafe { + *self.inner.get_unchecked_mut(0) = 0; + } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl ops::Deref for CString { + type Target = CStr; + + #[inline] + fn deref(&self) -> &CStr { + unsafe { CStr::from_bytes_with_nul_unchecked(self.as_bytes_with_nul()) } + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Debug for CString { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&**self, f) + } +} + +#[stable(feature = "cstring_into", since = "1.7.0")] +impl From<CString> for Vec<u8> { + /// Converts a [`CString`] into a <code>[Vec]<[u8]></code>. + /// + /// The conversion consumes the [`CString`], and removes the terminating NUL byte. + #[inline] + fn from(s: CString) -> Vec<u8> { + s.into_bytes() + } +} + +#[stable(feature = "cstr_default", since = "1.10.0")] +impl Default for CString { + /// Creates an empty `CString`. + fn default() -> CString { + let a: &CStr = Default::default(); + a.to_owned() + } +} + +#[stable(feature = "cstr_borrow", since = "1.3.0")] +impl Borrow<CStr> for CString { + #[inline] + fn borrow(&self) -> &CStr { + self + } +} + +#[stable(feature = "cstring_from_cow_cstr", since = "1.28.0")] +impl<'a> From<Cow<'a, CStr>> for CString { + /// Converts a `Cow<'a, CStr>` into a `CString`, by copying the contents if they are + /// borrowed. + #[inline] + fn from(s: Cow<'a, CStr>) -> Self { + s.into_owned() + } +} + +#[cfg(not(test))] +#[stable(feature = "box_from_c_str", since = "1.17.0")] +impl From<&CStr> for Box<CStr> { + /// Converts a `&CStr` into a `Box<CStr>`, + /// by copying the contents into a newly allocated [`Box`]. + fn from(s: &CStr) -> Box<CStr> { + let boxed: Box<[u8]> = Box::from(s.to_bytes_with_nul()); + unsafe { Box::from_raw(Box::into_raw(boxed) as *mut CStr) } + } +} + +#[stable(feature = "box_from_cow", since = "1.45.0")] +impl From<Cow<'_, CStr>> for Box<CStr> { + /// Converts a `Cow<'a, CStr>` into a `Box<CStr>`, + /// by copying the contents if they are borrowed. + #[inline] + fn from(cow: Cow<'_, CStr>) -> Box<CStr> { + match cow { + Cow::Borrowed(s) => Box::from(s), + Cow::Owned(s) => Box::from(s), + } + } +} + +#[stable(feature = "c_string_from_box", since = "1.18.0")] +impl From<Box<CStr>> for CString { + /// Converts a <code>[Box]<[CStr]></code> into a [`CString`] without copying or allocating. + #[inline] + fn from(s: Box<CStr>) -> CString { + let raw = Box::into_raw(s) as *mut [u8]; + CString { inner: unsafe { Box::from_raw(raw) } } + } +} + +#[stable(feature = "cstring_from_vec_of_nonzerou8", since = "1.43.0")] +impl From<Vec<NonZeroU8>> for CString { + /// Converts a <code>[Vec]<[NonZeroU8]></code> into a [`CString`] without + /// copying nor checking for inner null bytes. + #[inline] + fn from(v: Vec<NonZeroU8>) -> CString { + unsafe { + // Transmute `Vec<NonZeroU8>` to `Vec<u8>`. + let v: Vec<u8> = { + // SAFETY: + // - transmuting between `NonZeroU8` and `u8` is sound; + // - `alloc::Layout<NonZeroU8> == alloc::Layout<u8>`. + let (ptr, len, cap): (*mut NonZeroU8, _, _) = Vec::into_raw_parts(v); + Vec::from_raw_parts(ptr.cast::<u8>(), len, cap) + }; + // SAFETY: `v` cannot contain null bytes, given the type-level + // invariant of `NonZeroU8`. + Self::_from_vec_unchecked(v) + } + } +} + +#[cfg(not(test))] +#[stable(feature = "more_box_slice_clone", since = "1.29.0")] +impl Clone for Box<CStr> { + #[inline] + fn clone(&self) -> Self { + (**self).into() + } +} + +#[stable(feature = "box_from_c_string", since = "1.20.0")] +impl From<CString> for Box<CStr> { + /// Converts a [`CString`] into a <code>[Box]<[CStr]></code> without copying or allocating. + #[inline] + fn from(s: CString) -> Box<CStr> { + s.into_boxed_c_str() + } +} + +#[stable(feature = "cow_from_cstr", since = "1.28.0")] +impl<'a> From<CString> for Cow<'a, CStr> { + /// Converts a [`CString`] into an owned [`Cow`] without copying or allocating. + #[inline] + fn from(s: CString) -> Cow<'a, CStr> { + Cow::Owned(s) + } +} + +#[stable(feature = "cow_from_cstr", since = "1.28.0")] +impl<'a> From<&'a CStr> for Cow<'a, CStr> { + /// Converts a [`CStr`] into a borrowed [`Cow`] without copying or allocating. + #[inline] + fn from(s: &'a CStr) -> Cow<'a, CStr> { + Cow::Borrowed(s) + } +} + +#[stable(feature = "cow_from_cstr", since = "1.28.0")] +impl<'a> From<&'a CString> for Cow<'a, CStr> { + /// Converts a `&`[`CString`] into a borrowed [`Cow`] without copying or allocating. + #[inline] + fn from(s: &'a CString) -> Cow<'a, CStr> { + Cow::Borrowed(s.as_c_str()) + } +} + +#[cfg(target_has_atomic = "ptr")] +#[stable(feature = "shared_from_slice2", since = "1.24.0")] +impl From<CString> for Arc<CStr> { + /// Converts a [`CString`] into an <code>[Arc]<[CStr]></code> by moving the [`CString`] + /// data into a new [`Arc`] buffer. + #[inline] + fn from(s: CString) -> Arc<CStr> { + let arc: Arc<[u8]> = Arc::from(s.into_inner()); + unsafe { Arc::from_raw(Arc::into_raw(arc) as *const CStr) } + } +} + +#[cfg(target_has_atomic = "ptr")] +#[stable(feature = "shared_from_slice2", since = "1.24.0")] +impl From<&CStr> for Arc<CStr> { + /// Converts a `&CStr` into a `Arc<CStr>`, + /// by copying the contents into a newly allocated [`Arc`]. + #[inline] + fn from(s: &CStr) -> Arc<CStr> { + let arc: Arc<[u8]> = Arc::from(s.to_bytes_with_nul()); + unsafe { Arc::from_raw(Arc::into_raw(arc) as *const CStr) } + } +} + +#[stable(feature = "shared_from_slice2", since = "1.24.0")] +impl From<CString> for Rc<CStr> { + /// Converts a [`CString`] into an <code>[Rc]<[CStr]></code> by moving the [`CString`] + /// data into a new [`Arc`] buffer. + #[inline] + fn from(s: CString) -> Rc<CStr> { + let rc: Rc<[u8]> = Rc::from(s.into_inner()); + unsafe { Rc::from_raw(Rc::into_raw(rc) as *const CStr) } + } +} + +#[stable(feature = "shared_from_slice2", since = "1.24.0")] +impl From<&CStr> for Rc<CStr> { + /// Converts a `&CStr` into a `Rc<CStr>`, + /// by copying the contents into a newly allocated [`Rc`]. + #[inline] + fn from(s: &CStr) -> Rc<CStr> { + let rc: Rc<[u8]> = Rc::from(s.to_bytes_with_nul()); + unsafe { Rc::from_raw(Rc::into_raw(rc) as *const CStr) } + } +} + +#[cfg(not(test))] +#[stable(feature = "default_box_extra", since = "1.17.0")] +impl Default for Box<CStr> { + fn default() -> Box<CStr> { + let boxed: Box<[u8]> = Box::from([0]); + unsafe { Box::from_raw(Box::into_raw(boxed) as *mut CStr) } + } +} + +impl NulError { + /// Returns the position of the nul byte in the slice that caused + /// [`CString::new`] to fail. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::CString; + /// + /// let nul_error = CString::new("foo\0bar").unwrap_err(); + /// assert_eq!(nul_error.nul_position(), 3); + /// + /// let nul_error = CString::new("foo bar\0").unwrap_err(); + /// assert_eq!(nul_error.nul_position(), 7); + /// ``` + #[must_use] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn nul_position(&self) -> usize { + self.0 + } + + /// Consumes this error, returning the underlying vector of bytes which + /// generated the error in the first place. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::CString; + /// + /// let nul_error = CString::new("foo\0bar").unwrap_err(); + /// assert_eq!(nul_error.into_vec(), b"foo\0bar"); + /// ``` + #[must_use = "`self` will be dropped if the result is not used"] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn into_vec(self) -> Vec<u8> { + self.1 + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Display for NulError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "nul byte found in provided data at position: {}", self.0) + } +} + +#[stable(feature = "cstring_from_vec_with_nul", since = "1.58.0")] +impl fmt::Display for FromVecWithNulError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self.error_kind { + FromBytesWithNulErrorKind::InteriorNul(pos) => { + write!(f, "data provided contains an interior nul byte at pos {pos}") + } + FromBytesWithNulErrorKind::NotNulTerminated => { + write!(f, "data provided is not nul terminated") + } + } + } +} + +impl IntoStringError { + /// Consumes this error, returning original [`CString`] which generated the + /// error. + #[must_use = "`self` will be dropped if the result is not used"] + #[stable(feature = "cstring_into", since = "1.7.0")] + pub fn into_cstring(self) -> CString { + self.inner + } + + /// Access the underlying UTF-8 error that was the cause of this error. + #[must_use] + #[stable(feature = "cstring_into", since = "1.7.0")] + pub fn utf8_error(&self) -> Utf8Error { + self.error + } + + #[doc(hidden)] + #[unstable(feature = "cstr_internals", issue = "none")] + pub fn __source(&self) -> &Utf8Error { + &self.error + } +} + +impl IntoStringError { + fn description(&self) -> &str { + "C string contained non-utf8 bytes" + } +} + +#[stable(feature = "cstring_into", since = "1.7.0")] +impl fmt::Display for IntoStringError { + #[allow(deprecated, deprecated_in_future)] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.description().fmt(f) + } +} + +#[stable(feature = "cstr_borrow", since = "1.3.0")] +impl ToOwned for CStr { + type Owned = CString; + + fn to_owned(&self) -> CString { + CString { inner: self.to_bytes_with_nul().into() } + } + + fn clone_into(&self, target: &mut CString) { + let mut b = into_vec(mem::take(&mut target.inner)); + self.to_bytes_with_nul().clone_into(&mut b); + target.inner = b.into_boxed_slice(); + } +} + +#[stable(feature = "cstring_asref", since = "1.7.0")] +impl From<&CStr> for CString { + fn from(s: &CStr) -> CString { + s.to_owned() + } +} + +#[stable(feature = "cstring_asref", since = "1.7.0")] +impl ops::Index<ops::RangeFull> for CString { + type Output = CStr; + + #[inline] + fn index(&self, _index: ops::RangeFull) -> &CStr { + self + } +} + +#[stable(feature = "cstring_asref", since = "1.7.0")] +impl AsRef<CStr> for CString { + #[inline] + fn as_ref(&self) -> &CStr { + self + } +} + +#[cfg(bootstrap)] +#[doc(hidden)] +#[unstable(feature = "cstr_internals", issue = "none")] +pub trait CStrExt { + /// Converts a `CStr` into a <code>[Cow]<[str]></code>. + /// + /// If the contents of the `CStr` are valid UTF-8 data, this + /// function will return a <code>[Cow]::[Borrowed]\(&[str])</code> + /// with the corresponding <code>&[str]</code> slice. Otherwise, it will + /// replace any invalid UTF-8 sequences with + /// [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD] and return a + /// <code>[Cow]::[Owned]\(&[str])</code> with the result. + /// + /// [str]: prim@str "str" + /// [Borrowed]: Cow::Borrowed + /// [Owned]: Cow::Owned + /// [U+FFFD]: crate::char::REPLACEMENT_CHARACTER "std::char::REPLACEMENT_CHARACTER" + /// + /// # Examples + /// + /// Calling `to_string_lossy` on a `CStr` containing valid UTF-8: + /// + /// ``` + /// use std::borrow::Cow; + /// use std::ffi::CStr; + /// + /// let cstr = CStr::from_bytes_with_nul(b"Hello World\0") + /// .expect("CStr::from_bytes_with_nul failed"); + /// assert_eq!(cstr.to_string_lossy(), Cow::Borrowed("Hello World")); + /// ``` + /// + /// Calling `to_string_lossy` on a `CStr` containing invalid UTF-8: + /// + /// ``` + /// use std::borrow::Cow; + /// use std::ffi::CStr; + /// + /// let cstr = CStr::from_bytes_with_nul(b"Hello \xF0\x90\x80World\0") + /// .expect("CStr::from_bytes_with_nul failed"); + /// assert_eq!( + /// cstr.to_string_lossy(), + /// Cow::Owned(String::from("Hello �World")) as Cow<'_, str> + /// ); + /// ``` + #[must_use = "this returns the result of the operation, \ + without modifying the original"] + #[stable(feature = "cstr_to_str", since = "1.4.0")] + fn to_string_lossy(&self) -> Cow<'_, str>; + + /// Converts a <code>[Box]<[CStr]></code> into a [`CString`] without copying or allocating. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::CString; + /// + /// let c_string = CString::new(b"foo".to_vec()).expect("CString::new failed"); + /// let boxed = c_string.into_boxed_c_str(); + /// assert_eq!(boxed.into_c_string(), CString::new("foo").expect("CString::new failed")); + /// ``` + #[must_use = "`self` will be dropped if the result is not used"] + #[stable(feature = "into_boxed_c_str", since = "1.20.0")] + fn into_c_string(self: Box<Self>) -> CString; +} + +#[cfg(bootstrap)] +#[unstable(feature = "cstr_internals", issue = "none")] +impl CStrExt for CStr { + fn to_string_lossy(&self) -> Cow<'_, str> { + String::from_utf8_lossy(self.to_bytes()) + } + + fn into_c_string(self: Box<Self>) -> CString { + CString::from(self) + } +} + +#[cfg(not(bootstrap))] +#[cfg(not(test))] +impl CStr { + /// Converts a `CStr` into a <code>[Cow]<[str]></code>. + /// + /// If the contents of the `CStr` are valid UTF-8 data, this + /// function will return a <code>[Cow]::[Borrowed]\(&[str])</code> + /// with the corresponding <code>&[str]</code> slice. Otherwise, it will + /// replace any invalid UTF-8 sequences with + /// [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD] and return a + /// <code>[Cow]::[Owned]\(&[str])</code> with the result. + /// + /// [str]: prim@str "str" + /// [Borrowed]: Cow::Borrowed + /// [Owned]: Cow::Owned + /// [U+FFFD]: core::char::REPLACEMENT_CHARACTER "std::char::REPLACEMENT_CHARACTER" + /// + /// # Examples + /// + /// Calling `to_string_lossy` on a `CStr` containing valid UTF-8: + /// + /// ``` + /// use std::borrow::Cow; + /// use std::ffi::CStr; + /// + /// let cstr = CStr::from_bytes_with_nul(b"Hello World\0") + /// .expect("CStr::from_bytes_with_nul failed"); + /// assert_eq!(cstr.to_string_lossy(), Cow::Borrowed("Hello World")); + /// ``` + /// + /// Calling `to_string_lossy` on a `CStr` containing invalid UTF-8: + /// + /// ``` + /// use std::borrow::Cow; + /// use std::ffi::CStr; + /// + /// let cstr = CStr::from_bytes_with_nul(b"Hello \xF0\x90\x80World\0") + /// .expect("CStr::from_bytes_with_nul failed"); + /// assert_eq!( + /// cstr.to_string_lossy(), + /// Cow::Owned(String::from("Hello �World")) as Cow<'_, str> + /// ); + /// ``` + #[rustc_allow_incoherent_impl] + #[must_use = "this returns the result of the operation, \ + without modifying the original"] + #[stable(feature = "cstr_to_str", since = "1.4.0")] + pub fn to_string_lossy(&self) -> Cow<'_, str> { + String::from_utf8_lossy(self.to_bytes()) + } + + /// Converts a <code>[Box]<[CStr]></code> into a [`CString`] without copying or allocating. + /// + /// # Examples + /// + /// ``` + /// use std::ffi::CString; + /// + /// let c_string = CString::new(b"foo".to_vec()).expect("CString::new failed"); + /// let boxed = c_string.into_boxed_c_str(); + /// assert_eq!(boxed.into_c_string(), CString::new("foo").expect("CString::new failed")); + /// ``` + #[rustc_allow_incoherent_impl] + #[must_use = "`self` will be dropped if the result is not used"] + #[stable(feature = "into_boxed_c_str", since = "1.20.0")] + pub fn into_c_string(self: Box<Self>) -> CString { + CString::from(self) + } +} diff --git a/rust/alloc/ffi/mod.rs b/rust/alloc/ffi/mod.rs new file mode 100644 index 000000000000..56d429785339 --- /dev/null +++ b/rust/alloc/ffi/mod.rs @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! Utilities related to FFI bindings. +//! +//! This module provides utilities to handle data across non-Rust +//! interfaces, like other programming languages and the underlying +//! operating system. It is mainly of use for FFI (Foreign Function +//! Interface) bindings and code that needs to exchange C-like strings +//! with other languages. +//! +//! # Overview +//! +//! Rust represents owned strings with the [`String`] type, and +//! borrowed slices of strings with the [`str`] primitive. Both are +//! always in UTF-8 encoding, and may contain nul bytes in the middle, +//! i.e., if you look at the bytes that make up the string, there may +//! be a `\0` among them. Both `String` and `str` store their length +//! explicitly; there are no nul terminators at the end of strings +//! like in C. +//! +//! C strings are different from Rust strings: +//! +//! * **Encodings** - Rust strings are UTF-8, but C strings may use +//! other encodings. If you are using a string from C, you should +//! check its encoding explicitly, rather than just assuming that it +//! is UTF-8 like you can do in Rust. +//! +//! * **Character size** - C strings may use `char` or `wchar_t`-sized +//! characters; please **note** that C's `char` is different from Rust's. +//! The C standard leaves the actual sizes of those types open to +//! interpretation, but defines different APIs for strings made up of +//! each character type. Rust strings are always UTF-8, so different +//! Unicode characters will be encoded in a variable number of bytes +//! each. The Rust type [`char`] represents a '[Unicode scalar +//! value]', which is similar to, but not the same as, a '[Unicode +//! code point]'. +//! +//! * **Nul terminators and implicit string lengths** - Often, C +//! strings are nul-terminated, i.e., they have a `\0` character at the +//! end. The length of a string buffer is not stored, but has to be +//! calculated; to compute the length of a string, C code must +//! manually call a function like `strlen()` for `char`-based strings, +//! or `wcslen()` for `wchar_t`-based ones. Those functions return +//! the number of characters in the string excluding the nul +//! terminator, so the buffer length is really `len+1` characters. +//! Rust strings don't have a nul terminator; their length is always +//! stored and does not need to be calculated. While in Rust +//! accessing a string's length is an *O*(1) operation (because the +//! length is stored); in C it is an *O*(*n*) operation because the +//! length needs to be computed by scanning the string for the nul +//! terminator. +//! +//! * **Internal nul characters** - When C strings have a nul +//! terminator character, this usually means that they cannot have nul +//! characters in the middle — a nul character would essentially +//! truncate the string. Rust strings *can* have nul characters in +//! the middle, because nul does not have to mark the end of the +//! string in Rust. +//! +//! # Representations of non-Rust strings +//! +//! [`CString`] and [`CStr`] are useful when you need to transfer +//! UTF-8 strings to and from languages with a C ABI, like Python. +//! +//! * **From Rust to C:** [`CString`] represents an owned, C-friendly +//! string: it is nul-terminated, and has no internal nul characters. +//! Rust code can create a [`CString`] out of a normal string (provided +//! that the string doesn't have nul characters in the middle), and +//! then use a variety of methods to obtain a raw <code>\*mut [u8]</code> that can +//! then be passed as an argument to functions which use the C +//! conventions for strings. +//! +//! * **From C to Rust:** [`CStr`] represents a borrowed C string; it +//! is what you would use to wrap a raw <code>\*const [u8]</code> that you got from +//! a C function. A [`CStr`] is guaranteed to be a nul-terminated array +//! of bytes. Once you have a [`CStr`], you can convert it to a Rust +//! <code>&[str]</code> if it's valid UTF-8, or lossily convert it by adding +//! replacement characters. +//! +//! [`String`]: crate::string::String +//! [`CStr`]: core::ffi::CStr + +#![unstable(feature = "alloc_ffi", issue = "94079")] + +#[cfg(bootstrap)] +#[unstable(feature = "cstr_internals", issue = "none")] +pub use self::c_str::CStrExt; +#[unstable(feature = "alloc_c_string", issue = "94079")] +pub use self::c_str::FromVecWithNulError; +#[unstable(feature = "alloc_c_string", issue = "94079")] +pub use self::c_str::{CString, IntoStringError, NulError}; + +mod c_str; diff --git a/rust/alloc/fmt.rs b/rust/alloc/fmt.rs new file mode 100644 index 000000000000..b9c4d2926d23 --- /dev/null +++ b/rust/alloc/fmt.rs @@ -0,0 +1,614 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! Utilities for formatting and printing `String`s. +//! +//! This module contains the runtime support for the [`format!`] syntax extension. +//! This macro is implemented in the compiler to emit calls to this module in +//! order to format arguments at runtime into strings. +//! +//! # Usage +//! +//! The [`format!`] macro is intended to be familiar to those coming from C's +//! `printf`/`fprintf` functions or Python's `str.format` function. +//! +//! Some examples of the [`format!`] extension are: +//! +//! ``` +//! format!("Hello"); // => "Hello" +//! format!("Hello, {}!", "world"); // => "Hello, world!" +//! format!("The number is {}", 1); // => "The number is 1" +//! format!("{:?}", (3, 4)); // => "(3, 4)" +//! format!("{value}", value=4); // => "4" +//! let people = "Rustaceans"; +//! format!("Hello {people}!"); // => "Hello Rustaceans!" +//! format!("{} {}", 1, 2); // => "1 2" +//! format!("{:04}", 42); // => "0042" with leading zeros +//! format!("{:#?}", (100, 200)); // => "( +//! // 100, +//! // 200, +//! // )" +//! ``` +//! +//! From these, you can see that the first argument is a format string. It is +//! required by the compiler for this to be a string literal; it cannot be a +//! variable passed in (in order to perform validity checking). The compiler +//! will then parse the format string and determine if the list of arguments +//! provided is suitable to pass to this format string. +//! +//! To convert a single value to a string, use the [`to_string`] method. This +//! will use the [`Display`] formatting trait. +//! +//! ## Positional parameters +//! +//! Each formatting argument is allowed to specify which value argument it's +//! referencing, and if omitted it is assumed to be "the next argument". For +//! example, the format string `{} {} {}` would take three parameters, and they +//! would be formatted in the same order as they're given. The format string +//! `{2} {1} {0}`, however, would format arguments in reverse order. +//! +//! Things can get a little tricky once you start intermingling the two types of +//! positional specifiers. The "next argument" specifier can be thought of as an +//! iterator over the argument. Each time a "next argument" specifier is seen, +//! the iterator advances. This leads to behavior like this: +//! +//! ``` +//! format!("{1} {} {0} {}", 1, 2); // => "2 1 1 2" +//! ``` +//! +//! The internal iterator over the argument has not been advanced by the time +//! the first `{}` is seen, so it prints the first argument. Then upon reaching +//! the second `{}`, the iterator has advanced forward to the second argument. +//! Essentially, parameters that explicitly name their argument do not affect +//! parameters that do not name an argument in terms of positional specifiers. +//! +//! A format string is required to use all of its arguments, otherwise it is a +//! compile-time error. You may refer to the same argument more than once in the +//! format string. +//! +//! ## Named parameters +//! +//! Rust itself does not have a Python-like equivalent of named parameters to a +//! function, but the [`format!`] macro is a syntax extension that allows it to +//! leverage named parameters. Named parameters are listed at the end of the +//! argument list and have the syntax: +//! +//! ```text +//! identifier '=' expression +//! ``` +//! +//! For example, the following [`format!`] expressions all use named arguments: +//! +//! ``` +//! format!("{argument}", argument = "test"); // => "test" +//! format!("{name} {}", 1, name = 2); // => "2 1" +//! format!("{a} {c} {b}", a="a", b='b', c=3); // => "a 3 b" +//! ``` +//! +//! If a named parameter does not appear in the argument list, `format!` will +//! reference a variable with that name in the current scope. +//! +//! ``` +//! let argument = 2 + 2; +//! format!("{argument}"); // => "4" +//! +//! fn make_string(a: u32, b: &str) -> String { +//! format!("{b} {a}") +//! } +//! make_string(927, "label"); // => "label 927" +//! ``` +//! +//! It is not valid to put positional parameters (those without names) after +//! arguments that have names. Like with positional parameters, it is not +//! valid to provide named parameters that are unused by the format string. +//! +//! # Formatting Parameters +//! +//! Each argument being formatted can be transformed by a number of formatting +//! parameters (corresponding to `format_spec` in [the syntax](#syntax)). These +//! parameters affect the string representation of what's being formatted. +//! +//! ## Width +//! +//! ``` +//! // All of these print "Hello x !" +//! println!("Hello {:5}!", "x"); +//! println!("Hello {:1$}!", "x", 5); +//! println!("Hello {1:0$}!", 5, "x"); +//! println!("Hello {:width$}!", "x", width = 5); +//! let width = 5; +//! println!("Hello {:width$}!", "x"); +//! ``` +//! +//! This is a parameter for the "minimum width" that the format should take up. +//! If the value's string does not fill up this many characters, then the +//! padding specified by fill/alignment will be used to take up the required +//! space (see below). +//! +//! The value for the width can also be provided as a [`usize`] in the list of +//! parameters by adding a postfix `$`, indicating that the second argument is +//! a [`usize`] specifying the width. +//! +//! Referring to an argument with the dollar syntax does not affect the "next +//! argument" counter, so it's usually a good idea to refer to arguments by +//! position, or use named arguments. +//! +//! ## Fill/Alignment +//! +//! ``` +//! assert_eq!(format!("Hello {:<5}!", "x"), "Hello x !"); +//! assert_eq!(format!("Hello {:-<5}!", "x"), "Hello x----!"); +//! assert_eq!(format!("Hello {:^5}!", "x"), "Hello x !"); +//! assert_eq!(format!("Hello {:>5}!", "x"), "Hello x!"); +//! ``` +//! +//! The optional fill character and alignment is provided normally in conjunction with the +//! [`width`](#width) parameter. It must be defined before `width`, right after the `:`. +//! This indicates that if the value being formatted is smaller than +//! `width` some extra characters will be printed around it. +//! Filling comes in the following variants for different alignments: +//! +//! * `[fill]<` - the argument is left-aligned in `width` columns +//! * `[fill]^` - the argument is center-aligned in `width` columns +//! * `[fill]>` - the argument is right-aligned in `width` columns +//! +//! The default [fill/alignment](#fillalignment) for non-numerics is a space and +//! left-aligned. The +//! default for numeric formatters is also a space character but with right-alignment. If +//! the `0` flag (see below) is specified for numerics, then the implicit fill character is +//! `0`. +//! +//! Note that alignment might not be implemented by some types. In particular, it +//! is not generally implemented for the `Debug` trait. A good way to ensure +//! padding is applied is to format your input, then pad this resulting string +//! to obtain your output: +//! +//! ``` +//! println!("Hello {:^15}!", format!("{:?}", Some("hi"))); // => "Hello Some("hi") !" +//! ``` +//! +//! ## Sign/`#`/`0` +//! +//! ``` +//! assert_eq!(format!("Hello {:+}!", 5), "Hello +5!"); +//! assert_eq!(format!("{:#x}!", 27), "0x1b!"); +//! assert_eq!(format!("Hello {:05}!", 5), "Hello 00005!"); +//! assert_eq!(format!("Hello {:05}!", -5), "Hello -0005!"); +//! assert_eq!(format!("{:#010x}!", 27), "0x0000001b!"); +//! ``` +//! +//! These are all flags altering the behavior of the formatter. +//! +//! * `+` - This is intended for numeric types and indicates that the sign +//! should always be printed. Positive signs are never printed by +//! default, and the negative sign is only printed by default for signed values. +//! This flag indicates that the correct sign (`+` or `-`) should always be printed. +//! * `-` - Currently not used +//! * `#` - This flag indicates that the "alternate" form of printing should +//! be used. The alternate forms are: +//! * `#?` - pretty-print the [`Debug`] formatting (adds linebreaks and indentation) +//! * `#x` - precedes the argument with a `0x` +//! * `#X` - precedes the argument with a `0x` +//! * `#b` - precedes the argument with a `0b` +//! * `#o` - precedes the argument with a `0o` +//! * `0` - This is used to indicate for integer formats that the padding to `width` should +//! both be done with a `0` character as well as be sign-aware. A format +//! like `{:08}` would yield `00000001` for the integer `1`, while the +//! same format would yield `-0000001` for the integer `-1`. Notice that +//! the negative version has one fewer zero than the positive version. +//! Note that padding zeros are always placed after the sign (if any) +//! and before the digits. When used together with the `#` flag, a similar +//! rule applies: padding zeros are inserted after the prefix but before +//! the digits. The prefix is included in the total width. +//! +//! ## Precision +//! +//! For non-numeric types, this can be considered a "maximum width". If the resulting string is +//! longer than this width, then it is truncated down to this many characters and that truncated +//! value is emitted with proper `fill`, `alignment` and `width` if those parameters are set. +//! +//! For integral types, this is ignored. +//! +//! For floating-point types, this indicates how many digits after the decimal point should be +//! printed. +//! +//! There are three possible ways to specify the desired `precision`: +//! +//! 1. An integer `.N`: +//! +//! the integer `N` itself is the precision. +//! +//! 2. An integer or name followed by dollar sign `.N$`: +//! +//! use format *argument* `N` (which must be a `usize`) as the precision. +//! +//! 3. An asterisk `.*`: +//! +//! `.*` means that this `{...}` is associated with *two* format inputs rather than one: +//! - If a format string in the fashion of `{:<spec>.*}` is used, then the first input holds +//! the `usize` precision, and the second holds the value to print. +//! - If a format string in the fashion of `{<arg>:<spec>.*}` is used, then the `<arg>` part +//! refers to the value to print, and the `precision` is taken like it was specified with an +//! omitted positional parameter (`{}` instead of `{<arg>:}`). +//! +//! For example, the following calls all print the same thing `Hello x is 0.01000`: +//! +//! ``` +//! // Hello {arg 0 ("x")} is {arg 1 (0.01) with precision specified inline (5)} +//! println!("Hello {0} is {1:.5}", "x", 0.01); +//! +//! // Hello {arg 1 ("x")} is {arg 2 (0.01) with precision specified in arg 0 (5)} +//! println!("Hello {1} is {2:.0$}", 5, "x", 0.01); +//! +//! // Hello {arg 0 ("x")} is {arg 2 (0.01) with precision specified in arg 1 (5)} +//! println!("Hello {0} is {2:.1$}", "x", 5, 0.01); +//! +//! // Hello {next arg -> arg 0 ("x")} is {second of next two args -> arg 2 (0.01) with precision +//! // specified in first of next two args -> arg 1 (5)} +//! println!("Hello {} is {:.*}", "x", 5, 0.01); +//! +//! // Hello {arg 1 ("x")} is {arg 2 (0.01) with precision +//! // specified in next arg -> arg 0 (5)} +//! println!("Hello {1} is {2:.*}", 5, "x", 0.01); +//! +//! // Hello {next arg -> arg 0 ("x")} is {arg 2 (0.01) with precision +//! // specified in next arg -> arg 1 (5)} +//! println!("Hello {} is {2:.*}", "x", 5, 0.01); +//! +//! // Hello {next arg -> arg 0 ("x")} is {arg "number" (0.01) with precision specified +//! // in arg "prec" (5)} +//! println!("Hello {} is {number:.prec$}", "x", prec = 5, number = 0.01); +//! ``` +//! +//! While these: +//! +//! ``` +//! println!("{}, `{name:.*}` has 3 fractional digits", "Hello", 3, name=1234.56); +//! println!("{}, `{name:.*}` has 3 characters", "Hello", 3, name="1234.56"); +//! println!("{}, `{name:>8.*}` has 3 right-aligned characters", "Hello", 3, name="1234.56"); +//! ``` +//! +//! print three significantly different things: +//! +//! ```text +//! Hello, `1234.560` has 3 fractional digits +//! Hello, `123` has 3 characters +//! Hello, ` 123` has 3 right-aligned characters +//! ``` +//! +//! ## Localization +//! +//! In some programming languages, the behavior of string formatting functions +//! depends on the operating system's locale setting. The format functions +//! provided by Rust's standard library do not have any concept of locale and +//! will produce the same results on all systems regardless of user +//! configuration. +//! +//! For example, the following code will always print `1.5` even if the system +//! locale uses a decimal separator other than a dot. +//! +//! ``` +//! println!("The value is {}", 1.5); +//! ``` +//! +//! # Escaping +//! +//! The literal characters `{` and `}` may be included in a string by preceding +//! them with the same character. For example, the `{` character is escaped with +//! `{{` and the `}` character is escaped with `}}`. +//! +//! ``` +//! assert_eq!(format!("Hello {{}}"), "Hello {}"); +//! assert_eq!(format!("{{ Hello"), "{ Hello"); +//! ``` +//! +//! # Syntax +//! +//! To summarize, here you can find the full grammar of format strings. +//! The syntax for the formatting language used is drawn from other languages, +//! so it should not be too alien. Arguments are formatted with Python-like +//! syntax, meaning that arguments are surrounded by `{}` instead of the C-like +//! `%`. The actual grammar for the formatting syntax is: +//! +//! ```text +//! format_string := text [ maybe_format text ] * +//! maybe_format := '{' '{' | '}' '}' | format +//! format := '{' [ argument ] [ ':' format_spec ] [ ws ] * '}' +//! argument := integer | identifier +//! +//! format_spec := [[fill]align][sign]['#']['0'][width]['.' precision]type +//! fill := character +//! align := '<' | '^' | '>' +//! sign := '+' | '-' +//! width := count +//! precision := count | '*' +//! type := '' | '?' | 'x?' | 'X?' | identifier +//! count := parameter | integer +//! parameter := argument '$' +//! ``` +//! In the above grammar, +//! - `text` must not contain any `'{'` or `'}'` characters, +//! - `ws` is any character for which [`char::is_whitespace`] returns `true`, has no semantic +//! meaning and is completely optional, +//! - `integer` is a decimal integer that may contain leading zeroes and +//! - `identifier` is an `IDENTIFIER_OR_KEYWORD` (not an `IDENTIFIER`) as defined by the [Rust language reference](https://doc.rust-lang.org/reference/identifiers.html). +//! +//! # Formatting traits +//! +//! When requesting that an argument be formatted with a particular type, you +//! are actually requesting that an argument ascribes to a particular trait. +//! This allows multiple actual types to be formatted via `{:x}` (like [`i8`] as +//! well as [`isize`]). The current mapping of types to traits is: +//! +//! * *nothing* ⇒ [`Display`] +//! * `?` ⇒ [`Debug`] +//! * `x?` ⇒ [`Debug`] with lower-case hexadecimal integers +//! * `X?` ⇒ [`Debug`] with upper-case hexadecimal integers +//! * `o` ⇒ [`Octal`] +//! * `x` ⇒ [`LowerHex`] +//! * `X` ⇒ [`UpperHex`] +//! * `p` ⇒ [`Pointer`] +//! * `b` ⇒ [`Binary`] +//! * `e` ⇒ [`LowerExp`] +//! * `E` ⇒ [`UpperExp`] +//! +//! What this means is that any type of argument which implements the +//! [`fmt::Binary`][`Binary`] trait can then be formatted with `{:b}`. Implementations +//! are provided for these traits for a number of primitive types by the +//! standard library as well. If no format is specified (as in `{}` or `{:6}`), +//! then the format trait used is the [`Display`] trait. +//! +//! When implementing a format trait for your own type, you will have to +//! implement a method of the signature: +//! +//! ``` +//! # #![allow(dead_code)] +//! # use std::fmt; +//! # struct Foo; // our custom type +//! # impl fmt::Display for Foo { +//! fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +//! # write!(f, "testing, testing") +//! # } } +//! ``` +//! +//! Your type will be passed as `self` by-reference, and then the function +//! should emit output into the Formatter `f` which implements `fmt::Write`. It is up to each +//! format trait implementation to correctly adhere to the requested formatting parameters. +//! The values of these parameters can be accessed with methods of the +//! [`Formatter`] struct. In order to help with this, the [`Formatter`] struct also +//! provides some helper methods. +//! +//! Additionally, the return value of this function is [`fmt::Result`] which is a +//! type alias of <code>[Result]<(), [std::fmt::Error]></code>. Formatting implementations +//! should ensure that they propagate errors from the [`Formatter`] (e.g., when +//! calling [`write!`]). However, they should never return errors spuriously. That +//! is, a formatting implementation must and may only return an error if the +//! passed-in [`Formatter`] returns an error. This is because, contrary to what +//! the function signature might suggest, string formatting is an infallible +//! operation. This function only returns a result because writing to the +//! underlying stream might fail and it must provide a way to propagate the fact +//! that an error has occurred back up the stack. +//! +//! An example of implementing the formatting traits would look +//! like: +//! +//! ``` +//! use std::fmt; +//! +//! #[derive(Debug)] +//! struct Vector2D { +//! x: isize, +//! y: isize, +//! } +//! +//! impl fmt::Display for Vector2D { +//! fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +//! // The `f` value implements the `Write` trait, which is what the +//! // write! macro is expecting. Note that this formatting ignores the +//! // various flags provided to format strings. +//! write!(f, "({}, {})", self.x, self.y) +//! } +//! } +//! +//! // Different traits allow different forms of output of a type. The meaning +//! // of this format is to print the magnitude of a vector. +//! impl fmt::Binary for Vector2D { +//! fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { +//! let magnitude = (self.x * self.x + self.y * self.y) as f64; +//! let magnitude = magnitude.sqrt(); +//! +//! // Respect the formatting flags by using the helper method +//! // `pad_integral` on the Formatter object. See the method +//! // documentation for details, and the function `pad` can be used +//! // to pad strings. +//! let decimals = f.precision().unwrap_or(3); +//! let string = format!("{:.*}", decimals, magnitude); +//! f.pad_integral(true, "", &string) +//! } +//! } +//! +//! fn main() { +//! let myvector = Vector2D { x: 3, y: 4 }; +//! +//! println!("{myvector}"); // => "(3, 4)" +//! println!("{myvector:?}"); // => "Vector2D {x: 3, y:4}" +//! println!("{myvector:10.3b}"); // => " 5.000" +//! } +//! ``` +//! +//! ### `fmt::Display` vs `fmt::Debug` +//! +//! These two formatting traits have distinct purposes: +//! +//! - [`fmt::Display`][`Display`] implementations assert that the type can be faithfully +//! represented as a UTF-8 string at all times. It is **not** expected that +//! all types implement the [`Display`] trait. +//! - [`fmt::Debug`][`Debug`] implementations should be implemented for **all** public types. +//! Output will typically represent the internal state as faithfully as possible. +//! The purpose of the [`Debug`] trait is to facilitate debugging Rust code. In +//! most cases, using `#[derive(Debug)]` is sufficient and recommended. +//! +//! Some examples of the output from both traits: +//! +//! ``` +//! assert_eq!(format!("{} {:?}", 3, 4), "3 4"); +//! assert_eq!(format!("{} {:?}", 'a', 'b'), "a 'b'"); +//! assert_eq!(format!("{} {:?}", "foo\n", "bar\n"), "foo\n \"bar\\n\""); +//! ``` +//! +//! # Related macros +//! +//! There are a number of related macros in the [`format!`] family. The ones that +//! are currently implemented are: +//! +//! ```ignore (only-for-syntax-highlight) +//! format! // described above +//! write! // first argument is either a &mut io::Write or a &mut fmt::Write, the destination +//! writeln! // same as write but appends a newline +//! print! // the format string is printed to the standard output +//! println! // same as print but appends a newline +//! eprint! // the format string is printed to the standard error +//! eprintln! // same as eprint but appends a newline +//! format_args! // described below. +//! ``` +//! +//! ### `write!` +//! +//! [`write!`] and [`writeln!`] are two macros which are used to emit the format string +//! to a specified stream. This is used to prevent intermediate allocations of +//! format strings and instead directly write the output. Under the hood, this +//! function is actually invoking the [`write_fmt`] function defined on the +//! [`std::io::Write`] and the [`std::fmt::Write`] trait. Example usage is: +//! +//! ``` +//! # #![allow(unused_must_use)] +//! use std::io::Write; +//! let mut w = Vec::new(); +//! write!(&mut w, "Hello {}!", "world"); +//! ``` +//! +//! ### `print!` +//! +//! This and [`println!`] emit their output to stdout. Similarly to the [`write!`] +//! macro, the goal of these macros is to avoid intermediate allocations when +//! printing output. Example usage is: +//! +//! ``` +//! print!("Hello {}!", "world"); +//! println!("I have a newline {}", "character at the end"); +//! ``` +//! ### `eprint!` +//! +//! The [`eprint!`] and [`eprintln!`] macros are identical to +//! [`print!`] and [`println!`], respectively, except they emit their +//! output to stderr. +//! +//! ### `format_args!` +//! +//! [`format_args!`] is a curious macro used to safely pass around +//! an opaque object describing the format string. This object +//! does not require any heap allocations to create, and it only +//! references information on the stack. Under the hood, all of +//! the related macros are implemented in terms of this. First +//! off, some example usage is: +//! +//! ``` +//! # #![allow(unused_must_use)] +//! use std::fmt; +//! use std::io::{self, Write}; +//! +//! let mut some_writer = io::stdout(); +//! write!(&mut some_writer, "{}", format_args!("print with a {}", "macro")); +//! +//! fn my_fmt_fn(args: fmt::Arguments) { +//! write!(&mut io::stdout(), "{}", args); +//! } +//! my_fmt_fn(format_args!(", or a {} too", "function")); +//! ``` +//! +//! The result of the [`format_args!`] macro is a value of type [`fmt::Arguments`]. +//! This structure can then be passed to the [`write`] and [`format`] functions +//! inside this module in order to process the format string. +//! The goal of this macro is to even further prevent intermediate allocations +//! when dealing with formatting strings. +//! +//! For example, a logging library could use the standard formatting syntax, but +//! it would internally pass around this structure until it has been determined +//! where output should go to. +//! +//! [`fmt::Result`]: Result "fmt::Result" +//! [Result]: core::result::Result "std::result::Result" +//! [std::fmt::Error]: Error "fmt::Error" +//! [`write`]: write() "fmt::write" +//! [`to_string`]: crate::string::ToString::to_string "ToString::to_string" +//! [`write_fmt`]: ../../std/io/trait.Write.html#method.write_fmt +//! [`std::io::Write`]: ../../std/io/trait.Write.html +//! [`std::fmt::Write`]: ../../std/fmt/trait.Write.html +//! [`print!`]: ../../std/macro.print.html "print!" +//! [`println!`]: ../../std/macro.println.html "println!" +//! [`eprint!`]: ../../std/macro.eprint.html "eprint!" +//! [`eprintln!`]: ../../std/macro.eprintln.html "eprintln!" +//! [`format_args!`]: ../../std/macro.format_args.html "format_args!" +//! [`fmt::Arguments`]: Arguments "fmt::Arguments" +//! [`format`]: format() "fmt::format" + +#![stable(feature = "rust1", since = "1.0.0")] + +#[unstable(feature = "fmt_internals", issue = "none")] +pub use core::fmt::rt; +#[stable(feature = "fmt_flags_align", since = "1.28.0")] +pub use core::fmt::Alignment; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::fmt::Error; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::fmt::{write, ArgumentV1, Arguments}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::fmt::{Binary, Octal}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::fmt::{Debug, Display}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::fmt::{DebugList, DebugMap, DebugSet, DebugStruct, DebugTuple}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::fmt::{Formatter, Result, Write}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::fmt::{LowerExp, UpperExp}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::fmt::{LowerHex, Pointer, UpperHex}; + +#[cfg(not(no_global_oom_handling))] +use crate::string; + +/// The `format` function takes an [`Arguments`] struct and returns the resulting +/// formatted string. +/// +/// The [`Arguments`] instance can be created with the [`format_args!`] macro. +/// +/// # Examples +/// +/// Basic usage: +/// +/// ``` +/// use std::fmt; +/// +/// let s = fmt::format(format_args!("Hello, {}!", "world")); +/// assert_eq!(s, "Hello, world!"); +/// ``` +/// +/// Please note that using [`format!`] might be preferable. +/// Example: +/// +/// ``` +/// let s = format!("Hello, {}!", "world"); +/// assert_eq!(s, "Hello, world!"); +/// ``` +/// +/// [`format_args!`]: core::format_args +/// [`format!`]: crate::format +#[cfg(not(no_global_oom_handling))] +#[must_use] +#[stable(feature = "rust1", since = "1.0.0")] +pub fn format(args: Arguments<'_>) -> string::String { + let capacity = args.estimated_capacity(); + let mut output = string::String::with_capacity(capacity); + output.write_fmt(args).expect("a formatting trait implementation returned an error"); + output +} diff --git a/rust/alloc/lib.rs b/rust/alloc/lib.rs index 233bcd5e4654..03d2ce1df814 100644 --- a/rust/alloc/lib.rs +++ b/rust/alloc/lib.rs @@ -194,7 +194,6 @@ extern crate std; extern crate test; // Module with internal macros used by other modules (needs to be included before other modules). -#[cfg(not(no_macros))] #[macro_use] mod macros; @@ -219,17 +218,13 @@ pub mod borrow; pub mod collections; #[cfg(not(no_global_oom_handling))] pub mod ffi; -#[cfg(not(no_fmt))] pub mod fmt; #[cfg(not(no_rc))] pub mod rc; pub mod slice; -#[cfg(not(no_str))] pub mod str; -#[cfg(not(no_string))] pub mod string; -#[cfg(not(no_sync))] -#[cfg(target_has_atomic = "ptr")] +#[cfg(all(not(no_sync), target_has_atomic = "ptr"))] pub mod sync; #[cfg(all(not(no_global_oom_handling), target_has_atomic = "ptr"))] pub mod task; diff --git a/rust/alloc/macros.rs b/rust/alloc/macros.rs new file mode 100644 index 000000000000..fa7bacfd5847 --- /dev/null +++ b/rust/alloc/macros.rs @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +/// Creates a [`Vec`] containing the arguments. +/// +/// `vec!` allows `Vec`s to be defined with the same syntax as array expressions. +/// There are two forms of this macro: +/// +/// - Create a [`Vec`] containing a given list of elements: +/// +/// ``` +/// let v = vec![1, 2, 3]; +/// assert_eq!(v[0], 1); +/// assert_eq!(v[1], 2); +/// assert_eq!(v[2], 3); +/// ``` +/// +/// - Create a [`Vec`] from a given element and size: +/// +/// ``` +/// let v = vec![1; 3]; +/// assert_eq!(v, [1, 1, 1]); +/// ``` +/// +/// Note that unlike array expressions this syntax supports all elements +/// which implement [`Clone`] and the number of elements doesn't have to be +/// a constant. +/// +/// This will use `clone` to duplicate an expression, so one should be careful +/// using this with types having a nonstandard `Clone` implementation. For +/// example, `vec![Rc::new(1); 5]` will create a vector of five references +/// to the same boxed integer value, not five references pointing to independently +/// boxed integers. +/// +/// Also, note that `vec![expr; 0]` is allowed, and produces an empty vector. +/// This will still evaluate `expr`, however, and immediately drop the resulting value, so +/// be mindful of side effects. +/// +/// [`Vec`]: crate::vec::Vec +#[cfg(all(not(no_global_oom_handling), not(test)))] +#[macro_export] +#[stable(feature = "rust1", since = "1.0.0")] +#[rustc_diagnostic_item = "vec_macro"] +#[allow_internal_unstable(box_syntax, liballoc_internals)] +macro_rules! vec { + () => ( + $crate::__rust_force_expr!($crate::vec::Vec::new()) + ); + ($elem:expr; $n:expr) => ( + $crate::__rust_force_expr!($crate::vec::from_elem($elem, $n)) + ); + ($($x:expr),+ $(,)?) => ( + $crate::__rust_force_expr!(<[_]>::into_vec(box [$($x),+])) + ); +} + +// HACK(japaric): with cfg(test) the inherent `[T]::into_vec` method, which is +// required for this macro definition, is not available. Instead use the +// `slice::into_vec` function which is only available with cfg(test) +// NB see the slice::hack module in slice.rs for more information +#[cfg(all(not(no_global_oom_handling), test))] +#[cfg_attr(not(bootstrap), allow(unused_macro_rules))] +macro_rules! vec { + () => ( + $crate::vec::Vec::new() + ); + ($elem:expr; $n:expr) => ( + $crate::vec::from_elem($elem, $n) + ); + ($($x:expr),*) => ( + $crate::slice::into_vec(box [$($x),*]) + ); + ($($x:expr,)*) => (vec![$($x),*]) +} + +/// Creates a `String` using interpolation of runtime expressions. +/// +/// The first argument `format!` receives is a format string. This must be a string +/// literal. The power of the formatting string is in the `{}`s contained. +/// +/// Additional parameters passed to `format!` replace the `{}`s within the +/// formatting string in the order given unless named or positional parameters +/// are used; see [`std::fmt`] for more information. +/// +/// A common use for `format!` is concatenation and interpolation of strings. +/// The same convention is used with [`print!`] and [`write!`] macros, +/// depending on the intended destination of the string. +/// +/// To convert a single value to a string, use the [`to_string`] method. This +/// will use the [`Display`] formatting trait. +/// +/// [`std::fmt`]: ../std/fmt/index.html +/// [`print!`]: ../std/macro.print.html +/// [`write!`]: core::write +/// [`to_string`]: crate::string::ToString +/// [`Display`]: core::fmt::Display +/// +/// # Panics +/// +/// `format!` panics if a formatting trait implementation returns an error. +/// This indicates an incorrect implementation +/// since `fmt::Write for String` never returns an error itself. +/// +/// # Examples +/// +/// ``` +/// format!("test"); +/// format!("hello {}", "world!"); +/// format!("x = {}, y = {y}", 10, y = 30); +/// ``` +#[macro_export] +#[stable(feature = "rust1", since = "1.0.0")] +#[cfg_attr(not(test), rustc_diagnostic_item = "format_macro")] +macro_rules! format { + ($($arg:tt)*) => {{ + let res = $crate::fmt::format($crate::__export::format_args!($($arg)*)); + res + }} +} + +/// Force AST node to an expression to improve diagnostics in pattern position. +#[doc(hidden)] +#[macro_export] +#[unstable(feature = "liballoc_internals", issue = "none", reason = "implementation detail")] +macro_rules! __rust_force_expr { + ($e:expr) => { + $e + }; +} diff --git a/rust/alloc/raw_vec.rs b/rust/alloc/raw_vec.rs index daf5f2da7168..59e353bfe5d3 100644 --- a/rust/alloc/raw_vec.rs +++ b/rust/alloc/raw_vec.rs @@ -20,10 +20,10 @@ use crate::collections::TryReserveErrorKind::*; #[cfg(test)] mod tests; -#[cfg(not(no_global_oom_handling))] enum AllocInit { /// The contents of the new memory are uninitialized. Uninitialized, + #[allow(dead_code)] /// The new memory is guaranteed to be zeroed. Zeroed, } @@ -133,6 +133,13 @@ impl<T, A: Allocator> RawVec<T, A> { Self::allocate_in(capacity, AllocInit::Uninitialized, alloc) } + /// Like `try_with_capacity`, but parameterized over the choice of + /// allocator for the returned `RawVec`. + #[inline] + pub fn try_with_capacity_in(capacity: usize, alloc: A) -> Result<Self, TryReserveError> { + Self::try_allocate_in(capacity, AllocInit::Uninitialized, alloc) + } + /// Like `with_capacity_zeroed`, but parameterized over the choice /// of allocator for the returned `RawVec`. #[cfg(not(no_global_oom_handling))] @@ -203,6 +210,30 @@ impl<T, A: Allocator> RawVec<T, A> { } } + fn try_allocate_in(capacity: usize, init: AllocInit, alloc: A) -> Result<Self, TryReserveError> { + // Don't allocate here because `Drop` will not deallocate when `capacity` is 0. + if mem::size_of::<T>() == 0 || capacity == 0 { + return Ok(Self::new_in(alloc)); + } + + let layout = Layout::array::<T>(capacity).map_err(|_| CapacityOverflow)?; + alloc_guard(layout.size())?; + let result = match init { + AllocInit::Uninitialized => alloc.allocate(layout), + AllocInit::Zeroed => alloc.allocate_zeroed(layout), + }; + let ptr = result.map_err(|_| AllocError { layout, non_exhaustive: () })?; + + // Allocators currently return a `NonNull<[u8]>` whose length + // matches the size requested. If that ever changes, the capacity + // here should change to `ptr.len() / mem::size_of::<T>()`. + Ok(Self { + ptr: unsafe { Unique::new_unchecked(ptr.cast().as_ptr()) }, + cap: capacity, + alloc, + }) + } + /// Reconstitutes a `RawVec` from a pointer, capacity, and allocator. /// /// # Safety @@ -360,6 +391,16 @@ impl<T, A: Allocator> RawVec<T, A> { pub fn shrink_to_fit(&mut self, cap: usize) { handle_reserve(self.shrink(cap)); } + + /// Tries to shrink the buffer down to the specified capacity. If the given amount + /// is 0, actually completely deallocates. + /// + /// # Panics + /// + /// Panics if the given amount is *larger* than the current capacity. + pub fn try_shrink_to_fit(&mut self, cap: usize) -> Result<(), TryReserveError> { + self.shrink(cap) + } } impl<T, A: Allocator> RawVec<T, A> { @@ -429,7 +470,6 @@ impl<T, A: Allocator> RawVec<T, A> { Ok(()) } - #[allow(dead_code)] fn shrink(&mut self, cap: usize) -> Result<(), TryReserveError> { assert!(cap <= self.capacity(), "Tried to shrink to a larger capacity"); diff --git a/rust/alloc/slice.rs b/rust/alloc/slice.rs index e444e97fa145..d53f6051f3a8 100644 --- a/rust/alloc/slice.rs +++ b/rust/alloc/slice.rs @@ -95,11 +95,11 @@ use core::mem::size_of; use core::ptr; use crate::alloc::Allocator; -#[cfg(not(no_global_oom_handling))] use crate::alloc::Global; #[cfg(not(no_global_oom_handling))] use crate::borrow::ToOwned; use crate::boxed::Box; +use crate::collections::TryReserveError; use crate::vec::Vec; #[unstable(feature = "slice_range", issue = "76393")] @@ -159,6 +159,7 @@ pub(crate) mod hack { use core::alloc::Allocator; use crate::boxed::Box; + use crate::collections::TryReserveError; use crate::vec::Vec; // We shouldn't add inline attribute to this since this is used in @@ -178,6 +179,11 @@ pub(crate) mod hack { T::to_vec(s, alloc) } + #[inline] + pub fn try_to_vec<T: TryConvertVec, A: Allocator>(s: &[T], alloc: A) -> Result<Vec<T, A>, TryReserveError> { + T::try_to_vec(s, alloc) + } + #[cfg(not(no_global_oom_handling))] pub trait ConvertVec { fn to_vec<A: Allocator>(s: &[Self], alloc: A) -> Vec<Self, A> @@ -185,6 +191,12 @@ pub(crate) mod hack { Self: Sized; } + pub trait TryConvertVec { + fn try_to_vec<A: Allocator>(s: &[Self], alloc: A) -> Result<Vec<Self, A>, TryReserveError> + where + Self: Sized; + } + #[cfg(not(no_global_oom_handling))] impl<T: Clone> ConvertVec for T { #[inline] @@ -237,6 +249,42 @@ pub(crate) mod hack { v } } + + impl<T: Clone> TryConvertVec for T { + #[inline] + default fn try_to_vec<A: Allocator>(s: &[Self], alloc: A) -> Result<Vec<Self, A>, TryReserveError> { + struct DropGuard<'a, T, A: Allocator> { + vec: &'a mut Vec<T, A>, + num_init: usize, + } + impl<'a, T, A: Allocator> Drop for DropGuard<'a, T, A> { + #[inline] + fn drop(&mut self) { + // SAFETY: + // items were marked initialized in the loop below + unsafe { + self.vec.set_len(self.num_init); + } + } + } + let mut vec = Vec::try_with_capacity_in(s.len(), alloc)?; + let mut guard = DropGuard { vec: &mut vec, num_init: 0 }; + let slots = guard.vec.spare_capacity_mut(); + // .take(slots.len()) is necessary for LLVM to remove bounds checks + // and has better codegen than zip. + for (i, b) in s.iter().enumerate().take(slots.len()) { + guard.num_init = i; + slots[i].write(b.clone()); + } + core::mem::forget(guard); + // SAFETY: + // the vec was allocated and initialized above to at least this length. + unsafe { + vec.set_len(s.len()); + } + Ok(vec) + } + } } #[cfg(not(test))] @@ -483,6 +531,25 @@ impl<T> [T] { self.to_vec_in(Global) } + /// Tries to copy `self` into a new `Vec`. + /// + /// # Examples + /// + /// ``` + /// let s = [10, 40, 30]; + /// let x = s.try_to_vec().unwrap(); + /// // Here, `s` and `x` can be modified independently. + /// ``` + #[rustc_allow_incoherent_impl] + #[inline] + #[stable(feature = "kernel", since = "1.0.0")] + pub fn try_to_vec(&self) -> Result<Vec<T>, TryReserveError> + where + T: Clone, + { + self.try_to_vec_in(Global) + } + /// Copies `self` into a new `Vec` with an allocator. /// /// # Examples @@ -508,6 +575,30 @@ impl<T> [T] { hack::to_vec(self, alloc) } + /// Tries to copy `self` into a new `Vec` with an allocator. + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api)] + /// + /// use std::alloc::System; + /// + /// let s = [10, 40, 30]; + /// let x = s.try_to_vec_in(System).unwrap(); + /// // Here, `s` and `x` can be modified independently. + /// ``` + #[rustc_allow_incoherent_impl] + #[inline] + #[stable(feature = "kernel", since = "1.0.0")] + pub fn try_to_vec_in<A: Allocator>(&self, alloc: A) -> Result<Vec<T, A>, TryReserveError> + where + T: Clone, + { + // N.B., see the `hack` module in this file for more details. + hack::try_to_vec(self, alloc) + } + /// Converts `self` into a vector without clones or allocation. /// /// The resulting vector can be converted back into a box via diff --git a/rust/alloc/str.rs b/rust/alloc/str.rs new file mode 100644 index 000000000000..4e3aec690fdb --- /dev/null +++ b/rust/alloc/str.rs @@ -0,0 +1,641 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! Unicode string slices. +//! +//! *[See also the `str` primitive type](str).* +//! +//! The `&str` type is one of the two main string types, the other being `String`. +//! Unlike its `String` counterpart, its contents are borrowed. +//! +//! # Basic Usage +//! +//! A basic string declaration of `&str` type: +//! +//! ``` +//! let hello_world = "Hello, World!"; +//! ``` +//! +//! Here we have declared a string literal, also known as a string slice. +//! String literals have a static lifetime, which means the string `hello_world` +//! is guaranteed to be valid for the duration of the entire program. +//! We can explicitly specify `hello_world`'s lifetime as well: +//! +//! ``` +//! let hello_world: &'static str = "Hello, world!"; +//! ``` + +#![stable(feature = "rust1", since = "1.0.0")] +// Many of the usings in this module are only used in the test configuration. +// It's cleaner to just turn off the unused_imports warning than to fix them. +#![allow(unused_imports)] + +use core::borrow::{Borrow, BorrowMut}; +use core::iter::FusedIterator; +use core::mem; +use core::ptr; +use core::str::pattern::{DoubleEndedSearcher, Pattern, ReverseSearcher, Searcher}; +use core::unicode::conversions; + +use crate::borrow::ToOwned; +use crate::boxed::Box; +use crate::collections::TryReserveError; +use crate::slice::{Concat, Join, SliceIndex}; +use crate::string::String; +use crate::vec::Vec; + +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::str::pattern; +#[stable(feature = "encode_utf16", since = "1.8.0")] +pub use core::str::EncodeUtf16; +#[stable(feature = "split_ascii_whitespace", since = "1.34.0")] +pub use core::str::SplitAsciiWhitespace; +#[stable(feature = "split_inclusive", since = "1.51.0")] +pub use core::str::SplitInclusive; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::str::SplitWhitespace; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::str::{from_utf8, from_utf8_mut, Bytes, CharIndices, Chars}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::str::{from_utf8_unchecked, from_utf8_unchecked_mut, ParseBoolError}; +#[stable(feature = "str_escape", since = "1.34.0")] +pub use core::str::{EscapeDebug, EscapeDefault, EscapeUnicode}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::str::{FromStr, Utf8Error}; +#[allow(deprecated)] +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::str::{Lines, LinesAny}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::str::{MatchIndices, RMatchIndices}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::str::{Matches, RMatches}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::str::{RSplit, Split}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::str::{RSplitN, SplitN}; +#[stable(feature = "rust1", since = "1.0.0")] +pub use core::str::{RSplitTerminator, SplitTerminator}; + +/// Note: `str` in `Concat<str>` is not meaningful here. +/// This type parameter of the trait only exists to enable another impl. +#[cfg(not(no_global_oom_handling))] +#[unstable(feature = "slice_concat_ext", issue = "27747")] +impl<S: Borrow<str>> Concat<str> for [S] { + type Output = String; + + fn concat(slice: &Self) -> String { + Join::join(slice, "") + } +} + +#[cfg(not(no_global_oom_handling))] +#[unstable(feature = "slice_concat_ext", issue = "27747")] +impl<S: Borrow<str>> Join<&str> for [S] { + type Output = String; + + fn join(slice: &Self, sep: &str) -> String { + unsafe { String::from_utf8_unchecked(join_generic_copy(slice, sep.as_bytes())) } + } +} + +#[cfg(not(no_global_oom_handling))] +macro_rules! specialize_for_lengths { + ($separator:expr, $target:expr, $iter:expr; $($num:expr),*) => {{ + let mut target = $target; + let iter = $iter; + let sep_bytes = $separator; + match $separator.len() { + $( + // loops with hardcoded sizes run much faster + // specialize the cases with small separator lengths + $num => { + for s in iter { + copy_slice_and_advance!(target, sep_bytes); + let content_bytes = s.borrow().as_ref(); + copy_slice_and_advance!(target, content_bytes); + } + }, + )* + _ => { + // arbitrary non-zero size fallback + for s in iter { + copy_slice_and_advance!(target, sep_bytes); + let content_bytes = s.borrow().as_ref(); + copy_slice_and_advance!(target, content_bytes); + } + } + } + target + }} +} + +#[cfg(not(no_global_oom_handling))] +macro_rules! copy_slice_and_advance { + ($target:expr, $bytes:expr) => { + let len = $bytes.len(); + let (head, tail) = { $target }.split_at_mut(len); + head.copy_from_slice($bytes); + $target = tail; + }; +} + +// Optimized join implementation that works for both Vec<T> (T: Copy) and String's inner vec +// Currently (2018-05-13) there is a bug with type inference and specialization (see issue #36262) +// For this reason SliceConcat<T> is not specialized for T: Copy and SliceConcat<str> is the +// only user of this function. It is left in place for the time when that is fixed. +// +// the bounds for String-join are S: Borrow<str> and for Vec-join Borrow<[T]> +// [T] and str both impl AsRef<[T]> for some T +// => s.borrow().as_ref() and we always have slices +#[cfg(not(no_global_oom_handling))] +fn join_generic_copy<B, T, S>(slice: &[S], sep: &[T]) -> Vec<T> +where + T: Copy, + B: AsRef<[T]> + ?Sized, + S: Borrow<B>, +{ + let sep_len = sep.len(); + let mut iter = slice.iter(); + + // the first slice is the only one without a separator preceding it + let first = match iter.next() { + Some(first) => first, + None => return vec![], + }; + + // compute the exact total length of the joined Vec + // if the `len` calculation overflows, we'll panic + // we would have run out of memory anyway and the rest of the function requires + // the entire Vec pre-allocated for safety + let reserved_len = sep_len + .checked_mul(iter.len()) + .and_then(|n| { + slice.iter().map(|s| s.borrow().as_ref().len()).try_fold(n, usize::checked_add) + }) + .expect("attempt to join into collection with len > usize::MAX"); + + // prepare an uninitialized buffer + let mut result = Vec::with_capacity(reserved_len); + debug_assert!(result.capacity() >= reserved_len); + + result.extend_from_slice(first.borrow().as_ref()); + + unsafe { + let pos = result.len(); + let target = result.spare_capacity_mut().get_unchecked_mut(..reserved_len - pos); + + // Convert the separator and slices to slices of MaybeUninit + // to simplify implementation in specialize_for_lengths + let sep_uninit = core::slice::from_raw_parts(sep.as_ptr().cast(), sep.len()); + let iter_uninit = iter.map(|it| { + let it = it.borrow().as_ref(); + core::slice::from_raw_parts(it.as_ptr().cast(), it.len()) + }); + + // copy separator and slices over without bounds checks + // generate loops with hardcoded offsets for small separators + // massive improvements possible (~ x2) + let remain = specialize_for_lengths!(sep_uninit, target, iter_uninit; 0, 1, 2, 3, 4); + + // A weird borrow implementation may return different + // slices for the length calculation and the actual copy. + // Make sure we don't expose uninitialized bytes to the caller. + let result_len = reserved_len - remain.len(); + result.set_len(result_len); + } + result +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Borrow<str> for String { + #[inline] + fn borrow(&self) -> &str { + &self[..] + } +} + +#[stable(feature = "string_borrow_mut", since = "1.36.0")] +impl BorrowMut<str> for String { + #[inline] + fn borrow_mut(&mut self) -> &mut str { + &mut self[..] + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl ToOwned for str { + type Owned = String; + #[inline] + fn to_owned(&self) -> String { + unsafe { String::from_utf8_unchecked(self.as_bytes().to_owned()) } + } + + fn clone_into(&self, target: &mut String) { + let mut b = mem::take(target).into_bytes(); + self.as_bytes().clone_into(&mut b); + *target = unsafe { String::from_utf8_unchecked(b) } + } +} + +/// Methods for string slices. +#[cfg(not(test))] +impl str { + /// Converts a `Box<str>` into a `Box<[u8]>` without copying or allocating. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s = "this is a string"; + /// let boxed_str = s.to_owned().into_boxed_str(); + /// let boxed_bytes = boxed_str.into_boxed_bytes(); + /// assert_eq!(*boxed_bytes, *s.as_bytes()); + /// ``` + #[rustc_allow_incoherent_impl] + #[stable(feature = "str_box_extras", since = "1.20.0")] + #[must_use = "`self` will be dropped if the result is not used"] + #[inline] + pub fn into_boxed_bytes(self: Box<str>) -> Box<[u8]> { + self.into() + } + + /// Replaces all matches of a pattern with another string. + /// + /// `replace` creates a new [`String`], and copies the data from this string slice into it. + /// While doing so, it attempts to find matches of a pattern. If it finds any, it + /// replaces them with the replacement string slice. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s = "this is old"; + /// + /// assert_eq!("this is new", s.replace("old", "new")); + /// ``` + /// + /// When the pattern doesn't match: + /// + /// ``` + /// let s = "this is old"; + /// assert_eq!(s, s.replace("cookie monster", "little lamb")); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[rustc_allow_incoherent_impl] + #[must_use = "this returns the replaced string as a new allocation, \ + without modifying the original"] + #[stable(feature = "rust1", since = "1.0.0")] + #[inline] + pub fn replace<'a, P: Pattern<'a>>(&'a self, from: P, to: &str) -> String { + let mut result = String::new(); + let mut last_end = 0; + for (start, part) in self.match_indices(from) { + result.push_str(unsafe { self.get_unchecked(last_end..start) }); + result.push_str(to); + last_end = start + part.len(); + } + result.push_str(unsafe { self.get_unchecked(last_end..self.len()) }); + result + } + + /// Replaces first N matches of a pattern with another string. + /// + /// `replacen` creates a new [`String`], and copies the data from this string slice into it. + /// While doing so, it attempts to find matches of a pattern. If it finds any, it + /// replaces them with the replacement string slice at most `count` times. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s = "foo foo 123 foo"; + /// assert_eq!("new new 123 foo", s.replacen("foo", "new", 2)); + /// assert_eq!("faa fao 123 foo", s.replacen('o', "a", 3)); + /// assert_eq!("foo foo new23 foo", s.replacen(char::is_numeric, "new", 1)); + /// ``` + /// + /// When the pattern doesn't match: + /// + /// ``` + /// let s = "this is old"; + /// assert_eq!(s, s.replacen("cookie monster", "little lamb", 10)); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[rustc_allow_incoherent_impl] + #[must_use = "this returns the replaced string as a new allocation, \ + without modifying the original"] + #[stable(feature = "str_replacen", since = "1.16.0")] + pub fn replacen<'a, P: Pattern<'a>>(&'a self, pat: P, to: &str, count: usize) -> String { + // Hope to reduce the times of re-allocation + let mut result = String::with_capacity(32); + let mut last_end = 0; + for (start, part) in self.match_indices(pat).take(count) { + result.push_str(unsafe { self.get_unchecked(last_end..start) }); + result.push_str(to); + last_end = start + part.len(); + } + result.push_str(unsafe { self.get_unchecked(last_end..self.len()) }); + result + } + + /// Returns the lowercase equivalent of this string slice, as a new [`String`]. + /// + /// 'Lowercase' is defined according to the terms of the Unicode Derived Core Property + /// `Lowercase`. + /// + /// Since some characters can expand into multiple characters when changing + /// the case, this function returns a [`String`] instead of modifying the + /// parameter in-place. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s = "HELLO"; + /// + /// assert_eq!("hello", s.to_lowercase()); + /// ``` + /// + /// A tricky example, with sigma: + /// + /// ``` + /// let sigma = "Σ"; + /// + /// assert_eq!("σ", sigma.to_lowercase()); + /// + /// // but at the end of a word, it's ς, not σ: + /// let odysseus = "ὈΔΥΣΣΕΎΣ"; + /// + /// assert_eq!("ὀδυσσεύς", odysseus.to_lowercase()); + /// ``` + /// + /// Languages without case are not changed: + /// + /// ``` + /// let new_year = "农历新年"; + /// + /// assert_eq!(new_year, new_year.to_lowercase()); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[rustc_allow_incoherent_impl] + #[must_use = "this returns the lowercase string as a new String, \ + without modifying the original"] + #[stable(feature = "unicode_case_mapping", since = "1.2.0")] + pub fn to_lowercase(&self) -> String { + let mut s = String::with_capacity(self.len()); + for (i, c) in self[..].char_indices() { + if c == 'Σ' { + // Σ maps to σ, except at the end of a word where it maps to ς. + // This is the only conditional (contextual) but language-independent mapping + // in `SpecialCasing.txt`, + // so hard-code it rather than have a generic "condition" mechanism. + // See https://github.com/rust-lang/rust/issues/26035 + map_uppercase_sigma(self, i, &mut s) + } else { + match conversions::to_lower(c) { + [a, '\0', _] => s.push(a), + [a, b, '\0'] => { + s.push(a); + s.push(b); + } + [a, b, c] => { + s.push(a); + s.push(b); + s.push(c); + } + } + } + } + return s; + + fn map_uppercase_sigma(from: &str, i: usize, to: &mut String) { + // See https://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992 + // for the definition of `Final_Sigma`. + debug_assert!('Σ'.len_utf8() == 2); + let is_word_final = case_ignoreable_then_cased(from[..i].chars().rev()) + && !case_ignoreable_then_cased(from[i + 2..].chars()); + to.push_str(if is_word_final { "ς" } else { "σ" }); + } + + fn case_ignoreable_then_cased<I: Iterator<Item = char>>(iter: I) -> bool { + use core::unicode::{Case_Ignorable, Cased}; + match iter.skip_while(|&c| Case_Ignorable(c)).next() { + Some(c) => Cased(c), + None => false, + } + } + } + + /// Returns the uppercase equivalent of this string slice, as a new [`String`]. + /// + /// 'Uppercase' is defined according to the terms of the Unicode Derived Core Property + /// `Uppercase`. + /// + /// Since some characters can expand into multiple characters when changing + /// the case, this function returns a [`String`] instead of modifying the + /// parameter in-place. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s = "hello"; + /// + /// assert_eq!("HELLO", s.to_uppercase()); + /// ``` + /// + /// Scripts without case are not changed: + /// + /// ``` + /// let new_year = "农历新年"; + /// + /// assert_eq!(new_year, new_year.to_uppercase()); + /// ``` + /// + /// One character can become multiple: + /// ``` + /// let s = "tschüß"; + /// + /// assert_eq!("TSCHÜSS", s.to_uppercase()); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[rustc_allow_incoherent_impl] + #[must_use = "this returns the uppercase string as a new String, \ + without modifying the original"] + #[stable(feature = "unicode_case_mapping", since = "1.2.0")] + pub fn to_uppercase(&self) -> String { + let mut s = String::with_capacity(self.len()); + for c in self[..].chars() { + match conversions::to_upper(c) { + [a, '\0', _] => s.push(a), + [a, b, '\0'] => { + s.push(a); + s.push(b); + } + [a, b, c] => { + s.push(a); + s.push(b); + s.push(c); + } + } + } + s + } + + /// Converts a [`Box<str>`] into a [`String`] without copying or allocating. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let string = String::from("birthday gift"); + /// let boxed_str = string.clone().into_boxed_str(); + /// + /// assert_eq!(boxed_str.into_string(), string); + /// ``` + #[stable(feature = "box_str", since = "1.4.0")] + #[rustc_allow_incoherent_impl] + #[must_use = "`self` will be dropped if the result is not used"] + #[inline] + pub fn into_string(self: Box<str>) -> String { + let slice = Box::<[u8]>::from(self); + unsafe { String::from_utf8_unchecked(slice.into_vec()) } + } + + /// Creates a new [`String`] by repeating a string `n` times. + /// + /// # Panics + /// + /// This function will panic if the capacity would overflow. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// assert_eq!("abc".repeat(4), String::from("abcabcabcabc")); + /// ``` + /// + /// A panic upon overflow: + /// + /// ```should_panic + /// // this will panic at runtime + /// let huge = "0123456789abcdef".repeat(usize::MAX); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[rustc_allow_incoherent_impl] + #[must_use] + #[stable(feature = "repeat_str", since = "1.16.0")] + pub fn repeat(&self, n: usize) -> String { + unsafe { String::from_utf8_unchecked(self.as_bytes().repeat(n)) } + } + + /// Returns a copy of this string where each character is mapped to its + /// ASCII upper case equivalent. + /// + /// ASCII letters 'a' to 'z' are mapped to 'A' to 'Z', + /// but non-ASCII letters are unchanged. + /// + /// To uppercase the value in-place, use [`make_ascii_uppercase`]. + /// + /// To uppercase ASCII characters in addition to non-ASCII characters, use + /// [`to_uppercase`]. + /// + /// # Examples + /// + /// ``` + /// let s = "Grüße, Jürgen ❤"; + /// + /// assert_eq!("GRüßE, JüRGEN ❤", s.to_ascii_uppercase()); + /// ``` + /// + /// [`make_ascii_uppercase`]: str::make_ascii_uppercase + /// [`to_uppercase`]: #method.to_uppercase + #[cfg(not(no_global_oom_handling))] + #[rustc_allow_incoherent_impl] + #[must_use = "to uppercase the value in-place, use `make_ascii_uppercase()`"] + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")] + #[inline] + pub fn to_ascii_uppercase(&self) -> String { + let mut bytes = self.as_bytes().to_vec(); + bytes.make_ascii_uppercase(); + // make_ascii_uppercase() preserves the UTF-8 invariant. + unsafe { String::from_utf8_unchecked(bytes) } + } + + /// Returns a copy of this string where each character is mapped to its + /// ASCII lower case equivalent. + /// + /// ASCII letters 'A' to 'Z' are mapped to 'a' to 'z', + /// but non-ASCII letters are unchanged. + /// + /// To lowercase the value in-place, use [`make_ascii_lowercase`]. + /// + /// To lowercase ASCII characters in addition to non-ASCII characters, use + /// [`to_lowercase`]. + /// + /// # Examples + /// + /// ``` + /// let s = "Grüße, Jürgen ❤"; + /// + /// assert_eq!("grüße, jürgen ❤", s.to_ascii_lowercase()); + /// ``` + /// + /// [`make_ascii_lowercase`]: str::make_ascii_lowercase + /// [`to_lowercase`]: #method.to_lowercase + #[cfg(not(no_global_oom_handling))] + #[rustc_allow_incoherent_impl] + #[must_use = "to lowercase the value in-place, use `make_ascii_lowercase()`"] + #[stable(feature = "ascii_methods_on_intrinsics", since = "1.23.0")] + #[inline] + pub fn to_ascii_lowercase(&self) -> String { + let mut bytes = self.as_bytes().to_vec(); + bytes.make_ascii_lowercase(); + // make_ascii_lowercase() preserves the UTF-8 invariant. + unsafe { String::from_utf8_unchecked(bytes) } + } + + /// Tries to create a `String`. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s: &str = "a"; + /// let ss: String = s.try_to_owned().unwrap(); + /// ``` + #[rustc_allow_incoherent_impl] + #[inline] + #[stable(feature = "kernel", since = "1.0.0")] + pub fn try_to_owned(&self) -> Result<String, TryReserveError> { + unsafe { Ok(String::from_utf8_unchecked(self.as_bytes().try_to_vec()?)) } + } +} + +/// Converts a boxed slice of bytes to a boxed string slice without checking +/// that the string contains valid UTF-8. +/// +/// # Examples +/// +/// Basic usage: +/// +/// ``` +/// let smile_utf8 = Box::new([226, 152, 186]); +/// let smile = unsafe { std::str::from_boxed_utf8_unchecked(smile_utf8) }; +/// +/// assert_eq!("☺", &*smile); +/// ``` +#[stable(feature = "str_box_extras", since = "1.20.0")] +#[must_use] +#[inline] +pub unsafe fn from_boxed_utf8_unchecked(v: Box<[u8]>) -> Box<str> { + unsafe { Box::from_raw(Box::into_raw(v) as *mut str) } +} diff --git a/rust/alloc/string.rs b/rust/alloc/string.rs new file mode 100644 index 000000000000..2ba7f30a7503 --- /dev/null +++ b/rust/alloc/string.rs @@ -0,0 +1,2944 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +//! A UTF-8–encoded, growable string. +//! +//! This module contains the [`String`] type, the [`ToString`] trait for +//! converting to strings, and several error types that may result from +//! working with [`String`]s. +//! +//! # Examples +//! +//! There are multiple ways to create a new [`String`] from a string literal: +//! +//! ``` +//! let s = "Hello".to_string(); +//! +//! let s = String::from("world"); +//! let s: String = "also this".into(); +//! ``` +//! +//! You can create a new [`String`] from an existing one by concatenating with +//! `+`: +//! +//! ``` +//! let s = "Hello".to_string(); +//! +//! let message = s + " world!"; +//! ``` +//! +//! If you have a vector of valid UTF-8 bytes, you can make a [`String`] out of +//! it. You can do the reverse too. +//! +//! ``` +//! let sparkle_heart = vec![240, 159, 146, 150]; +//! +//! // We know these bytes are valid, so we'll use `unwrap()`. +//! let sparkle_heart = String::from_utf8(sparkle_heart).unwrap(); +//! +//! assert_eq!("💖", sparkle_heart); +//! +//! let bytes = sparkle_heart.into_bytes(); +//! +//! assert_eq!(bytes, [240, 159, 146, 150]); +//! ``` + +#![stable(feature = "rust1", since = "1.0.0")] + +#[cfg(not(no_global_oom_handling))] +use core::char::{decode_utf16, REPLACEMENT_CHARACTER}; +use core::fmt; +use core::hash; +#[cfg(not(no_global_oom_handling))] +use core::iter::{from_fn, FromIterator}; +use core::iter::FusedIterator; +#[cfg(not(no_global_oom_handling))] +use core::ops::Add; +#[cfg(not(no_global_oom_handling))] +use core::ops::AddAssign; +#[cfg(not(no_global_oom_handling))] +use core::ops::Bound::{Excluded, Included, Unbounded}; +use core::ops::{self, Index, IndexMut, Range, RangeBounds}; +use core::ptr; +use core::slice; +#[cfg(not(no_global_oom_handling))] +use core::str::lossy; +use core::str::pattern::Pattern; + +#[cfg(not(no_global_oom_handling))] +use crate::borrow::{Cow, ToOwned}; +use crate::boxed::Box; +use crate::collections::TryReserveError; +use crate::str::{self, Chars, Utf8Error}; +#[cfg(not(no_global_oom_handling))] +use crate::str::{from_boxed_utf8_unchecked, FromStr}; +use crate::vec::Vec; + +/// A UTF-8–encoded, growable string. +/// +/// The `String` type is the most common string type that has ownership over the +/// contents of the string. It has a close relationship with its borrowed +/// counterpart, the primitive [`str`]. +/// +/// # Examples +/// +/// You can create a `String` from [a literal string][`&str`] with [`String::from`]: +/// +/// [`String::from`]: From::from +/// +/// ``` +/// let hello = String::from("Hello, world!"); +/// ``` +/// +/// You can append a [`char`] to a `String` with the [`push`] method, and +/// append a [`&str`] with the [`push_str`] method: +/// +/// ``` +/// let mut hello = String::from("Hello, "); +/// +/// hello.push('w'); +/// hello.push_str("orld!"); +/// ``` +/// +/// [`push`]: String::push +/// [`push_str`]: String::push_str +/// +/// If you have a vector of UTF-8 bytes, you can create a `String` from it with +/// the [`from_utf8`] method: +/// +/// ``` +/// // some bytes, in a vector +/// let sparkle_heart = vec![240, 159, 146, 150]; +/// +/// // We know these bytes are valid, so we'll use `unwrap()`. +/// let sparkle_heart = String::from_utf8(sparkle_heart).unwrap(); +/// +/// assert_eq!("💖", sparkle_heart); +/// ``` +/// +/// [`from_utf8`]: String::from_utf8 +/// +/// # UTF-8 +/// +/// `String`s are always valid UTF-8. If you need a non-UTF-8 string, consider +/// [`OsString`]. It is similar, but without the UTF-8 constraint. Because UTF-8 +/// is a variable width encoding, `String`s are typically smaller than an array of +/// the same `chars`: +/// +/// ``` +/// use std::mem; +/// +/// // `s` is ASCII which represents each `char` as one byte +/// let s = "hello"; +/// assert_eq!(s.len(), 5); +/// +/// // A `char` array with the same contents would be longer because +/// // every `char` is four bytes +/// let s = ['h', 'e', 'l', 'l', 'o']; +/// let size: usize = s.into_iter().map(|c| mem::size_of_val(&c)).sum(); +/// assert_eq!(size, 20); +/// +/// // However, for non-ASCII strings, the difference will be smaller +/// // and sometimes they are the same +/// let s = "💖💖💖💖💖"; +/// assert_eq!(s.len(), 20); +/// +/// let s = ['💖', '💖', '💖', '💖', '💖']; +/// let size: usize = s.into_iter().map(|c| mem::size_of_val(&c)).sum(); +/// assert_eq!(size, 20); +/// ``` +/// +/// This raises interesting questions as to how `s[i]` should work. +/// What should `i` be here? Several options include byte indices and +/// `char` indices but, because of UTF-8 encoding, only byte indices +/// would provide constant time indexing. Getting the `i`th `char`, for +/// example, is available using [`chars`]: +/// +/// ``` +/// let s = "hello"; +/// let third_character = s.chars().nth(2); +/// assert_eq!(third_character, Some('l')); +/// +/// let s = "💖💖💖💖💖"; +/// let third_character = s.chars().nth(2); +/// assert_eq!(third_character, Some('💖')); +/// ``` +/// +/// Next, what should `s[i]` return? Because indexing returns a reference +/// to underlying data it could be `&u8`, `&[u8]`, or something else similar. +/// Since we're only providing one index, `&u8` makes the most sense but that +/// might not be what the user expects and can be explicitly achieved with +/// [`as_bytes()`]: +/// +/// ``` +/// // The first byte is 104 - the byte value of `'h'` +/// let s = "hello"; +/// assert_eq!(s.as_bytes()[0], 104); +/// // or +/// assert_eq!(s.as_bytes()[0], b'h'); +/// +/// // The first byte is 240 which isn't obviously useful +/// let s = "💖💖💖💖💖"; +/// assert_eq!(s.as_bytes()[0], 240); +/// ``` +/// +/// Due to these ambiguities/restrictions, indexing with a `usize` is simply +/// forbidden: +/// +/// ```compile_fail,E0277 +/// let s = "hello"; +/// +/// // The following will not compile! +/// println!("The first letter of s is {}", s[0]); +/// ``` +/// +/// It is more clear, however, how `&s[i..j]` should work (that is, +/// indexing with a range). It should accept byte indices (to be constant-time) +/// and return a `&str` which is UTF-8 encoded. This is also called "string slicing". +/// Note this will panic if the byte indices provided are not character +/// boundaries - see [`is_char_boundary`] for more details. See the implementations +/// for [`SliceIndex<str>`] for more details on string slicing. For a non-panicking +/// version of string slicing, see [`get`]. +/// +/// [`OsString`]: ../../std/ffi/struct.OsString.html "ffi::OsString" +/// [`SliceIndex<str>`]: core::slice::SliceIndex +/// [`as_bytes()`]: str::as_bytes +/// [`get`]: str::get +/// [`is_char_boundary`]: str::is_char_boundary +/// +/// The [`bytes`] and [`chars`] methods return iterators over the bytes and +/// codepoints of the string, respectively. To iterate over codepoints along +/// with byte indices, use [`char_indices`]. +/// +/// [`bytes`]: str::bytes +/// [`chars`]: str::chars +/// [`char_indices`]: str::char_indices +/// +/// # Deref +/// +/// `String` implements <code>[Deref]<Target = [str]></code>, and so inherits all of [`str`]'s +/// methods. In addition, this means that you can pass a `String` to a +/// function which takes a [`&str`] by using an ampersand (`&`): +/// +/// ``` +/// fn takes_str(s: &str) { } +/// +/// let s = String::from("Hello"); +/// +/// takes_str(&s); +/// ``` +/// +/// This will create a [`&str`] from the `String` and pass it in. This +/// conversion is very inexpensive, and so generally, functions will accept +/// [`&str`]s as arguments unless they need a `String` for some specific +/// reason. +/// +/// In certain cases Rust doesn't have enough information to make this +/// conversion, known as [`Deref`] coercion. In the following example a string +/// slice [`&'a str`][`&str`] implements the trait `TraitExample`, and the function +/// `example_func` takes anything that implements the trait. In this case Rust +/// would need to make two implicit conversions, which Rust doesn't have the +/// means to do. For that reason, the following example will not compile. +/// +/// ```compile_fail,E0277 +/// trait TraitExample {} +/// +/// impl<'a> TraitExample for &'a str {} +/// +/// fn example_func<A: TraitExample>(example_arg: A) {} +/// +/// let example_string = String::from("example_string"); +/// example_func(&example_string); +/// ``` +/// +/// There are two options that would work instead. The first would be to +/// change the line `example_func(&example_string);` to +/// `example_func(example_string.as_str());`, using the method [`as_str()`] +/// to explicitly extract the string slice containing the string. The second +/// way changes `example_func(&example_string);` to +/// `example_func(&*example_string);`. In this case we are dereferencing a +/// `String` to a [`str`], then referencing the [`str`] back to +/// [`&str`]. The second way is more idiomatic, however both work to do the +/// conversion explicitly rather than relying on the implicit conversion. +/// +/// # Representation +/// +/// A `String` is made up of three components: a pointer to some bytes, a +/// length, and a capacity. The pointer points to an internal buffer `String` +/// uses to store its data. The length is the number of bytes currently stored +/// in the buffer, and the capacity is the size of the buffer in bytes. As such, +/// the length will always be less than or equal to the capacity. +/// +/// This buffer is always stored on the heap. +/// +/// You can look at these with the [`as_ptr`], [`len`], and [`capacity`] +/// methods: +/// +/// ``` +/// use std::mem; +/// +/// let story = String::from("Once upon a time..."); +/// +// FIXME Update this when vec_into_raw_parts is stabilized +/// // Prevent automatically dropping the String's data +/// let mut story = mem::ManuallyDrop::new(story); +/// +/// let ptr = story.as_mut_ptr(); +/// let len = story.len(); +/// let capacity = story.capacity(); +/// +/// // story has nineteen bytes +/// assert_eq!(19, len); +/// +/// // We can re-build a String out of ptr, len, and capacity. This is all +/// // unsafe because we are responsible for making sure the components are +/// // valid: +/// let s = unsafe { String::from_raw_parts(ptr, len, capacity) } ; +/// +/// assert_eq!(String::from("Once upon a time..."), s); +/// ``` +/// +/// [`as_ptr`]: str::as_ptr +/// [`len`]: String::len +/// [`capacity`]: String::capacity +/// +/// If a `String` has enough capacity, adding elements to it will not +/// re-allocate. For example, consider this program: +/// +/// ``` +/// let mut s = String::new(); +/// +/// println!("{}", s.capacity()); +/// +/// for _ in 0..5 { +/// s.push_str("hello"); +/// println!("{}", s.capacity()); +/// } +/// ``` +/// +/// This will output the following: +/// +/// ```text +/// 0 +/// 5 +/// 10 +/// 20 +/// 20 +/// 40 +/// ``` +/// +/// At first, we have no memory allocated at all, but as we append to the +/// string, it increases its capacity appropriately. If we instead use the +/// [`with_capacity`] method to allocate the correct capacity initially: +/// +/// ``` +/// let mut s = String::with_capacity(25); +/// +/// println!("{}", s.capacity()); +/// +/// for _ in 0..5 { +/// s.push_str("hello"); +/// println!("{}", s.capacity()); +/// } +/// ``` +/// +/// [`with_capacity`]: String::with_capacity +/// +/// We end up with a different output: +/// +/// ```text +/// 25 +/// 25 +/// 25 +/// 25 +/// 25 +/// 25 +/// ``` +/// +/// Here, there's no need to allocate more memory inside the loop. +/// +/// [str]: prim@str "str" +/// [`str`]: prim@str "str" +/// [`&str`]: prim@str "&str" +/// [Deref]: core::ops::Deref "ops::Deref" +/// [`Deref`]: core::ops::Deref "ops::Deref" +/// [`as_str()`]: String::as_str +#[derive(PartialOrd, Eq, Ord)] +#[cfg_attr(not(test), rustc_diagnostic_item = "String")] +#[stable(feature = "rust1", since = "1.0.0")] +pub struct String { + vec: Vec<u8>, +} + +/// A possible error value when converting a `String` from a UTF-8 byte vector. +/// +/// This type is the error type for the [`from_utf8`] method on [`String`]. It +/// is designed in such a way to carefully avoid reallocations: the +/// [`into_bytes`] method will give back the byte vector that was used in the +/// conversion attempt. +/// +/// [`from_utf8`]: String::from_utf8 +/// [`into_bytes`]: FromUtf8Error::into_bytes +/// +/// The [`Utf8Error`] type provided by [`std::str`] represents an error that may +/// occur when converting a slice of [`u8`]s to a [`&str`]. In this sense, it's +/// an analogue to `FromUtf8Error`, and you can get one from a `FromUtf8Error` +/// through the [`utf8_error`] method. +/// +/// [`Utf8Error`]: str::Utf8Error "std::str::Utf8Error" +/// [`std::str`]: core::str "std::str" +/// [`&str`]: prim@str "&str" +/// [`utf8_error`]: FromUtf8Error::utf8_error +/// +/// # Examples +/// +/// Basic usage: +/// +/// ``` +/// // some invalid bytes, in a vector +/// let bytes = vec![0, 159]; +/// +/// let value = String::from_utf8(bytes); +/// +/// assert!(value.is_err()); +/// assert_eq!(vec![0, 159], value.unwrap_err().into_bytes()); +/// ``` +#[stable(feature = "rust1", since = "1.0.0")] +#[cfg_attr(not(no_global_oom_handling), derive(Clone))] +#[derive(Debug, PartialEq, Eq)] +pub struct FromUtf8Error { + bytes: Vec<u8>, + error: Utf8Error, +} + +/// A possible error value when converting a `String` from a UTF-16 byte slice. +/// +/// This type is the error type for the [`from_utf16`] method on [`String`]. +/// +/// [`from_utf16`]: String::from_utf16 +/// # Examples +/// +/// Basic usage: +/// +/// ``` +/// // 𝄞mu<invalid>ic +/// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075, +/// 0xD800, 0x0069, 0x0063]; +/// +/// assert!(String::from_utf16(v).is_err()); +/// ``` +#[stable(feature = "rust1", since = "1.0.0")] +#[derive(Debug)] +pub struct FromUtf16Error(()); + +impl String { + /// Creates a new empty `String`. + /// + /// Given that the `String` is empty, this will not allocate any initial + /// buffer. While that means that this initial operation is very + /// inexpensive, it may cause excessive allocation later when you add + /// data. If you have an idea of how much data the `String` will hold, + /// consider the [`with_capacity`] method to prevent excessive + /// re-allocation. + /// + /// [`with_capacity`]: String::with_capacity + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s = String::new(); + /// ``` + #[inline] + #[rustc_const_stable(feature = "const_string_new", since = "1.39.0")] + #[stable(feature = "rust1", since = "1.0.0")] + #[must_use] + pub const fn new() -> String { + String { vec: Vec::new() } + } + + /// Creates a new empty `String` with a particular capacity. + /// + /// `String`s have an internal buffer to hold their data. The capacity is + /// the length of that buffer, and can be queried with the [`capacity`] + /// method. This method creates an empty `String`, but one with an initial + /// buffer that can hold `capacity` bytes. This is useful when you may be + /// appending a bunch of data to the `String`, reducing the number of + /// reallocations it needs to do. + /// + /// [`capacity`]: String::capacity + /// + /// If the given capacity is `0`, no allocation will occur, and this method + /// is identical to the [`new`] method. + /// + /// [`new`]: String::new + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::with_capacity(10); + /// + /// // The String contains no chars, even though it has capacity for more + /// assert_eq!(s.len(), 0); + /// + /// // These are all done without reallocating... + /// let cap = s.capacity(); + /// for _ in 0..10 { + /// s.push('a'); + /// } + /// + /// assert_eq!(s.capacity(), cap); + /// + /// // ...but this may make the string reallocate + /// s.push('a'); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + #[must_use] + pub fn with_capacity(capacity: usize) -> String { + String { vec: Vec::with_capacity(capacity) } + } + + // HACK(japaric): with cfg(test) the inherent `[T]::to_vec` method, which is + // required for this method definition, is not available. Since we don't + // require this method for testing purposes, I'll just stub it + // NB see the slice::hack module in slice.rs for more information + #[inline] + #[cfg(test)] + pub fn from_str(_: &str) -> String { + panic!("not available with cfg(test)"); + } + + /// Converts a vector of bytes to a `String`. + /// + /// A string ([`String`]) is made of bytes ([`u8`]), and a vector of bytes + /// ([`Vec<u8>`]) is made of bytes, so this function converts between the + /// two. Not all byte slices are valid `String`s, however: `String` + /// requires that it is valid UTF-8. `from_utf8()` checks to ensure that + /// the bytes are valid UTF-8, and then does the conversion. + /// + /// If you are sure that the byte slice is valid UTF-8, and you don't want + /// to incur the overhead of the validity check, there is an unsafe version + /// of this function, [`from_utf8_unchecked`], which has the same behavior + /// but skips the check. + /// + /// This method will take care to not copy the vector, for efficiency's + /// sake. + /// + /// If you need a [`&str`] instead of a `String`, consider + /// [`str::from_utf8`]. + /// + /// The inverse of this method is [`into_bytes`]. + /// + /// # Errors + /// + /// Returns [`Err`] if the slice is not UTF-8 with a description as to why the + /// provided bytes are not UTF-8. The vector you moved in is also included. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// // some bytes, in a vector + /// let sparkle_heart = vec![240, 159, 146, 150]; + /// + /// // We know these bytes are valid, so we'll use `unwrap()`. + /// let sparkle_heart = String::from_utf8(sparkle_heart).unwrap(); + /// + /// assert_eq!("💖", sparkle_heart); + /// ``` + /// + /// Incorrect bytes: + /// + /// ``` + /// // some invalid bytes, in a vector + /// let sparkle_heart = vec![0, 159, 146, 150]; + /// + /// assert!(String::from_utf8(sparkle_heart).is_err()); + /// ``` + /// + /// See the docs for [`FromUtf8Error`] for more details on what you can do + /// with this error. + /// + /// [`from_utf8_unchecked`]: String::from_utf8_unchecked + /// [`Vec<u8>`]: crate::vec::Vec "Vec" + /// [`&str`]: prim@str "&str" + /// [`into_bytes`]: String::into_bytes + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn from_utf8(vec: Vec<u8>) -> Result<String, FromUtf8Error> { + match str::from_utf8(&vec) { + Ok(..) => Ok(String { vec }), + Err(e) => Err(FromUtf8Error { bytes: vec, error: e }), + } + } + + /// Converts a slice of bytes to a string, including invalid characters. + /// + /// Strings are made of bytes ([`u8`]), and a slice of bytes + /// ([`&[u8]`][byteslice]) is made of bytes, so this function converts + /// between the two. Not all byte slices are valid strings, however: strings + /// are required to be valid UTF-8. During this conversion, + /// `from_utf8_lossy()` will replace any invalid UTF-8 sequences with + /// [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD], which looks like this: � + /// + /// [byteslice]: prim@slice + /// [U+FFFD]: core::char::REPLACEMENT_CHARACTER + /// + /// If you are sure that the byte slice is valid UTF-8, and you don't want + /// to incur the overhead of the conversion, there is an unsafe version + /// of this function, [`from_utf8_unchecked`], which has the same behavior + /// but skips the checks. + /// + /// [`from_utf8_unchecked`]: String::from_utf8_unchecked + /// + /// This function returns a [`Cow<'a, str>`]. If our byte slice is invalid + /// UTF-8, then we need to insert the replacement characters, which will + /// change the size of the string, and hence, require a `String`. But if + /// it's already valid UTF-8, we don't need a new allocation. This return + /// type allows us to handle both cases. + /// + /// [`Cow<'a, str>`]: crate::borrow::Cow "borrow::Cow" + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// // some bytes, in a vector + /// let sparkle_heart = vec![240, 159, 146, 150]; + /// + /// let sparkle_heart = String::from_utf8_lossy(&sparkle_heart); + /// + /// assert_eq!("💖", sparkle_heart); + /// ``` + /// + /// Incorrect bytes: + /// + /// ``` + /// // some invalid bytes + /// let input = b"Hello \xF0\x90\x80World"; + /// let output = String::from_utf8_lossy(input); + /// + /// assert_eq!("Hello �World", output); + /// ``` + #[must_use] + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn from_utf8_lossy(v: &[u8]) -> Cow<'_, str> { + let mut iter = lossy::Utf8Lossy::from_bytes(v).chunks(); + + let first_valid = if let Some(chunk) = iter.next() { + let lossy::Utf8LossyChunk { valid, broken } = chunk; + if broken.is_empty() { + debug_assert_eq!(valid.len(), v.len()); + return Cow::Borrowed(valid); + } + valid + } else { + return Cow::Borrowed(""); + }; + + const REPLACEMENT: &str = "\u{FFFD}"; + + let mut res = String::with_capacity(v.len()); + res.push_str(first_valid); + res.push_str(REPLACEMENT); + + for lossy::Utf8LossyChunk { valid, broken } in iter { + res.push_str(valid); + if !broken.is_empty() { + res.push_str(REPLACEMENT); + } + } + + Cow::Owned(res) + } + + /// Decode a UTF-16–encoded vector `v` into a `String`, returning [`Err`] + /// if `v` contains any invalid data. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// // 𝄞music + /// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075, + /// 0x0073, 0x0069, 0x0063]; + /// assert_eq!(String::from("𝄞music"), + /// String::from_utf16(v).unwrap()); + /// + /// // 𝄞mu<invalid>ic + /// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075, + /// 0xD800, 0x0069, 0x0063]; + /// assert!(String::from_utf16(v).is_err()); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn from_utf16(v: &[u16]) -> Result<String, FromUtf16Error> { + // This isn't done via collect::<Result<_, _>>() for performance reasons. + // FIXME: the function can be simplified again when #48994 is closed. + let mut ret = String::with_capacity(v.len()); + for c in decode_utf16(v.iter().cloned()) { + if let Ok(c) = c { + ret.push(c); + } else { + return Err(FromUtf16Error(())); + } + } + Ok(ret) + } + + /// Decode a UTF-16–encoded slice `v` into a `String`, replacing + /// invalid data with [the replacement character (`U+FFFD`)][U+FFFD]. + /// + /// Unlike [`from_utf8_lossy`] which returns a [`Cow<'a, str>`], + /// `from_utf16_lossy` returns a `String` since the UTF-16 to UTF-8 + /// conversion requires a memory allocation. + /// + /// [`from_utf8_lossy`]: String::from_utf8_lossy + /// [`Cow<'a, str>`]: crate::borrow::Cow "borrow::Cow" + /// [U+FFFD]: core::char::REPLACEMENT_CHARACTER + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// // 𝄞mus<invalid>ic<invalid> + /// let v = &[0xD834, 0xDD1E, 0x006d, 0x0075, + /// 0x0073, 0xDD1E, 0x0069, 0x0063, + /// 0xD834]; + /// + /// assert_eq!(String::from("𝄞mus\u{FFFD}ic\u{FFFD}"), + /// String::from_utf16_lossy(v)); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[must_use] + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn from_utf16_lossy(v: &[u16]) -> String { + decode_utf16(v.iter().cloned()).map(|r| r.unwrap_or(REPLACEMENT_CHARACTER)).collect() + } + + /// Decomposes a `String` into its raw components. + /// + /// Returns the raw pointer to the underlying data, the length of + /// the string (in bytes), and the allocated capacity of the data + /// (in bytes). These are the same arguments in the same order as + /// the arguments to [`from_raw_parts`]. + /// + /// After calling this function, the caller is responsible for the + /// memory previously managed by the `String`. The only way to do + /// this is to convert the raw pointer, length, and capacity back + /// into a `String` with the [`from_raw_parts`] function, allowing + /// the destructor to perform the cleanup. + /// + /// [`from_raw_parts`]: String::from_raw_parts + /// + /// # Examples + /// + /// ``` + /// #![feature(vec_into_raw_parts)] + /// let s = String::from("hello"); + /// + /// let (ptr, len, cap) = s.into_raw_parts(); + /// + /// let rebuilt = unsafe { String::from_raw_parts(ptr, len, cap) }; + /// assert_eq!(rebuilt, "hello"); + /// ``` + #[must_use = "`self` will be dropped if the result is not used"] + #[unstable(feature = "vec_into_raw_parts", reason = "new API", issue = "65816")] + pub fn into_raw_parts(self) -> (*mut u8, usize, usize) { + self.vec.into_raw_parts() + } + + /// Creates a new `String` from a length, capacity, and pointer. + /// + /// # Safety + /// + /// This is highly unsafe, due to the number of invariants that aren't + /// checked: + /// + /// * The memory at `buf` needs to have been previously allocated by the + /// same allocator the standard library uses, with a required alignment of exactly 1. + /// * `length` needs to be less than or equal to `capacity`. + /// * `capacity` needs to be the correct value. + /// * The first `length` bytes at `buf` need to be valid UTF-8. + /// + /// Violating these may cause problems like corrupting the allocator's + /// internal data structures. For example, it is normally **not** safe to + /// build a `String` from a pointer to a C `char` array containing UTF-8 + /// _unless_ you are certain that array was originally allocated by the + /// Rust standard library's allocator. + /// + /// The ownership of `buf` is effectively transferred to the + /// `String` which may then deallocate, reallocate or change the + /// contents of memory pointed to by the pointer at will. Ensure + /// that nothing else uses the pointer after calling this + /// function. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// use std::mem; + /// + /// unsafe { + /// let s = String::from("hello"); + /// + // FIXME Update this when vec_into_raw_parts is stabilized + /// // Prevent automatically dropping the String's data + /// let mut s = mem::ManuallyDrop::new(s); + /// + /// let ptr = s.as_mut_ptr(); + /// let len = s.len(); + /// let capacity = s.capacity(); + /// + /// let s = String::from_raw_parts(ptr, len, capacity); + /// + /// assert_eq!(String::from("hello"), s); + /// } + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub unsafe fn from_raw_parts(buf: *mut u8, length: usize, capacity: usize) -> String { + unsafe { String { vec: Vec::from_raw_parts(buf, length, capacity) } } + } + + /// Converts a vector of bytes to a `String` without checking that the + /// string contains valid UTF-8. + /// + /// See the safe version, [`from_utf8`], for more details. + /// + /// [`from_utf8`]: String::from_utf8 + /// + /// # Safety + /// + /// This function is unsafe because it does not check that the bytes passed + /// to it are valid UTF-8. If this constraint is violated, it may cause + /// memory unsafety issues with future users of the `String`, as the rest of + /// the standard library assumes that `String`s are valid UTF-8. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// // some bytes, in a vector + /// let sparkle_heart = vec![240, 159, 146, 150]; + /// + /// let sparkle_heart = unsafe { + /// String::from_utf8_unchecked(sparkle_heart) + /// }; + /// + /// assert_eq!("💖", sparkle_heart); + /// ``` + #[inline] + #[must_use] + #[stable(feature = "rust1", since = "1.0.0")] + pub unsafe fn from_utf8_unchecked(bytes: Vec<u8>) -> String { + String { vec: bytes } + } + + /// Converts a `String` into a byte vector. + /// + /// This consumes the `String`, so we do not need to copy its contents. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s = String::from("hello"); + /// let bytes = s.into_bytes(); + /// + /// assert_eq!(&[104, 101, 108, 108, 111][..], &bytes[..]); + /// ``` + #[inline] + #[must_use = "`self` will be dropped if the result is not used"] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn into_bytes(self) -> Vec<u8> { + self.vec + } + + /// Extracts a string slice containing the entire `String`. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s = String::from("foo"); + /// + /// assert_eq!("foo", s.as_str()); + /// ``` + #[inline] + #[must_use] + #[stable(feature = "string_as_str", since = "1.7.0")] + pub fn as_str(&self) -> &str { + self + } + + /// Converts a `String` into a mutable string slice. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::from("foobar"); + /// let s_mut_str = s.as_mut_str(); + /// + /// s_mut_str.make_ascii_uppercase(); + /// + /// assert_eq!("FOOBAR", s_mut_str); + /// ``` + #[inline] + #[must_use] + #[stable(feature = "string_as_str", since = "1.7.0")] + pub fn as_mut_str(&mut self) -> &mut str { + self + } + + /// Appends a given string slice onto the end of this `String`. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::from("foo"); + /// + /// s.push_str("bar"); + /// + /// assert_eq!("foobar", s); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn push_str(&mut self, string: &str) { + self.vec.extend_from_slice(string.as_bytes()) + } + + /// Copies elements from `src` range to the end of the string. + /// + /// ## Panics + /// + /// Panics if the starting point or end point do not lie on a [`char`] + /// boundary, or if they're out of bounds. + /// + /// ## Examples + /// + /// ``` + /// #![feature(string_extend_from_within)] + /// let mut string = String::from("abcde"); + /// + /// string.extend_from_within(2..); + /// assert_eq!(string, "abcdecde"); + /// + /// string.extend_from_within(..2); + /// assert_eq!(string, "abcdecdeab"); + /// + /// string.extend_from_within(4..8); + /// assert_eq!(string, "abcdecdeabecde"); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[unstable(feature = "string_extend_from_within", issue = "none")] + pub fn extend_from_within<R>(&mut self, src: R) + where + R: RangeBounds<usize>, + { + let src @ Range { start, end } = slice::range(src, ..self.len()); + + assert!(self.is_char_boundary(start)); + assert!(self.is_char_boundary(end)); + + self.vec.extend_from_within(src); + } + + /// Returns this `String`'s capacity, in bytes. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s = String::with_capacity(10); + /// + /// assert!(s.capacity() >= 10); + /// ``` + #[inline] + #[must_use] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn capacity(&self) -> usize { + self.vec.capacity() + } + + /// Ensures that this `String`'s capacity is at least `additional` bytes + /// larger than its length. + /// + /// The capacity may be increased by more than `additional` bytes if it + /// chooses, to prevent frequent reallocations. + /// + /// If you do not want this "at least" behavior, see the [`reserve_exact`] + /// method. + /// + /// # Panics + /// + /// Panics if the new capacity overflows [`usize`]. + /// + /// [`reserve_exact`]: String::reserve_exact + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::new(); + /// + /// s.reserve(10); + /// + /// assert!(s.capacity() >= 10); + /// ``` + /// + /// This might not actually increase the capacity: + /// + /// ``` + /// let mut s = String::with_capacity(10); + /// s.push('a'); + /// s.push('b'); + /// + /// // s now has a length of 2 and a capacity of 10 + /// assert_eq!(2, s.len()); + /// assert_eq!(10, s.capacity()); + /// + /// // Since we already have an extra 8 capacity, calling this... + /// s.reserve(8); + /// + /// // ... doesn't actually increase. + /// assert_eq!(10, s.capacity()); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn reserve(&mut self, additional: usize) { + self.vec.reserve(additional) + } + + /// Ensures that this `String`'s capacity is `additional` bytes + /// larger than its length. + /// + /// Consider using the [`reserve`] method unless you absolutely know + /// better than the allocator. + /// + /// [`reserve`]: String::reserve + /// + /// # Panics + /// + /// Panics if the new capacity overflows `usize`. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::new(); + /// + /// s.reserve_exact(10); + /// + /// assert!(s.capacity() >= 10); + /// ``` + /// + /// This might not actually increase the capacity: + /// + /// ``` + /// let mut s = String::with_capacity(10); + /// s.push('a'); + /// s.push('b'); + /// + /// // s now has a length of 2 and a capacity of 10 + /// assert_eq!(2, s.len()); + /// assert_eq!(10, s.capacity()); + /// + /// // Since we already have an extra 8 capacity, calling this... + /// s.reserve_exact(8); + /// + /// // ... doesn't actually increase. + /// assert_eq!(10, s.capacity()); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn reserve_exact(&mut self, additional: usize) { + self.vec.reserve_exact(additional) + } + + /// Tries to reserve capacity for at least `additional` more elements to be inserted + /// in the given `String`. The collection may reserve more space to avoid + /// frequent reallocations. After calling `reserve`, capacity will be + /// greater than or equal to `self.len() + additional`. Does nothing if + /// capacity is already sufficient. + /// + /// # Errors + /// + /// If the capacity overflows, or the allocator reports a failure, then an error + /// is returned. + /// + /// # Examples + /// + /// ``` + /// use std::collections::TryReserveError; + /// + /// fn process_data(data: &str) -> Result<String, TryReserveError> { + /// let mut output = String::new(); + /// + /// // Pre-reserve the memory, exiting if we can't + /// output.try_reserve(data.len())?; + /// + /// // Now we know this can't OOM in the middle of our complex work + /// output.push_str(data); + /// + /// Ok(output) + /// } + /// # process_data("rust").expect("why is the test harness OOMing on 4 bytes?"); + /// ``` + #[stable(feature = "try_reserve", since = "1.57.0")] + pub fn try_reserve(&mut self, additional: usize) -> Result<(), TryReserveError> { + self.vec.try_reserve(additional) + } + + /// Tries to reserve the minimum capacity for exactly `additional` more elements to + /// be inserted in the given `String`. After calling `try_reserve_exact`, + /// capacity will be greater than or equal to `self.len() + additional`. + /// Does nothing if the capacity is already sufficient. + /// + /// Note that the allocator may give the collection more space than it + /// requests. Therefore, capacity can not be relied upon to be precisely + /// minimal. Prefer [`try_reserve`] if future insertions are expected. + /// + /// [`try_reserve`]: String::try_reserve + /// + /// # Errors + /// + /// If the capacity overflows, or the allocator reports a failure, then an error + /// is returned. + /// + /// # Examples + /// + /// ``` + /// use std::collections::TryReserveError; + /// + /// fn process_data(data: &str) -> Result<String, TryReserveError> { + /// let mut output = String::new(); + /// + /// // Pre-reserve the memory, exiting if we can't + /// output.try_reserve_exact(data.len())?; + /// + /// // Now we know this can't OOM in the middle of our complex work + /// output.push_str(data); + /// + /// Ok(output) + /// } + /// # process_data("rust").expect("why is the test harness OOMing on 4 bytes?"); + /// ``` + #[stable(feature = "try_reserve", since = "1.57.0")] + pub fn try_reserve_exact(&mut self, additional: usize) -> Result<(), TryReserveError> { + self.vec.try_reserve_exact(additional) + } + + /// Shrinks the capacity of this `String` to match its length. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::from("foo"); + /// + /// s.reserve(100); + /// assert!(s.capacity() >= 100); + /// + /// s.shrink_to_fit(); + /// assert_eq!(3, s.capacity()); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn shrink_to_fit(&mut self) { + self.vec.shrink_to_fit() + } + + /// Shrinks the capacity of this `String` with a lower bound. + /// + /// The capacity will remain at least as large as both the length + /// and the supplied value. + /// + /// If the current capacity is less than the lower limit, this is a no-op. + /// + /// # Examples + /// + /// ``` + /// let mut s = String::from("foo"); + /// + /// s.reserve(100); + /// assert!(s.capacity() >= 100); + /// + /// s.shrink_to(10); + /// assert!(s.capacity() >= 10); + /// s.shrink_to(0); + /// assert!(s.capacity() >= 3); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[stable(feature = "shrink_to", since = "1.56.0")] + pub fn shrink_to(&mut self, min_capacity: usize) { + self.vec.shrink_to(min_capacity) + } + + /// Appends the given [`char`] to the end of this `String`. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::from("abc"); + /// + /// s.push('1'); + /// s.push('2'); + /// s.push('3'); + /// + /// assert_eq!("abc123", s); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn push(&mut self, ch: char) { + match ch.len_utf8() { + 1 => self.vec.push(ch as u8), + _ => self.vec.extend_from_slice(ch.encode_utf8(&mut [0; 4]).as_bytes()), + } + } + + /// Returns a byte slice of this `String`'s contents. + /// + /// The inverse of this method is [`from_utf8`]. + /// + /// [`from_utf8`]: String::from_utf8 + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s = String::from("hello"); + /// + /// assert_eq!(&[104, 101, 108, 108, 111], s.as_bytes()); + /// ``` + #[inline] + #[must_use] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn as_bytes(&self) -> &[u8] { + &self.vec + } + + /// Shortens this `String` to the specified length. + /// + /// If `new_len` is greater than the string's current length, this has no + /// effect. + /// + /// Note that this method has no effect on the allocated capacity + /// of the string + /// + /// # Panics + /// + /// Panics if `new_len` does not lie on a [`char`] boundary. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::from("hello"); + /// + /// s.truncate(2); + /// + /// assert_eq!("he", s); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn truncate(&mut self, new_len: usize) { + if new_len <= self.len() { + assert!(self.is_char_boundary(new_len)); + self.vec.truncate(new_len) + } + } + + /// Removes the last character from the string buffer and returns it. + /// + /// Returns [`None`] if this `String` is empty. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::from("foo"); + /// + /// assert_eq!(s.pop(), Some('o')); + /// assert_eq!(s.pop(), Some('o')); + /// assert_eq!(s.pop(), Some('f')); + /// + /// assert_eq!(s.pop(), None); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn pop(&mut self) -> Option<char> { + let ch = self.chars().rev().next()?; + let newlen = self.len() - ch.len_utf8(); + unsafe { + self.vec.set_len(newlen); + } + Some(ch) + } + + /// Removes a [`char`] from this `String` at a byte position and returns it. + /// + /// This is an *O*(*n*) operation, as it requires copying every element in the + /// buffer. + /// + /// # Panics + /// + /// Panics if `idx` is larger than or equal to the `String`'s length, + /// or if it does not lie on a [`char`] boundary. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::from("foo"); + /// + /// assert_eq!(s.remove(0), 'f'); + /// assert_eq!(s.remove(1), 'o'); + /// assert_eq!(s.remove(0), 'o'); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn remove(&mut self, idx: usize) -> char { + let ch = match self[idx..].chars().next() { + Some(ch) => ch, + None => panic!("cannot remove a char from the end of a string"), + }; + + let next = idx + ch.len_utf8(); + let len = self.len(); + unsafe { + ptr::copy(self.vec.as_ptr().add(next), self.vec.as_mut_ptr().add(idx), len - next); + self.vec.set_len(len - (next - idx)); + } + ch + } + + /// Remove all matches of pattern `pat` in the `String`. + /// + /// # Examples + /// + /// ``` + /// #![feature(string_remove_matches)] + /// let mut s = String::from("Trees are not green, the sky is not blue."); + /// s.remove_matches("not "); + /// assert_eq!("Trees are green, the sky is blue.", s); + /// ``` + /// + /// Matches will be detected and removed iteratively, so in cases where + /// patterns overlap, only the first pattern will be removed: + /// + /// ``` + /// #![feature(string_remove_matches)] + /// let mut s = String::from("banana"); + /// s.remove_matches("ana"); + /// assert_eq!("bna", s); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[unstable(feature = "string_remove_matches", reason = "new API", issue = "72826")] + pub fn remove_matches<'a, P>(&'a mut self, pat: P) + where + P: for<'x> Pattern<'x>, + { + use core::str::pattern::Searcher; + + let rejections = { + let mut searcher = pat.into_searcher(self); + // Per Searcher::next: + // + // A Match result needs to contain the whole matched pattern, + // however Reject results may be split up into arbitrary many + // adjacent fragments. Both ranges may have zero length. + // + // In practice the implementation of Searcher::next_match tends to + // be more efficient, so we use it here and do some work to invert + // matches into rejections since that's what we want to copy below. + let mut front = 0; + let rejections: Vec<_> = from_fn(|| { + let (start, end) = searcher.next_match()?; + let prev_front = front; + front = end; + Some((prev_front, start)) + }) + .collect(); + rejections.into_iter().chain(core::iter::once((front, self.len()))) + }; + + let mut len = 0; + let ptr = self.vec.as_mut_ptr(); + + for (start, end) in rejections { + let count = end - start; + if start != len { + // SAFETY: per Searcher::next: + // + // The stream of Match and Reject values up to a Done will + // contain index ranges that are adjacent, non-overlapping, + // covering the whole haystack, and laying on utf8 + // boundaries. + unsafe { + ptr::copy(ptr.add(start), ptr.add(len), count); + } + } + len += count; + } + + unsafe { + self.vec.set_len(len); + } + } + + /// Retains only the characters specified by the predicate. + /// + /// In other words, remove all characters `c` such that `f(c)` returns `false`. + /// This method operates in place, visiting each character exactly once in the + /// original order, and preserves the order of the retained characters. + /// + /// # Examples + /// + /// ``` + /// let mut s = String::from("f_o_ob_ar"); + /// + /// s.retain(|c| c != '_'); + /// + /// assert_eq!(s, "foobar"); + /// ``` + /// + /// Because the elements are visited exactly once in the original order, + /// external state may be used to decide which elements to keep. + /// + /// ``` + /// let mut s = String::from("abcde"); + /// let keep = [false, true, true, false, true]; + /// let mut iter = keep.iter(); + /// s.retain(|_| *iter.next().unwrap()); + /// assert_eq!(s, "bce"); + /// ``` + #[inline] + #[stable(feature = "string_retain", since = "1.26.0")] + pub fn retain<F>(&mut self, mut f: F) + where + F: FnMut(char) -> bool, + { + struct SetLenOnDrop<'a> { + s: &'a mut String, + idx: usize, + del_bytes: usize, + } + + impl<'a> Drop for SetLenOnDrop<'a> { + fn drop(&mut self) { + let new_len = self.idx - self.del_bytes; + debug_assert!(new_len <= self.s.len()); + unsafe { self.s.vec.set_len(new_len) }; + } + } + + let len = self.len(); + let mut guard = SetLenOnDrop { s: self, idx: 0, del_bytes: 0 }; + + while guard.idx < len { + let ch = unsafe { guard.s.get_unchecked(guard.idx..len).chars().next().unwrap() }; + let ch_len = ch.len_utf8(); + + if !f(ch) { + guard.del_bytes += ch_len; + } else if guard.del_bytes > 0 { + unsafe { + ptr::copy( + guard.s.vec.as_ptr().add(guard.idx), + guard.s.vec.as_mut_ptr().add(guard.idx - guard.del_bytes), + ch_len, + ); + } + } + + // Point idx to the next char + guard.idx += ch_len; + } + + drop(guard); + } + + /// Inserts a character into this `String` at a byte position. + /// + /// This is an *O*(*n*) operation as it requires copying every element in the + /// buffer. + /// + /// # Panics + /// + /// Panics if `idx` is larger than the `String`'s length, or if it does not + /// lie on a [`char`] boundary. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::with_capacity(3); + /// + /// s.insert(0, 'f'); + /// s.insert(1, 'o'); + /// s.insert(2, 'o'); + /// + /// assert_eq!("foo", s); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn insert(&mut self, idx: usize, ch: char) { + assert!(self.is_char_boundary(idx)); + let mut bits = [0; 4]; + let bits = ch.encode_utf8(&mut bits).as_bytes(); + + unsafe { + self.insert_bytes(idx, bits); + } + } + + #[cfg(not(no_global_oom_handling))] + unsafe fn insert_bytes(&mut self, idx: usize, bytes: &[u8]) { + let len = self.len(); + let amt = bytes.len(); + self.vec.reserve(amt); + + unsafe { + ptr::copy(self.vec.as_ptr().add(idx), self.vec.as_mut_ptr().add(idx + amt), len - idx); + ptr::copy_nonoverlapping(bytes.as_ptr(), self.vec.as_mut_ptr().add(idx), amt); + self.vec.set_len(len + amt); + } + } + + /// Inserts a string slice into this `String` at a byte position. + /// + /// This is an *O*(*n*) operation as it requires copying every element in the + /// buffer. + /// + /// # Panics + /// + /// Panics if `idx` is larger than the `String`'s length, or if it does not + /// lie on a [`char`] boundary. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::from("bar"); + /// + /// s.insert_str(0, "foo"); + /// + /// assert_eq!("foobar", s); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[stable(feature = "insert_str", since = "1.16.0")] + pub fn insert_str(&mut self, idx: usize, string: &str) { + assert!(self.is_char_boundary(idx)); + + unsafe { + self.insert_bytes(idx, string.as_bytes()); + } + } + + /// Returns a mutable reference to the contents of this `String`. + /// + /// # Safety + /// + /// This function is unsafe because the returned `&mut Vec` allows writing + /// bytes which are not valid UTF-8. If this constraint is violated, using + /// the original `String` after dropping the `&mut Vec` may violate memory + /// safety, as the rest of the standard library assumes that `String`s are + /// valid UTF-8. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::from("hello"); + /// + /// unsafe { + /// let vec = s.as_mut_vec(); + /// assert_eq!(&[104, 101, 108, 108, 111][..], &vec[..]); + /// + /// vec.reverse(); + /// } + /// assert_eq!(s, "olleh"); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub unsafe fn as_mut_vec(&mut self) -> &mut Vec<u8> { + &mut self.vec + } + + /// Returns the length of this `String`, in bytes, not [`char`]s or + /// graphemes. In other words, it might not be what a human considers the + /// length of the string. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let a = String::from("foo"); + /// assert_eq!(a.len(), 3); + /// + /// let fancy_f = String::from("ƒoo"); + /// assert_eq!(fancy_f.len(), 4); + /// assert_eq!(fancy_f.chars().count(), 3); + /// ``` + #[inline] + #[must_use] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn len(&self) -> usize { + self.vec.len() + } + + /// Returns `true` if this `String` has a length of zero, and `false` otherwise. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut v = String::new(); + /// assert!(v.is_empty()); + /// + /// v.push('a'); + /// assert!(!v.is_empty()); + /// ``` + #[inline] + #[must_use] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Splits the string into two at the given byte index. + /// + /// Returns a newly allocated `String`. `self` contains bytes `[0, at)`, and + /// the returned `String` contains bytes `[at, len)`. `at` must be on the + /// boundary of a UTF-8 code point. + /// + /// Note that the capacity of `self` does not change. + /// + /// # Panics + /// + /// Panics if `at` is not on a `UTF-8` code point boundary, or if it is beyond the last + /// code point of the string. + /// + /// # Examples + /// + /// ``` + /// # fn main() { + /// let mut hello = String::from("Hello, World!"); + /// let world = hello.split_off(7); + /// assert_eq!(hello, "Hello, "); + /// assert_eq!(world, "World!"); + /// # } + /// ``` + #[cfg(not(no_global_oom_handling))] + #[inline] + #[stable(feature = "string_split_off", since = "1.16.0")] + #[must_use = "use `.truncate()` if you don't need the other half"] + pub fn split_off(&mut self, at: usize) -> String { + assert!(self.is_char_boundary(at)); + let other = self.vec.split_off(at); + unsafe { String::from_utf8_unchecked(other) } + } + + /// Truncates this `String`, removing all contents. + /// + /// While this means the `String` will have a length of zero, it does not + /// touch its capacity. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::from("foo"); + /// + /// s.clear(); + /// + /// assert!(s.is_empty()); + /// assert_eq!(0, s.len()); + /// assert_eq!(3, s.capacity()); + /// ``` + #[inline] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn clear(&mut self) { + self.vec.clear() + } + + /// Removes the specified range from the string in bulk, returning all + /// removed characters as an iterator. + /// + /// The returned iterator keeps a mutable borrow on the string to optimize + /// its implementation. + /// + /// # Panics + /// + /// Panics if the starting point or end point do not lie on a [`char`] + /// boundary, or if they're out of bounds. + /// + /// # Leaking + /// + /// If the returned iterator goes out of scope without being dropped (due to + /// [`core::mem::forget`], for example), the string may still contain a copy + /// of any drained characters, or may have lost characters arbitrarily, + /// including characters outside the range. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::from("α is alpha, β is beta"); + /// let beta_offset = s.find('β').unwrap_or(s.len()); + /// + /// // Remove the range up until the β from the string + /// let t: String = s.drain(..beta_offset).collect(); + /// assert_eq!(t, "α is alpha, "); + /// assert_eq!(s, "β is beta"); + /// + /// // A full range clears the string, like `clear()` does + /// s.drain(..); + /// assert_eq!(s, ""); + /// ``` + #[stable(feature = "drain", since = "1.6.0")] + pub fn drain<R>(&mut self, range: R) -> Drain<'_> + where + R: RangeBounds<usize>, + { + // Memory safety + // + // The String version of Drain does not have the memory safety issues + // of the vector version. The data is just plain bytes. + // Because the range removal happens in Drop, if the Drain iterator is leaked, + // the removal will not happen. + let Range { start, end } = slice::range(range, ..self.len()); + assert!(self.is_char_boundary(start)); + assert!(self.is_char_boundary(end)); + + // Take out two simultaneous borrows. The &mut String won't be accessed + // until iteration is over, in Drop. + let self_ptr = self as *mut _; + // SAFETY: `slice::range` and `is_char_boundary` do the appropriate bounds checks. + let chars_iter = unsafe { self.get_unchecked(start..end) }.chars(); + + Drain { start, end, iter: chars_iter, string: self_ptr } + } + + /// Removes the specified range in the string, + /// and replaces it with the given string. + /// The given string doesn't need to be the same length as the range. + /// + /// # Panics + /// + /// Panics if the starting point or end point do not lie on a [`char`] + /// boundary, or if they're out of bounds. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let mut s = String::from("α is alpha, β is beta"); + /// let beta_offset = s.find('β').unwrap_or(s.len()); + /// + /// // Replace the range up until the β from the string + /// s.replace_range(..beta_offset, "Α is capital alpha; "); + /// assert_eq!(s, "Α is capital alpha; β is beta"); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "splice", since = "1.27.0")] + pub fn replace_range<R>(&mut self, range: R, replace_with: &str) + where + R: RangeBounds<usize>, + { + // Memory safety + // + // Replace_range does not have the memory safety issues of a vector Splice. + // of the vector version. The data is just plain bytes. + + // WARNING: Inlining this variable would be unsound (#81138) + let start = range.start_bound(); + match start { + Included(&n) => assert!(self.is_char_boundary(n)), + Excluded(&n) => assert!(self.is_char_boundary(n + 1)), + Unbounded => {} + }; + // WARNING: Inlining this variable would be unsound (#81138) + let end = range.end_bound(); + match end { + Included(&n) => assert!(self.is_char_boundary(n + 1)), + Excluded(&n) => assert!(self.is_char_boundary(n)), + Unbounded => {} + }; + + // Using `range` again would be unsound (#81138) + // We assume the bounds reported by `range` remain the same, but + // an adversarial implementation could change between calls + unsafe { self.as_mut_vec() }.splice((start, end), replace_with.bytes()); + } + + /// Converts this `String` into a <code>[Box]<[str]></code>. + /// + /// This will drop any excess capacity. + /// + /// [str]: prim@str "str" + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s = String::from("hello"); + /// + /// let b = s.into_boxed_str(); + /// ``` + #[cfg(not(no_global_oom_handling))] + #[stable(feature = "box_str", since = "1.4.0")] + #[must_use = "`self` will be dropped if the result is not used"] + #[inline] + pub fn into_boxed_str(self) -> Box<str> { + let slice = self.vec.into_boxed_slice(); + unsafe { from_boxed_utf8_unchecked(slice) } + } +} + +impl FromUtf8Error { + /// Returns a slice of [`u8`]s bytes that were attempted to convert to a `String`. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// // some invalid bytes, in a vector + /// let bytes = vec![0, 159]; + /// + /// let value = String::from_utf8(bytes); + /// + /// assert_eq!(&[0, 159], value.unwrap_err().as_bytes()); + /// ``` + #[must_use] + #[stable(feature = "from_utf8_error_as_bytes", since = "1.26.0")] + pub fn as_bytes(&self) -> &[u8] { + &self.bytes[..] + } + + /// Returns the bytes that were attempted to convert to a `String`. + /// + /// This method is carefully constructed to avoid allocation. It will + /// consume the error, moving out the bytes, so that a copy of the bytes + /// does not need to be made. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// // some invalid bytes, in a vector + /// let bytes = vec![0, 159]; + /// + /// let value = String::from_utf8(bytes); + /// + /// assert_eq!(vec![0, 159], value.unwrap_err().into_bytes()); + /// ``` + #[must_use = "`self` will be dropped if the result is not used"] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn into_bytes(self) -> Vec<u8> { + self.bytes + } + + /// Fetch a `Utf8Error` to get more details about the conversion failure. + /// + /// The [`Utf8Error`] type provided by [`std::str`] represents an error that may + /// occur when converting a slice of [`u8`]s to a [`&str`]. In this sense, it's + /// an analogue to `FromUtf8Error`. See its documentation for more details + /// on using it. + /// + /// [`std::str`]: core::str "std::str" + /// [`&str`]: prim@str "&str" + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// // some invalid bytes, in a vector + /// let bytes = vec![0, 159]; + /// + /// let error = String::from_utf8(bytes).unwrap_err().utf8_error(); + /// + /// // the first byte is invalid here + /// assert_eq!(1, error.valid_up_to()); + /// ``` + #[must_use] + #[stable(feature = "rust1", since = "1.0.0")] + pub fn utf8_error(&self) -> Utf8Error { + self.error + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Display for FromUtf8Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(&self.error, f) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Display for FromUtf16Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt("invalid utf-16: lone surrogate found", f) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl Clone for String { + fn clone(&self) -> Self { + String { vec: self.vec.clone() } + } + + fn clone_from(&mut self, source: &Self) { + self.vec.clone_from(&source.vec); + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl FromIterator<char> for String { + fn from_iter<I: IntoIterator<Item = char>>(iter: I) -> String { + let mut buf = String::new(); + buf.extend(iter); + buf + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "string_from_iter_by_ref", since = "1.17.0")] +impl<'a> FromIterator<&'a char> for String { + fn from_iter<I: IntoIterator<Item = &'a char>>(iter: I) -> String { + let mut buf = String::new(); + buf.extend(iter); + buf + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a> FromIterator<&'a str> for String { + fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> String { + let mut buf = String::new(); + buf.extend(iter); + buf + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "extend_string", since = "1.4.0")] +impl FromIterator<String> for String { + fn from_iter<I: IntoIterator<Item = String>>(iter: I) -> String { + let mut iterator = iter.into_iter(); + + // Because we're iterating over `String`s, we can avoid at least + // one allocation by getting the first string from the iterator + // and appending to it all the subsequent strings. + match iterator.next() { + None => String::new(), + Some(mut buf) => { + buf.extend(iterator); + buf + } + } + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "box_str2", since = "1.45.0")] +impl FromIterator<Box<str>> for String { + fn from_iter<I: IntoIterator<Item = Box<str>>>(iter: I) -> String { + let mut buf = String::new(); + buf.extend(iter); + buf + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "herd_cows", since = "1.19.0")] +impl<'a> FromIterator<Cow<'a, str>> for String { + fn from_iter<I: IntoIterator<Item = Cow<'a, str>>>(iter: I) -> String { + let mut iterator = iter.into_iter(); + + // Because we're iterating over CoWs, we can (potentially) avoid at least + // one allocation by getting the first item and appending to it all the + // subsequent items. + match iterator.next() { + None => String::new(), + Some(cow) => { + let mut buf = cow.into_owned(); + buf.extend(iterator); + buf + } + } + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl Extend<char> for String { + fn extend<I: IntoIterator<Item = char>>(&mut self, iter: I) { + let iterator = iter.into_iter(); + let (lower_bound, _) = iterator.size_hint(); + self.reserve(lower_bound); + iterator.for_each(move |c| self.push(c)); + } + + #[inline] + fn extend_one(&mut self, c: char) { + self.push(c); + } + + #[inline] + fn extend_reserve(&mut self, additional: usize) { + self.reserve(additional); + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "extend_ref", since = "1.2.0")] +impl<'a> Extend<&'a char> for String { + fn extend<I: IntoIterator<Item = &'a char>>(&mut self, iter: I) { + self.extend(iter.into_iter().cloned()); + } + + #[inline] + fn extend_one(&mut self, &c: &'a char) { + self.push(c); + } + + #[inline] + fn extend_reserve(&mut self, additional: usize) { + self.reserve(additional); + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a> Extend<&'a str> for String { + fn extend<I: IntoIterator<Item = &'a str>>(&mut self, iter: I) { + iter.into_iter().for_each(move |s| self.push_str(s)); + } + + #[inline] + fn extend_one(&mut self, s: &'a str) { + self.push_str(s); + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "box_str2", since = "1.45.0")] +impl Extend<Box<str>> for String { + fn extend<I: IntoIterator<Item = Box<str>>>(&mut self, iter: I) { + iter.into_iter().for_each(move |s| self.push_str(&s)); + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "extend_string", since = "1.4.0")] +impl Extend<String> for String { + fn extend<I: IntoIterator<Item = String>>(&mut self, iter: I) { + iter.into_iter().for_each(move |s| self.push_str(&s)); + } + + #[inline] + fn extend_one(&mut self, s: String) { + self.push_str(&s); + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "herd_cows", since = "1.19.0")] +impl<'a> Extend<Cow<'a, str>> for String { + fn extend<I: IntoIterator<Item = Cow<'a, str>>>(&mut self, iter: I) { + iter.into_iter().for_each(move |s| self.push_str(&s)); + } + + #[inline] + fn extend_one(&mut self, s: Cow<'a, str>) { + self.push_str(&s); + } +} + +/// A convenience impl that delegates to the impl for `&str`. +/// +/// # Examples +/// +/// ``` +/// assert_eq!(String::from("Hello world").find("world"), Some(6)); +/// ``` +#[unstable( + feature = "pattern", + reason = "API not fully fleshed out and ready to be stabilized", + issue = "27721" +)] +impl<'a, 'b> Pattern<'a> for &'b String { + type Searcher = <&'b str as Pattern<'a>>::Searcher; + + fn into_searcher(self, haystack: &'a str) -> <&'b str as Pattern<'a>>::Searcher { + self[..].into_searcher(haystack) + } + + #[inline] + fn is_contained_in(self, haystack: &'a str) -> bool { + self[..].is_contained_in(haystack) + } + + #[inline] + fn is_prefix_of(self, haystack: &'a str) -> bool { + self[..].is_prefix_of(haystack) + } + + #[inline] + fn strip_prefix_of(self, haystack: &'a str) -> Option<&'a str> { + self[..].strip_prefix_of(haystack) + } + + #[inline] + fn is_suffix_of(self, haystack: &'a str) -> bool { + self[..].is_suffix_of(haystack) + } + + #[inline] + fn strip_suffix_of(self, haystack: &'a str) -> Option<&'a str> { + self[..].strip_suffix_of(haystack) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl PartialEq for String { + #[inline] + fn eq(&self, other: &String) -> bool { + PartialEq::eq(&self[..], &other[..]) + } + #[inline] + fn ne(&self, other: &String) -> bool { + PartialEq::ne(&self[..], &other[..]) + } +} + +macro_rules! impl_eq { + ($lhs:ty, $rhs: ty) => { + #[stable(feature = "rust1", since = "1.0.0")] + #[allow(unused_lifetimes)] + impl<'a, 'b> PartialEq<$rhs> for $lhs { + #[inline] + fn eq(&self, other: &$rhs) -> bool { + PartialEq::eq(&self[..], &other[..]) + } + #[inline] + fn ne(&self, other: &$rhs) -> bool { + PartialEq::ne(&self[..], &other[..]) + } + } + + #[stable(feature = "rust1", since = "1.0.0")] + #[allow(unused_lifetimes)] + impl<'a, 'b> PartialEq<$lhs> for $rhs { + #[inline] + fn eq(&self, other: &$lhs) -> bool { + PartialEq::eq(&self[..], &other[..]) + } + #[inline] + fn ne(&self, other: &$lhs) -> bool { + PartialEq::ne(&self[..], &other[..]) + } + } + }; +} + +impl_eq! { String, str } +impl_eq! { String, &'a str } +#[cfg(not(no_global_oom_handling))] +impl_eq! { Cow<'a, str>, str } +#[cfg(not(no_global_oom_handling))] +impl_eq! { Cow<'a, str>, &'b str } +#[cfg(not(no_global_oom_handling))] +impl_eq! { Cow<'a, str>, String } + +#[stable(feature = "rust1", since = "1.0.0")] +#[rustc_const_unstable(feature = "const_default_impls", issue = "87864")] +impl const Default for String { + /// Creates an empty `String`. + #[inline] + fn default() -> String { + String::new() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Display for String { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Display::fmt(&**self, f) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Debug for String { + #[inline] + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&**self, f) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl hash::Hash for String { + #[inline] + fn hash<H: hash::Hasher>(&self, hasher: &mut H) { + (**self).hash(hasher) + } +} + +/// Implements the `+` operator for concatenating two strings. +/// +/// This consumes the `String` on the left-hand side and re-uses its buffer (growing it if +/// necessary). This is done to avoid allocating a new `String` and copying the entire contents on +/// every operation, which would lead to *O*(*n*^2) running time when building an *n*-byte string by +/// repeated concatenation. +/// +/// The string on the right-hand side is only borrowed; its contents are copied into the returned +/// `String`. +/// +/// # Examples +/// +/// Concatenating two `String`s takes the first by value and borrows the second: +/// +/// ``` +/// let a = String::from("hello"); +/// let b = String::from(" world"); +/// let c = a + &b; +/// // `a` is moved and can no longer be used here. +/// ``` +/// +/// If you want to keep using the first `String`, you can clone it and append to the clone instead: +/// +/// ``` +/// let a = String::from("hello"); +/// let b = String::from(" world"); +/// let c = a.clone() + &b; +/// // `a` is still valid here. +/// ``` +/// +/// Concatenating `&str` slices can be done by converting the first to a `String`: +/// +/// ``` +/// let a = "hello"; +/// let b = " world"; +/// let c = a.to_string() + b; +/// ``` +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl Add<&str> for String { + type Output = String; + + #[inline] + fn add(mut self, other: &str) -> String { + self.push_str(other); + self + } +} + +/// Implements the `+=` operator for appending to a `String`. +/// +/// This has the same behavior as the [`push_str`][String::push_str] method. +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "stringaddassign", since = "1.12.0")] +impl AddAssign<&str> for String { + #[inline] + fn add_assign(&mut self, other: &str) { + self.push_str(other); + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl ops::Index<ops::Range<usize>> for String { + type Output = str; + + #[inline] + fn index(&self, index: ops::Range<usize>) -> &str { + &self[..][index] + } +} +#[stable(feature = "rust1", since = "1.0.0")] +impl ops::Index<ops::RangeTo<usize>> for String { + type Output = str; + + #[inline] + fn index(&self, index: ops::RangeTo<usize>) -> &str { + &self[..][index] + } +} +#[stable(feature = "rust1", since = "1.0.0")] +impl ops::Index<ops::RangeFrom<usize>> for String { + type Output = str; + + #[inline] + fn index(&self, index: ops::RangeFrom<usize>) -> &str { + &self[..][index] + } +} +#[stable(feature = "rust1", since = "1.0.0")] +impl ops::Index<ops::RangeFull> for String { + type Output = str; + + #[inline] + fn index(&self, _index: ops::RangeFull) -> &str { + unsafe { str::from_utf8_unchecked(&self.vec) } + } +} +#[stable(feature = "inclusive_range", since = "1.26.0")] +impl ops::Index<ops::RangeInclusive<usize>> for String { + type Output = str; + + #[inline] + fn index(&self, index: ops::RangeInclusive<usize>) -> &str { + Index::index(&**self, index) + } +} +#[stable(feature = "inclusive_range", since = "1.26.0")] +impl ops::Index<ops::RangeToInclusive<usize>> for String { + type Output = str; + + #[inline] + fn index(&self, index: ops::RangeToInclusive<usize>) -> &str { + Index::index(&**self, index) + } +} + +#[stable(feature = "derefmut_for_string", since = "1.3.0")] +impl ops::IndexMut<ops::Range<usize>> for String { + #[inline] + fn index_mut(&mut self, index: ops::Range<usize>) -> &mut str { + &mut self[..][index] + } +} +#[stable(feature = "derefmut_for_string", since = "1.3.0")] +impl ops::IndexMut<ops::RangeTo<usize>> for String { + #[inline] + fn index_mut(&mut self, index: ops::RangeTo<usize>) -> &mut str { + &mut self[..][index] + } +} +#[stable(feature = "derefmut_for_string", since = "1.3.0")] +impl ops::IndexMut<ops::RangeFrom<usize>> for String { + #[inline] + fn index_mut(&mut self, index: ops::RangeFrom<usize>) -> &mut str { + &mut self[..][index] + } +} +#[stable(feature = "derefmut_for_string", since = "1.3.0")] +impl ops::IndexMut<ops::RangeFull> for String { + #[inline] + fn index_mut(&mut self, _index: ops::RangeFull) -> &mut str { + unsafe { str::from_utf8_unchecked_mut(&mut *self.vec) } + } +} +#[stable(feature = "inclusive_range", since = "1.26.0")] +impl ops::IndexMut<ops::RangeInclusive<usize>> for String { + #[inline] + fn index_mut(&mut self, index: ops::RangeInclusive<usize>) -> &mut str { + IndexMut::index_mut(&mut **self, index) + } +} +#[stable(feature = "inclusive_range", since = "1.26.0")] +impl ops::IndexMut<ops::RangeToInclusive<usize>> for String { + #[inline] + fn index_mut(&mut self, index: ops::RangeToInclusive<usize>) -> &mut str { + IndexMut::index_mut(&mut **self, index) + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl ops::Deref for String { + type Target = str; + + #[inline] + fn deref(&self) -> &str { + unsafe { str::from_utf8_unchecked(&self.vec) } + } +} + +#[stable(feature = "derefmut_for_string", since = "1.3.0")] +impl ops::DerefMut for String { + #[inline] + fn deref_mut(&mut self) -> &mut str { + unsafe { str::from_utf8_unchecked_mut(&mut *self.vec) } + } +} + +/// A type alias for [`Infallible`]. +/// +/// This alias exists for backwards compatibility, and may be eventually deprecated. +/// +/// [`Infallible`]: core::convert::Infallible "convert::Infallible" +#[stable(feature = "str_parse_error", since = "1.5.0")] +pub type ParseError = core::convert::Infallible; + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl FromStr for String { + type Err = core::convert::Infallible; + #[inline] + fn from_str(s: &str) -> Result<String, Self::Err> { + Ok(String::from(s)) + } +} + +/// A trait for converting a value to a `String`. +/// +/// This trait is automatically implemented for any type which implements the +/// [`Display`] trait. As such, `ToString` shouldn't be implemented directly: +/// [`Display`] should be implemented instead, and you get the `ToString` +/// implementation for free. +/// +/// [`Display`]: fmt::Display +#[cfg_attr(not(test), rustc_diagnostic_item = "ToString")] +#[stable(feature = "rust1", since = "1.0.0")] +pub trait ToString { + /// Converts the given value to a `String`. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let i = 5; + /// let five = String::from("5"); + /// + /// assert_eq!(five, i.to_string()); + /// ``` + #[rustc_conversion_suggestion] + #[stable(feature = "rust1", since = "1.0.0")] + fn to_string(&self) -> String; +} + +/// # Panics +/// +/// In this implementation, the `to_string` method panics +/// if the `Display` implementation returns an error. +/// This indicates an incorrect `Display` implementation +/// since `fmt::Write for String` never returns an error itself. +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl<T: fmt::Display + ?Sized> ToString for T { + // A common guideline is to not inline generic functions. However, + // removing `#[inline]` from this method causes non-negligible regressions. + // See <https://github.com/rust-lang/rust/pull/74852>, the last attempt + // to try to remove it. + #[inline] + default fn to_string(&self) -> String { + let mut buf = String::new(); + let mut formatter = core::fmt::Formatter::new(&mut buf); + // Bypass format_args!() to avoid write_str with zero-length strs + fmt::Display::fmt(self, &mut formatter) + .expect("a Display implementation returned an error unexpectedly"); + buf + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "char_to_string_specialization", since = "1.46.0")] +impl ToString for char { + #[inline] + fn to_string(&self) -> String { + String::from(self.encode_utf8(&mut [0; 4])) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "u8_to_string_specialization", since = "1.54.0")] +impl ToString for u8 { + #[inline] + fn to_string(&self) -> String { + let mut buf = String::with_capacity(3); + let mut n = *self; + if n >= 10 { + if n >= 100 { + buf.push((b'0' + n / 100) as char); + n %= 100; + } + buf.push((b'0' + n / 10) as char); + n %= 10; + } + buf.push((b'0' + n) as char); + buf + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "i8_to_string_specialization", since = "1.54.0")] +impl ToString for i8 { + #[inline] + fn to_string(&self) -> String { + let mut buf = String::with_capacity(4); + if self.is_negative() { + buf.push('-'); + } + let mut n = self.unsigned_abs(); + if n >= 10 { + if n >= 100 { + buf.push('1'); + n -= 100; + } + buf.push((b'0' + n / 10) as char); + n %= 10; + } + buf.push((b'0' + n) as char); + buf + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "str_to_string_specialization", since = "1.9.0")] +impl ToString for str { + #[inline] + fn to_string(&self) -> String { + String::from(self) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "cow_str_to_string_specialization", since = "1.17.0")] +impl ToString for Cow<'_, str> { + #[inline] + fn to_string(&self) -> String { + self[..].to_owned() + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "string_to_string_specialization", since = "1.17.0")] +impl ToString for String { + #[inline] + fn to_string(&self) -> String { + self.to_owned() + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl AsRef<str> for String { + #[inline] + fn as_ref(&self) -> &str { + self + } +} + +#[stable(feature = "string_as_mut", since = "1.43.0")] +impl AsMut<str> for String { + #[inline] + fn as_mut(&mut self) -> &mut str { + self + } +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl AsRef<[u8]> for String { + #[inline] + fn as_ref(&self) -> &[u8] { + self.as_bytes() + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl From<&str> for String { + /// Converts a `&str` into a [`String`]. + /// + /// The result is allocated on the heap. + #[inline] + fn from(s: &str) -> String { + s.to_owned() + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "from_mut_str_for_string", since = "1.44.0")] +impl From<&mut str> for String { + /// Converts a `&mut str` into a [`String`]. + /// + /// The result is allocated on the heap. + #[inline] + fn from(s: &mut str) -> String { + s.to_owned() + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "from_ref_string", since = "1.35.0")] +impl From<&String> for String { + /// Converts a `&String` into a [`String`]. + /// + /// This clones `s` and returns the clone. + #[inline] + fn from(s: &String) -> String { + s.clone() + } +} + +// note: test pulls in libstd, which causes errors here +#[cfg(not(test))] +#[stable(feature = "string_from_box", since = "1.18.0")] +impl From<Box<str>> for String { + /// Converts the given boxed `str` slice to a [`String`]. + /// It is notable that the `str` slice is owned. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s1: String = String::from("hello world"); + /// let s2: Box<str> = s1.into_boxed_str(); + /// let s3: String = String::from(s2); + /// + /// assert_eq!("hello world", s3) + /// ``` + fn from(s: Box<str>) -> String { + s.into_string() + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "box_from_str", since = "1.20.0")] +impl From<String> for Box<str> { + /// Converts the given [`String`] to a boxed `str` slice that is owned. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s1: String = String::from("hello world"); + /// let s2: Box<str> = Box::from(s1); + /// let s3: String = String::from(s2); + /// + /// assert_eq!("hello world", s3) + /// ``` + fn from(s: String) -> Box<str> { + s.into_boxed_str() + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "string_from_cow_str", since = "1.14.0")] +impl<'a> From<Cow<'a, str>> for String { + /// Converts a clone-on-write string to an owned + /// instance of [`String`]. + /// + /// This extracts the owned string, + /// clones the string if it is not already owned. + /// + /// # Example + /// + /// ``` + /// # use std::borrow::Cow; + /// // If the string is not owned... + /// let cow: Cow<str> = Cow::Borrowed("eggplant"); + /// // It will allocate on the heap and copy the string. + /// let owned: String = String::from(cow); + /// assert_eq!(&owned[..], "eggplant"); + /// ``` + fn from(s: Cow<'a, str>) -> String { + s.into_owned() + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a> From<&'a str> for Cow<'a, str> { + /// Converts a string slice into a [`Borrowed`] variant. + /// No heap allocation is performed, and the string + /// is not copied. + /// + /// # Example + /// + /// ``` + /// # use std::borrow::Cow; + /// assert_eq!(Cow::from("eggplant"), Cow::Borrowed("eggplant")); + /// ``` + /// + /// [`Borrowed`]: crate::borrow::Cow::Borrowed "borrow::Cow::Borrowed" + #[inline] + fn from(s: &'a str) -> Cow<'a, str> { + Cow::Borrowed(s) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl<'a> From<String> for Cow<'a, str> { + /// Converts a [`String`] into an [`Owned`] variant. + /// No heap allocation is performed, and the string + /// is not copied. + /// + /// # Example + /// + /// ``` + /// # use std::borrow::Cow; + /// let s = "eggplant".to_string(); + /// let s2 = "eggplant".to_string(); + /// assert_eq!(Cow::from(s), Cow::<'static, str>::Owned(s2)); + /// ``` + /// + /// [`Owned`]: crate::borrow::Cow::Owned "borrow::Cow::Owned" + #[inline] + fn from(s: String) -> Cow<'a, str> { + Cow::Owned(s) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "cow_from_string_ref", since = "1.28.0")] +impl<'a> From<&'a String> for Cow<'a, str> { + /// Converts a [`String`] reference into a [`Borrowed`] variant. + /// No heap allocation is performed, and the string + /// is not copied. + /// + /// # Example + /// + /// ``` + /// # use std::borrow::Cow; + /// let s = "eggplant".to_string(); + /// assert_eq!(Cow::from(&s), Cow::Borrowed("eggplant")); + /// ``` + /// + /// [`Borrowed`]: crate::borrow::Cow::Borrowed "borrow::Cow::Borrowed" + #[inline] + fn from(s: &'a String) -> Cow<'a, str> { + Cow::Borrowed(s.as_str()) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "cow_str_from_iter", since = "1.12.0")] +impl<'a> FromIterator<char> for Cow<'a, str> { + fn from_iter<I: IntoIterator<Item = char>>(it: I) -> Cow<'a, str> { + Cow::Owned(FromIterator::from_iter(it)) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "cow_str_from_iter", since = "1.12.0")] +impl<'a, 'b> FromIterator<&'b str> for Cow<'a, str> { + fn from_iter<I: IntoIterator<Item = &'b str>>(it: I) -> Cow<'a, str> { + Cow::Owned(FromIterator::from_iter(it)) + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "cow_str_from_iter", since = "1.12.0")] +impl<'a> FromIterator<String> for Cow<'a, str> { + fn from_iter<I: IntoIterator<Item = String>>(it: I) -> Cow<'a, str> { + Cow::Owned(FromIterator::from_iter(it)) + } +} + +#[stable(feature = "from_string_for_vec_u8", since = "1.14.0")] +impl From<String> for Vec<u8> { + /// Converts the given [`String`] to a vector [`Vec`] that holds values of type [`u8`]. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// let s1 = String::from("hello world"); + /// let v1 = Vec::from(s1); + /// + /// for b in v1 { + /// println!("{b}"); + /// } + /// ``` + fn from(string: String) -> Vec<u8> { + string.into_bytes() + } +} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "rust1", since = "1.0.0")] +impl fmt::Write for String { + #[inline] + fn write_str(&mut self, s: &str) -> fmt::Result { + self.push_str(s); + Ok(()) + } + + #[inline] + fn write_char(&mut self, c: char) -> fmt::Result { + self.push(c); + Ok(()) + } +} + +/// A draining iterator for `String`. +/// +/// This struct is created by the [`drain`] method on [`String`]. See its +/// documentation for more. +/// +/// [`drain`]: String::drain +#[stable(feature = "drain", since = "1.6.0")] +pub struct Drain<'a> { + /// Will be used as &'a mut String in the destructor + string: *mut String, + /// Start of part to remove + start: usize, + /// End of part to remove + end: usize, + /// Current remaining range to remove + iter: Chars<'a>, +} + +#[stable(feature = "collection_debug", since = "1.17.0")] +impl fmt::Debug for Drain<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_tuple("Drain").field(&self.as_str()).finish() + } +} + +#[stable(feature = "drain", since = "1.6.0")] +unsafe impl Sync for Drain<'_> {} +#[stable(feature = "drain", since = "1.6.0")] +unsafe impl Send for Drain<'_> {} + +#[stable(feature = "drain", since = "1.6.0")] +impl Drop for Drain<'_> { + fn drop(&mut self) { + unsafe { + // Use Vec::drain. "Reaffirm" the bounds checks to avoid + // panic code being inserted again. + let self_vec = (*self.string).as_mut_vec(); + if self.start <= self.end && self.end <= self_vec.len() { + self_vec.drain(self.start..self.end); + } + } + } +} + +impl<'a> Drain<'a> { + /// Returns the remaining (sub)string of this iterator as a slice. + /// + /// # Examples + /// + /// ``` + /// let mut s = String::from("abc"); + /// let mut drain = s.drain(..); + /// assert_eq!(drain.as_str(), "abc"); + /// let _ = drain.next().unwrap(); + /// assert_eq!(drain.as_str(), "bc"); + /// ``` + #[must_use] + #[stable(feature = "string_drain_as_str", since = "1.55.0")] + pub fn as_str(&self) -> &str { + self.iter.as_str() + } +} + +#[stable(feature = "string_drain_as_str", since = "1.55.0")] +impl<'a> AsRef<str> for Drain<'a> { + fn as_ref(&self) -> &str { + self.as_str() + } +} + +#[stable(feature = "string_drain_as_str", since = "1.55.0")] +impl<'a> AsRef<[u8]> for Drain<'a> { + fn as_ref(&self) -> &[u8] { + self.as_str().as_bytes() + } +} + +#[stable(feature = "drain", since = "1.6.0")] +impl Iterator for Drain<'_> { + type Item = char; + + #[inline] + fn next(&mut self) -> Option<char> { + self.iter.next() + } + + fn size_hint(&self) -> (usize, Option<usize>) { + self.iter.size_hint() + } + + #[inline] + fn last(mut self) -> Option<char> { + self.next_back() + } +} + +#[stable(feature = "drain", since = "1.6.0")] +impl DoubleEndedIterator for Drain<'_> { + #[inline] + fn next_back(&mut self) -> Option<char> { + self.iter.next_back() + } +} + +#[stable(feature = "fused", since = "1.26.0")] +impl FusedIterator for Drain<'_> {} + +#[cfg(not(no_global_oom_handling))] +#[stable(feature = "from_char_for_string", since = "1.46.0")] +impl From<char> for String { + /// Allocates an owned [`String`] from a single character. + /// + /// # Example + /// ```rust + /// let c: char = 'a'; + /// let s: String = String::from(c); + /// assert_eq!("a", &s[..]); + /// ``` + #[inline] + fn from(c: char) -> Self { + c.to_string() + } +} diff --git a/rust/alloc/vec/into_iter.rs b/rust/alloc/vec/into_iter.rs index f7a50e76691e..09cfee0ae631 100644 --- a/rust/alloc/vec/into_iter.rs +++ b/rust/alloc/vec/into_iter.rs @@ -126,7 +126,6 @@ impl<T, A: Allocator> IntoIter<T, A> { } /// Forgets to Drop the remaining elements while still allowing the backing allocation to be freed. - #[allow(dead_code)] pub(crate) fn forget_remaining_elements(&mut self) { self.ptr = self.end; } diff --git a/rust/alloc/vec/mod.rs b/rust/alloc/vec/mod.rs index 540787804cc2..4ae81b890fd9 100644 --- a/rust/alloc/vec/mod.rs +++ b/rust/alloc/vec/mod.rs @@ -120,10 +120,8 @@ use self::spec_from_elem::SpecFromElem; #[cfg(not(no_global_oom_handling))] mod spec_from_elem; -#[cfg(not(no_global_oom_handling))] use self::set_len_on_drop::SetLenOnDrop; -#[cfg(not(no_global_oom_handling))] mod set_len_on_drop; #[cfg(not(no_global_oom_handling))] @@ -147,7 +145,8 @@ mod spec_from_iter; #[cfg(not(no_global_oom_handling))] use self::spec_extend::SpecExtend; -#[cfg(not(no_global_oom_handling))] +use self::spec_extend::TrySpecExtend; + mod spec_extend; /// A contiguous growable array type, written as `Vec<T>`, short for 'vector'. @@ -472,6 +471,48 @@ impl<T> Vec<T> { Self::with_capacity_in(capacity, Global) } + /// Tries to construct a new, empty `Vec<T>` with the specified capacity. + /// + /// The vector will be able to hold exactly `capacity` elements without + /// reallocating. If `capacity` is 0, the vector will not allocate. + /// + /// It is important to note that although the returned vector has the + /// *capacity* specified, the vector will have a zero *length*. For an + /// explanation of the difference between length and capacity, see + /// *[Capacity and reallocation]*. + /// + /// [Capacity and reallocation]: #capacity-and-reallocation + /// + /// # Examples + /// + /// ``` + /// let mut vec = Vec::try_with_capacity(10).unwrap(); + /// + /// // The vector contains no items, even though it has capacity for more + /// assert_eq!(vec.len(), 0); + /// assert_eq!(vec.capacity(), 10); + /// + /// // These are all done without reallocating... + /// for i in 0..10 { + /// vec.push(i); + /// } + /// assert_eq!(vec.len(), 10); + /// assert_eq!(vec.capacity(), 10); + /// + /// // ...but this may make the vector reallocate + /// vec.push(11); + /// assert_eq!(vec.len(), 11); + /// assert!(vec.capacity() >= 11); + /// + /// let mut result = Vec::try_with_capacity(usize::MAX); + /// assert!(result.is_err()); + /// ``` + #[inline] + #[stable(feature = "kernel", since = "1.0.0")] + pub fn try_with_capacity(capacity: usize) -> Result<Self, TryReserveError> { + Self::try_with_capacity_in(capacity, Global) + } + /// Creates a `Vec<T>` directly from the raw components of another vector. /// /// # Safety @@ -617,6 +658,53 @@ impl<T, A: Allocator> Vec<T, A> { Vec { buf: RawVec::with_capacity_in(capacity, alloc), len: 0 } } + /// Tries to construct a new, empty `Vec<T, A>` with the specified capacity + /// with the provided allocator. + /// + /// The vector will be able to hold exactly `capacity` elements without + /// reallocating. If `capacity` is 0, the vector will not allocate. + /// + /// It is important to note that although the returned vector has the + /// *capacity* specified, the vector will have a zero *length*. For an + /// explanation of the difference between length and capacity, see + /// *[Capacity and reallocation]*. + /// + /// [Capacity and reallocation]: #capacity-and-reallocation + /// + /// # Examples + /// + /// ``` + /// #![feature(allocator_api)] + /// + /// use std::alloc::System; + /// + /// let mut vec = Vec::try_with_capacity_in(10, System).unwrap(); + /// + /// // The vector contains no items, even though it has capacity for more + /// assert_eq!(vec.len(), 0); + /// assert_eq!(vec.capacity(), 10); + /// + /// // These are all done without reallocating... + /// for i in 0..10 { + /// vec.push(i); + /// } + /// assert_eq!(vec.len(), 10); + /// assert_eq!(vec.capacity(), 10); + /// + /// // ...but this may make the vector reallocate + /// vec.push(11); + /// assert_eq!(vec.len(), 11); + /// assert!(vec.capacity() >= 11); + /// + /// let mut result = Vec::try_with_capacity_in(usize::MAX, System); + /// assert!(result.is_err()); + /// ``` + #[inline] + #[stable(feature = "kernel", since = "1.0.0")] + pub fn try_with_capacity_in(capacity: usize, alloc: A) -> Result<Self, TryReserveError> { + Ok(Vec { buf: RawVec::try_with_capacity_in(capacity, alloc)?, len: 0 }) + } + /// Creates a `Vec<T, A>` directly from the raw components of another vector. /// /// # Safety @@ -948,6 +1036,32 @@ impl<T, A: Allocator> Vec<T, A> { } } + /// Tries to shrink the capacity of the vector as much as possible. + /// + /// It will drop down as close as possible to the length but the allocator + /// may still inform the vector that there is space for a few more elements. + /// + /// # Examples + /// + /// ``` + /// let mut vec = Vec::with_capacity(10); + /// vec.extend([1, 2, 3]); + /// assert_eq!(vec.capacity(), 10); + /// vec.try_shrink_to_fit().unwrap(); + /// assert!(vec.capacity() >= 3); + /// ``` + #[stable(feature = "kernel", since = "1.0.0")] + pub fn try_shrink_to_fit(&mut self) -> Result<(), TryReserveError> { + // The capacity is never less than the length, and there's nothing to do when + // they are equal, so we can avoid the panic case in `RawVec::try_shrink_to_fit` + // by only calling it with a greater capacity. + if self.capacity() <= self.len { + return Ok(()); + } + + self.buf.try_shrink_to_fit(self.len) + } + /// Shrinks the capacity of the vector with a lower bound. /// /// The capacity will remain at least as large as both the length @@ -1010,6 +1124,41 @@ impl<T, A: Allocator> Vec<T, A> { } } + /// Tries to convert the vector into [`Box<[T]>`][owned slice]. + /// + /// Note that this will drop any excess capacity. + /// + /// [owned slice]: Box + /// + /// # Examples + /// + /// ``` + /// let v = vec![1, 2, 3]; + /// + /// let slice = v.try_into_boxed_slice().unwrap(); + /// ``` + /// + /// Any excess capacity is removed: + /// + /// ``` + /// let mut vec = Vec::with_capacity(10); + /// vec.extend([1, 2, 3]); + /// + /// assert_eq!(vec.capacity(), 10); + /// let slice = vec.try_into_boxed_slice().unwrap(); + /// assert_eq!(slice.into_vec().capacity(), 3); + /// ``` + #[stable(feature = "kernel", since = "1.0.0")] + pub fn try_into_boxed_slice(mut self) -> Result<Box<[T], A>, TryReserveError> { + unsafe { + self.try_shrink_to_fit()?; + let me = ManuallyDrop::new(self); + let buf = ptr::read(&me.buf); + let len = me.len(); + Ok(buf.into_box(len).assume_init()) + } + } + /// Shortens the vector, keeping the first `len` elements and dropping /// the rest. /// @@ -1828,6 +1977,17 @@ impl<T, A: Allocator> Vec<T, A> { self.len += count; } + /// Tries to append elements to `self` from other buffer. + #[inline] + unsafe fn try_append_elements(&mut self, other: *const [T]) -> Result<(), TryReserveError> { + let count = unsafe { (*other).len() }; + self.try_reserve(count)?; + let len = self.len(); + unsafe { ptr::copy_nonoverlapping(other as *const T, self.as_mut_ptr().add(len), count) }; + self.len += count; + Ok(()) + } + /// Removes the specified range from the vector in bulk, returning all /// removed elements as an iterator. If the iterator is dropped before /// being fully consumed, it drops the remaining removed elements. @@ -2249,6 +2409,44 @@ impl<T: Clone, A: Allocator> Vec<T, A> { } } + /// Tries to resize the `Vec` in-place so that `len` is equal to `new_len`. + /// + /// If `new_len` is greater than `len`, the `Vec` is extended by the + /// difference, with each additional slot filled with `value`. + /// If `new_len` is less than `len`, the `Vec` is simply truncated. + /// + /// This method requires `T` to implement [`Clone`], + /// in order to be able to clone the passed value. + /// If you need more flexibility (or want to rely on [`Default`] instead of + /// [`Clone`]), use [`Vec::resize_with`]. + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec!["hello"]; + /// vec.try_resize(3, "world").unwrap(); + /// assert_eq!(vec, ["hello", "world", "world"]); + /// + /// let mut vec = vec![1, 2, 3, 4]; + /// vec.try_resize(2, 0).unwrap(); + /// assert_eq!(vec, [1, 2]); + /// + /// let mut vec = vec![42]; + /// let result = vec.try_resize(usize::MAX, 0); + /// assert!(result.is_err()); + /// ``` + #[stable(feature = "kernel", since = "1.0.0")] + pub fn try_resize(&mut self, new_len: usize, value: T) -> Result<(), TryReserveError> { + let len = self.len(); + + if new_len > len { + self.try_extend_with(new_len - len, ExtendElement(value)) + } else { + self.truncate(new_len); + Ok(()) + } + } + /// Clones and appends all elements in a slice to the `Vec`. /// /// Iterates over the slice `other`, clones each element, and then appends @@ -2274,6 +2472,30 @@ impl<T: Clone, A: Allocator> Vec<T, A> { self.spec_extend(other.iter()) } + /// Tries to clone and append all elements in a slice to the `Vec`. + /// + /// Iterates over the slice `other`, clones each element, and then appends + /// it to this `Vec`. The `other` slice is traversed in-order. + /// + /// Note that this function is same as [`extend`] except that it is + /// specialized to work with slices instead. If and when Rust gets + /// specialization this function will likely be deprecated (but still + /// available). + /// + /// # Examples + /// + /// ``` + /// let mut vec = vec![1]; + /// vec.try_extend_from_slice(&[2, 3, 4]).unwrap(); + /// assert_eq!(vec, [1, 2, 3, 4]); + /// ``` + /// + /// [`extend`]: Vec::extend + #[stable(feature = "kernel", since = "1.0.0")] + pub fn try_extend_from_slice(&mut self, other: &[T]) -> Result<(), TryReserveError> { + self.try_spec_extend(other.iter()) + } + /// Copies elements from `src` range to the end of the vector. /// /// # Panics @@ -2413,6 +2635,36 @@ impl<T, A: Allocator> Vec<T, A> { // len set by scope guard } } + + /// Try to extend the vector by `n` values, using the given generator. + fn try_extend_with<E: ExtendWith<T>>(&mut self, n: usize, mut value: E) -> Result<(), TryReserveError> { + self.try_reserve(n)?; + + unsafe { + let mut ptr = self.as_mut_ptr().add(self.len()); + // Use SetLenOnDrop to work around bug where compiler + // might not realize the store through `ptr` through self.set_len() + // don't alias. + let mut local_len = SetLenOnDrop::new(&mut self.len); + + // Write all elements except the last one + for _ in 1..n { + ptr::write(ptr, value.next()); + ptr = ptr.offset(1); + // Increment the length in every step in case next() panics + local_len.increment_len(1); + } + + if n > 0 { + // We can write the last element directly without cloning needlessly + ptr::write(ptr, value.last()); + local_len.increment_len(1); + } + + // len set by scope guard + Ok(()) + } + } } impl<T: PartialEq, A: Allocator> Vec<T, A> { @@ -2747,6 +2999,34 @@ impl<T, A: Allocator> Vec<T, A> { } } + // leaf method to which various SpecFrom/SpecExtend implementations delegate when + // they have no further optimizations to apply + fn try_extend_desugared<I: Iterator<Item = T>>(&mut self, mut iterator: I) -> Result<(), TryReserveError> { + // This is the case for a general iterator. + // + // This function should be the moral equivalent of: + // + // for item in iterator { + // self.push(item); + // } + while let Some(element) = iterator.next() { + let len = self.len(); + if len == self.capacity() { + let (lower, _) = iterator.size_hint(); + self.try_reserve(lower.saturating_add(1))?; + } + unsafe { + ptr::write(self.as_mut_ptr().add(len), element); + // Since next() executes user code which can panic we have to bump the length + // after each step. + // NB can't overflow since we would have had to alloc the address space + self.set_len(len + 1); + } + } + + Ok(()) + } + /// Creates a splicing iterator that replaces the specified range in the vector /// with the given `replace_with` iterator and yields the removed items. /// `replace_with` does not need to be the same length as `range`. diff --git a/rust/alloc/vec/set_len_on_drop.rs b/rust/alloc/vec/set_len_on_drop.rs new file mode 100644 index 000000000000..448bf5076a0b --- /dev/null +++ b/rust/alloc/vec/set_len_on_drop.rs @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +// Set the length of the vec when the `SetLenOnDrop` value goes out of scope. +// +// The idea is: The length field in SetLenOnDrop is a local variable +// that the optimizer will see does not alias with any stores through the Vec's data +// pointer. This is a workaround for alias analysis issue #32155 +pub(super) struct SetLenOnDrop<'a> { + len: &'a mut usize, + local_len: usize, +} + +impl<'a> SetLenOnDrop<'a> { + #[inline] + pub(super) fn new(len: &'a mut usize) -> Self { + SetLenOnDrop { local_len: *len, len } + } + + #[inline] + pub(super) fn increment_len(&mut self, increment: usize) { + self.local_len += increment; + } +} + +impl Drop for SetLenOnDrop<'_> { + #[inline] + fn drop(&mut self) { + *self.len = self.local_len; + } +} diff --git a/rust/alloc/vec/spec_extend.rs b/rust/alloc/vec/spec_extend.rs new file mode 100644 index 000000000000..5ce2d00991bc --- /dev/null +++ b/rust/alloc/vec/spec_extend.rs @@ -0,0 +1,174 @@ +// SPDX-License-Identifier: Apache-2.0 OR MIT + +use crate::alloc::Allocator; +use crate::collections::{TryReserveError, TryReserveErrorKind}; +use core::iter::TrustedLen; +use core::ptr::{self}; +use core::slice::{self}; + +use super::{IntoIter, SetLenOnDrop, Vec}; + +// Specialization trait used for Vec::extend +#[cfg(not(no_global_oom_handling))] +pub(super) trait SpecExtend<T, I> { + fn spec_extend(&mut self, iter: I); +} + +// Specialization trait used for Vec::try_extend +pub(super) trait TrySpecExtend<T, I> { + fn try_spec_extend(&mut self, iter: I) -> Result<(), TryReserveError>; +} + +#[cfg(not(no_global_oom_handling))] +impl<T, I, A: Allocator> SpecExtend<T, I> for Vec<T, A> +where + I: Iterator<Item = T>, +{ + default fn spec_extend(&mut self, iter: I) { + self.extend_desugared(iter) + } +} + +impl<T, I, A: Allocator> TrySpecExtend<T, I> for Vec<T, A> +where + I: Iterator<Item = T>, +{ + default fn try_spec_extend(&mut self, iter: I) -> Result<(), TryReserveError> { + self.try_extend_desugared(iter) + } +} + +#[cfg(not(no_global_oom_handling))] +impl<T, I, A: Allocator> SpecExtend<T, I> for Vec<T, A> +where + I: TrustedLen<Item = T>, +{ + default fn spec_extend(&mut self, iterator: I) { + // This is the case for a TrustedLen iterator. + let (low, high) = iterator.size_hint(); + if let Some(additional) = high { + debug_assert_eq!( + low, + additional, + "TrustedLen iterator's size hint is not exact: {:?}", + (low, high) + ); + self.reserve(additional); + unsafe { + let mut ptr = self.as_mut_ptr().add(self.len()); + let mut local_len = SetLenOnDrop::new(&mut self.len); + iterator.for_each(move |element| { + ptr::write(ptr, element); + ptr = ptr.offset(1); + // Since the loop executes user code which can panic we have to bump the pointer + // after each step. + // NB can't overflow since we would have had to alloc the address space + local_len.increment_len(1); + }); + } + } else { + // Per TrustedLen contract a `None` upper bound means that the iterator length + // truly exceeds usize::MAX, which would eventually lead to a capacity overflow anyway. + // Since the other branch already panics eagerly (via `reserve()`) we do the same here. + // This avoids additional codegen for a fallback code path which would eventually + // panic anyway. + panic!("capacity overflow"); + } + } +} + +impl<T, I, A: Allocator> TrySpecExtend<T, I> for Vec<T, A> +where + I: TrustedLen<Item = T>, +{ + default fn try_spec_extend(&mut self, iterator: I) -> Result<(), TryReserveError> { + // This is the case for a TrustedLen iterator. + let (low, high) = iterator.size_hint(); + if let Some(additional) = high { + debug_assert_eq!( + low, + additional, + "TrustedLen iterator's size hint is not exact: {:?}", + (low, high) + ); + self.try_reserve(additional)?; + unsafe { + let mut ptr = self.as_mut_ptr().add(self.len()); + let mut local_len = SetLenOnDrop::new(&mut self.len); + iterator.for_each(move |element| { + ptr::write(ptr, element); + ptr = ptr.offset(1); + // Since the loop executes user code which can panic we have to bump the pointer + // after each step. + // NB can't overflow since we would have had to alloc the address space + local_len.increment_len(1); + }); + } + Ok(()) + } else { + Err(TryReserveErrorKind::CapacityOverflow.into()) + } + } +} + +#[cfg(not(no_global_oom_handling))] +impl<T, A: Allocator> SpecExtend<T, IntoIter<T>> for Vec<T, A> { + fn spec_extend(&mut self, mut iterator: IntoIter<T>) { + unsafe { + self.append_elements(iterator.as_slice() as _); + } + iterator.forget_remaining_elements(); + } +} + +impl<T, A: Allocator> TrySpecExtend<T, IntoIter<T>> for Vec<T, A> { + fn try_spec_extend(&mut self, mut iterator: IntoIter<T>) -> Result<(), TryReserveError> { + unsafe { + self.try_append_elements(iterator.as_slice() as _)?; + } + iterator.forget_remaining_elements(); + Ok(()) + } +} + +#[cfg(not(no_global_oom_handling))] +impl<'a, T: 'a, I, A: Allocator + 'a> SpecExtend<&'a T, I> for Vec<T, A> +where + I: Iterator<Item = &'a T>, + T: Clone, +{ + default fn spec_extend(&mut self, iterator: I) { + self.spec_extend(iterator.cloned()) + } +} + +impl<'a, T: 'a, I, A: Allocator + 'a> TrySpecExtend<&'a T, I> for Vec<T, A> +where + I: Iterator<Item = &'a T>, + T: Clone, +{ + default fn try_spec_extend(&mut self, iterator: I) -> Result<(), TryReserveError> { + self.try_spec_extend(iterator.cloned()) + } +} + +#[cfg(not(no_global_oom_handling))] +impl<'a, T: 'a, A: Allocator + 'a> SpecExtend<&'a T, slice::Iter<'a, T>> for Vec<T, A> +where + T: Copy, +{ + fn spec_extend(&mut self, iterator: slice::Iter<'a, T>) { + let slice = iterator.as_slice(); + unsafe { self.append_elements(slice) }; + } +} + +impl<'a, T: 'a, A: Allocator + 'a> TrySpecExtend<&'a T, slice::Iter<'a, T>> for Vec<T, A> +where + T: Copy, +{ + fn try_spec_extend(&mut self, iterator: slice::Iter<'a, T>) -> Result<(), TryReserveError> { + let slice = iterator.as_slice(); + unsafe { self.try_append_elements(slice) } + } +} |