rustpython-common-0.2.0/.cargo_vcs_info.json0000644000000001440000000000100145570ustar { "git": { "sha1": "c7faae9b22ce31a3ba1f2cc1cd3ad759b54ce100" }, "path_in_vcs": "common" }rustpython-common-0.2.0/Cargo.toml0000644000000031540000000000100125610ustar # THIS FILE IS AUTOMATICALLY GENERATED BY CARGO # # When uploading crates to the registry Cargo will automatically # "normalize" Cargo.toml files for maximal compatibility # with all versions of Cargo and also rewrite `path` dependencies # to registry (e.g., crates.io) dependencies. # # If you are reading this file be aware that the original Cargo.toml # will likely look very different (and much more reasonable). # See Cargo.toml.orig for the original contents. [package] edition = "2021" name = "rustpython-common" version = "0.2.0" authors = ["RustPython Team"] description = "General python functions and algorithms for use in RustPython" license = "MIT" repository = "https://github.com/RustPython/RustPython" [dependencies.ascii] version = "1.0" [dependencies.bitflags] version = "1.3.2" [dependencies.cfg-if] version = "1.0" [dependencies.hexf-parse] version = "0.2.1" [dependencies.itertools] version = "0.10.3" [dependencies.lexical-parse-float] version = "0.8.0" features = ["format"] [dependencies.libc] version = "0.2.133" [dependencies.lock_api] version = "0.4" [dependencies.num-bigint] version = "0.4.2" [dependencies.num-complex] version = "0.4.0" [dependencies.num-traits] version = "0.2" [dependencies.once_cell] version = "1.4.1" [dependencies.parking_lot] version = "0.12.0" optional = true [dependencies.radium] version = "0.7" [dependencies.rand] version = "0.8" [dependencies.siphasher] version = "0.3" [dependencies.unic-ucd-category] version = "0.9" [dependencies.volatile] version = "0.3" [features] threading = ["parking_lot"] [target."cfg(windows)".dependencies.widestring] version = "0.5.1" rustpython-common-0.2.0/Cargo.toml.orig000064400000000000000000000014171046102023000162420ustar 00000000000000[package] 
name = "rustpython-common" version = "0.2.0" description = "General python functions and algorithms for use in RustPython" authors = ["RustPython Team"] edition = "2021" repository = "https://github.com/RustPython/RustPython" license = "MIT" [features] threading = ["parking_lot"] [dependencies] ascii = "1.0" bitflags = "1.3.2" cfg-if = "1.0" hexf-parse = "0.2.1" itertools = "0.10.3" lexical-parse-float = { version = "0.8.0", features = ["format"] } libc = "0.2.133" lock_api = "0.4" num-bigint = "0.4.2" num-complex = "0.4.0" num-traits = "0.2" once_cell = "1.4.1" parking_lot = { version = "0.12.0", optional = true } radium = "0.7" rand = "0.8" siphasher = "0.3" unic-ucd-category = "0.9" volatile = "0.3" [target.'cfg(windows)'.dependencies] widestring = "0.5.1" rustpython-common-0.2.0/src/atomic.rs000064400000000000000000000055451046102023000157720ustar 00000000000000use core::ptr::{self, NonNull}; pub use core::sync::atomic::*; pub use radium::Radium; mod sealed { pub trait Sealed {} } pub trait PyAtomicScalar: sealed::Sealed { type Radium: Radium; } pub type PyAtomic = ::Radium; #[cfg(feature = "threading")] macro_rules! atomic_ty { ($i:ty, $atomic:ty) => { $atomic }; } #[cfg(not(feature = "threading"))] macro_rules! atomic_ty { ($i:ty, $atomic:ty) => { core::cell::Cell<$i> }; } macro_rules! 
impl_atomic_scalar { ($(($i:ty, $atomic:ty),)*) => { $( impl sealed::Sealed for $i {} impl PyAtomicScalar for $i { type Radium = atomic_ty!($i, $atomic); } )* }; } impl_atomic_scalar!( (u8, AtomicU8), (i8, AtomicI8), (u16, AtomicU16), (i16, AtomicI16), (u32, AtomicU32), (i32, AtomicI32), (u64, AtomicU64), (i64, AtomicI64), (usize, AtomicUsize), (isize, AtomicIsize), (bool, AtomicBool), ); impl sealed::Sealed for *mut T {} impl PyAtomicScalar for *mut T { type Radium = atomic_ty!(*mut T, AtomicPtr); } pub struct OncePtr { inner: PyAtomic<*mut T>, } impl Default for OncePtr { fn default() -> Self { Self::new() } } impl OncePtr { #[inline] pub fn new() -> Self { OncePtr { inner: Radium::new(ptr::null_mut()), } } pub fn get(&self) -> Option> { NonNull::new(self.inner.load(Ordering::Acquire)) } pub fn set(&self, value: NonNull) -> Result<(), NonNull> { let exchange = self.inner.compare_exchange( ptr::null_mut(), value.as_ptr(), Ordering::AcqRel, Ordering::Acquire, ); match exchange { Ok(_) => Ok(()), Err(_) => Err(value), } } pub fn get_or_init(&self, f: F) -> NonNull where F: FnOnce() -> Box, { enum Void {} match self.get_or_try_init(|| Ok::<_, Void>(f())) { Ok(val) => val, Err(void) => match void {}, } } pub fn get_or_try_init(&self, f: F) -> Result, E> where F: FnOnce() -> Result, E>, { if let Some(val) = self.get() { return Ok(val); } Ok(self.initialize(f()?)) } #[cold] fn initialize(&self, val: Box) -> NonNull { let ptr = Box::into_raw(val); let exchange = self.inner .compare_exchange(ptr::null_mut(), ptr, Ordering::AcqRel, Ordering::Acquire); let ptr = match exchange { Ok(_) => ptr, Err(winner) => { drop(unsafe { Box::from_raw(ptr) }); winner } }; unsafe { NonNull::new_unchecked(ptr) } } } rustpython-common-0.2.0/src/borrow.rs000064400000000000000000000100101046102023000160070ustar 00000000000000use crate::lock::{ MapImmutable, PyImmutableMappedMutexGuard, PyMappedMutexGuard, PyMappedRwLockReadGuard, PyMappedRwLockWriteGuard, PyMutexGuard, PyRwLockReadGuard, 
PyRwLockWriteGuard, }; use std::{ fmt, ops::{Deref, DerefMut}, }; macro_rules! impl_from { ($lt:lifetime, $gen:ident, $t:ty, $($var:ident($from:ty),)*) => { $( impl<$lt, $gen: ?Sized> From<$from> for $t { fn from(t: $from) -> Self { Self::$var(t) } } )* }; } #[derive(Debug)] pub enum BorrowedValue<'a, T: ?Sized> { Ref(&'a T), MuLock(PyMutexGuard<'a, T>), MappedMuLock(PyImmutableMappedMutexGuard<'a, T>), ReadLock(PyRwLockReadGuard<'a, T>), MappedReadLock(PyMappedRwLockReadGuard<'a, T>), } impl_from!('a, T, BorrowedValue<'a, T>, Ref(&'a T), MuLock(PyMutexGuard<'a, T>), MappedMuLock(PyImmutableMappedMutexGuard<'a, T>), ReadLock(PyRwLockReadGuard<'a, T>), MappedReadLock(PyMappedRwLockReadGuard<'a, T>), ); impl<'a, T: ?Sized> BorrowedValue<'a, T> { pub fn map(s: Self, f: F) -> BorrowedValue<'a, U> where F: FnOnce(&T) -> &U, { match s { Self::Ref(r) => BorrowedValue::Ref(f(r)), Self::MuLock(m) => BorrowedValue::MappedMuLock(PyMutexGuard::map_immutable(m, f)), Self::MappedMuLock(m) => { BorrowedValue::MappedMuLock(PyImmutableMappedMutexGuard::map(m, f)) } Self::ReadLock(r) => BorrowedValue::MappedReadLock(PyRwLockReadGuard::map(r, f)), Self::MappedReadLock(m) => { BorrowedValue::MappedReadLock(PyMappedRwLockReadGuard::map(m, f)) } } } } impl Deref for BorrowedValue<'_, T> { type Target = T; fn deref(&self) -> &T { match self { Self::Ref(r) => r, Self::MuLock(m) => m, Self::MappedMuLock(m) => m, Self::ReadLock(r) => r, Self::MappedReadLock(m) => m, } } } impl fmt::Display for BorrowedValue<'_, str> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fmt::Display::fmt(self.deref(), f) } } #[derive(Debug)] pub enum BorrowedValueMut<'a, T: ?Sized> { RefMut(&'a mut T), MuLock(PyMutexGuard<'a, T>), MappedMuLock(PyMappedMutexGuard<'a, T>), WriteLock(PyRwLockWriteGuard<'a, T>), MappedWriteLock(PyMappedRwLockWriteGuard<'a, T>), } impl_from!('a, T, BorrowedValueMut<'a, T>, RefMut(&'a mut T), MuLock(PyMutexGuard<'a, T>), MappedMuLock(PyMappedMutexGuard<'a, T>), 
WriteLock(PyRwLockWriteGuard<'a, T>), MappedWriteLock(PyMappedRwLockWriteGuard<'a, T>), ); impl<'a, T: ?Sized> BorrowedValueMut<'a, T> { pub fn map(s: Self, f: F) -> BorrowedValueMut<'a, U> where F: FnOnce(&mut T) -> &mut U, { match s { Self::RefMut(r) => BorrowedValueMut::RefMut(f(r)), Self::MuLock(m) => BorrowedValueMut::MappedMuLock(PyMutexGuard::map(m, f)), Self::MappedMuLock(m) => BorrowedValueMut::MappedMuLock(PyMappedMutexGuard::map(m, f)), Self::WriteLock(r) => BorrowedValueMut::MappedWriteLock(PyRwLockWriteGuard::map(r, f)), Self::MappedWriteLock(m) => { BorrowedValueMut::MappedWriteLock(PyMappedRwLockWriteGuard::map(m, f)) } } } } impl Deref for BorrowedValueMut<'_, T> { type Target = T; fn deref(&self) -> &T { match self { Self::RefMut(r) => r, Self::MuLock(m) => m, Self::MappedMuLock(m) => m, Self::WriteLock(w) => w, Self::MappedWriteLock(w) => w, } } } impl DerefMut for BorrowedValueMut<'_, T> { fn deref_mut(&mut self) -> &mut T { match self { Self::RefMut(r) => r, Self::MuLock(m) => &mut *m, Self::MappedMuLock(m) => &mut *m, Self::WriteLock(w) => &mut *w, Self::MappedWriteLock(w) => &mut *w, } } } rustpython-common-0.2.0/src/boxvec.rs000064400000000000000000000455331046102023000160050ustar 00000000000000//! An unresizable vector backed by a `Box<[T]>` use std::{ alloc, borrow::{Borrow, BorrowMut}, cmp, fmt, mem::{self, MaybeUninit}, ops::{Bound, Deref, DerefMut, RangeBounds}, ptr, slice, }; pub struct BoxVec { xs: Box<[MaybeUninit]>, len: usize, } impl Drop for BoxVec { fn drop(&mut self) { self.clear(); // MaybeUninit inhibits array's drop } } macro_rules! panic_oob { ($method_name:expr, $index:expr, $len:expr) => { panic!( concat!( "BoxVec::", $method_name, ": index {} is out of bounds in vector of length {}" ), $index, $len ) }; } fn capacity_overflow() -> ! 
{ panic!("capacity overflow") } impl BoxVec { pub fn new(n: usize) -> BoxVec { unsafe { let layout = match alloc::Layout::array::(n) { Ok(l) => l, Err(_) => capacity_overflow(), }; let ptr = if mem::size_of::() == 0 { ptr::NonNull::>::dangling().as_ptr() } else { let ptr = alloc::alloc(layout); if ptr.is_null() { alloc::handle_alloc_error(layout) } ptr as *mut MaybeUninit }; let ptr = ptr::slice_from_raw_parts_mut(ptr, n); let xs = Box::from_raw(ptr); BoxVec { xs, len: 0 } } } #[inline] pub fn len(&self) -> usize { self.len } #[inline] pub fn is_empty(&self) -> bool { self.len() == 0 } #[inline] pub fn capacity(&self) -> usize { self.xs.len() } pub fn is_full(&self) -> bool { self.len() == self.capacity() } pub fn remaining_capacity(&self) -> usize { self.capacity() - self.len() } pub fn push(&mut self, element: T) { self.try_push(element).unwrap() } pub fn try_push(&mut self, element: T) -> Result<(), CapacityError> { if self.len() < self.capacity() { unsafe { self.push_unchecked(element); } Ok(()) } else { Err(CapacityError::new(element)) } } /// # Safety /// Must ensure that self.len() < self.capacity() pub unsafe fn push_unchecked(&mut self, element: T) { let len = self.len(); debug_assert!(len < self.capacity()); ptr::write(self.get_unchecked_ptr(len), element); self.set_len(len + 1); } /// Get pointer to where element at `index` would be unsafe fn get_unchecked_ptr(&mut self, index: usize) -> *mut T { self.xs.as_mut_ptr().add(index).cast() } pub fn insert(&mut self, index: usize, element: T) { self.try_insert(index, element).unwrap() } pub fn try_insert(&mut self, index: usize, element: T) -> Result<(), CapacityError> { if index > self.len() { panic_oob!("try_insert", index, self.len()) } if self.len() == self.capacity() { return Err(CapacityError::new(element)); } let len = self.len(); // follows is just like Vec unsafe { // infallible // The spot to put the new value { let p: *mut _ = self.get_unchecked_ptr(index); // Shift everything over to make space. 
(Duplicating the // `index`th element into two consecutive places.) ptr::copy(p, p.offset(1), len - index); // Write it in, overwriting the first copy of the `index`th // element. ptr::write(p, element); } self.set_len(len + 1); } Ok(()) } pub fn pop(&mut self) -> Option { if self.is_empty() { return None; } unsafe { let new_len = self.len() - 1; self.set_len(new_len); Some(ptr::read(self.get_unchecked_ptr(new_len))) } } pub fn swap_remove(&mut self, index: usize) -> T { self.swap_pop(index) .unwrap_or_else(|| panic_oob!("swap_remove", index, self.len())) } pub fn swap_pop(&mut self, index: usize) -> Option { let len = self.len(); if index >= len { return None; } self.swap(index, len - 1); self.pop() } pub fn remove(&mut self, index: usize) -> T { self.pop_at(index) .unwrap_or_else(|| panic_oob!("remove", index, self.len())) } pub fn pop_at(&mut self, index: usize) -> Option { if index >= self.len() { None } else { self.drain(index..index + 1).next() } } pub fn truncate(&mut self, new_len: usize) { unsafe { if new_len < self.len() { let tail: *mut [_] = &mut self[new_len..]; self.len = new_len; ptr::drop_in_place(tail); } } } /// Remove all elements in the vector. pub fn clear(&mut self) { self.truncate(0) } /// Retains only the elements specified by the predicate. /// /// In other words, remove all elements `e` such that `f(&mut e)` returns false. /// This method operates in place and preserves the order of the retained /// elements. pub fn retain(&mut self, mut f: F) where F: FnMut(&mut T) -> bool, { let len = self.len(); let mut del = 0; { let v = &mut **self; for i in 0..len { if !f(&mut v[i]) { del += 1; } else if del > 0 { v.swap(i - del, i); } } } if del > 0 { self.drain(len - del..); } } /// Set the vector’s length without dropping or moving out elements /// /// This method is `unsafe` because it changes the notion of the /// number of “valid” elements in the vector. Use with care. 
/// /// This method uses *debug assertions* to check that `length` is /// not greater than the capacity. /// /// # Safety /// Must ensure that length <= self.capacity() pub unsafe fn set_len(&mut self, length: usize) { debug_assert!(length <= self.capacity()); self.len = length; } /// Copy and appends all elements in a slice to the `BoxVec`. /// /// # Errors /// /// This method will return an error if the capacity left (see /// [`remaining_capacity`]) is smaller then the length of the provided /// slice. /// /// [`remaining_capacity`]: #method.remaining_capacity pub fn try_extend_from_slice(&mut self, other: &[T]) -> Result<(), CapacityError> where T: Copy, { if self.remaining_capacity() < other.len() { return Err(CapacityError::new(())); } let self_len = self.len(); let other_len = other.len(); unsafe { let dst = self.as_mut_ptr().add(self_len); ptr::copy_nonoverlapping(other.as_ptr(), dst, other_len); self.set_len(self_len + other_len); } Ok(()) } /// Create a draining iterator that removes the specified range in the vector /// and yields the removed items from start to end. The element range is /// removed even if the iterator is not consumed until the end. /// /// Note: It is unspecified how many elements are removed from the vector, /// if the `Drain` value is leaked. /// /// **Panics** if the starting point is greater than the end point or if /// the end point is greater than the length of the vector. pub fn drain(&mut self, range: R) -> Drain where R: RangeBounds, { // Memory safety // // When the Drain is first created, it shortens the length of // the source vector to make sure no uninitialized or moved-from elements // are accessible at all if the Drain's destructor never gets to run. // // Drain will ptr::read out the values to remove. // When finished, remaining tail of the vec is copied back to cover // the hole, and the vector length is restored to the new length. 
// let len = self.len(); let start = match range.start_bound() { Bound::Unbounded => 0, Bound::Included(&i) => i, Bound::Excluded(&i) => i.saturating_add(1), }; let end = match range.end_bound() { Bound::Excluded(&j) => j, Bound::Included(&j) => j.saturating_add(1), Bound::Unbounded => len, }; self.drain_range(start, end) } fn drain_range(&mut self, start: usize, end: usize) -> Drain { let len = self.len(); // bounds check happens here (before length is changed!) let range_slice: *const _ = &self[start..end]; // Calling `set_len` creates a fresh and thus unique mutable references, making all // older aliases we created invalid. So we cannot call that function. self.len = start; unsafe { Drain { tail_start: end, tail_len: len - end, iter: (*range_slice).iter(), vec: ptr::NonNull::from(self), } } } /// Return a slice containing all elements of the vector. pub fn as_slice(&self) -> &[T] { self } /// Return a mutable slice containing all elements of the vector. pub fn as_mut_slice(&mut self) -> &mut [T] { self } /// Return a raw pointer to the vector's buffer. #[inline] pub fn as_ptr(&self) -> *const T { self.xs.as_ptr().cast() } /// Return a raw mutable pointer to the vector's buffer. #[inline] pub fn as_mut_ptr(&mut self) -> *mut T { self.xs.as_mut_ptr().cast() } } impl Deref for BoxVec { type Target = [T]; #[inline] fn deref(&self) -> &[T] { unsafe { slice::from_raw_parts(self.as_ptr(), self.len()) } } } impl DerefMut for BoxVec { #[inline] fn deref_mut(&mut self) -> &mut [T] { let len = self.len(); unsafe { slice::from_raw_parts_mut(self.as_mut_ptr(), len) } } } /// Iterate the `BoxVec` with references to each element. impl<'a, T> IntoIterator for &'a BoxVec { type Item = &'a T; type IntoIter = slice::Iter<'a, T>; fn into_iter(self) -> Self::IntoIter { self.iter() } } /// Iterate the `BoxVec` with mutable references to each element. 
impl<'a, T> IntoIterator for &'a mut BoxVec { type Item = &'a mut T; type IntoIter = slice::IterMut<'a, T>; fn into_iter(self) -> Self::IntoIter { self.iter_mut() } } /// Iterate the `BoxVec` with each element by value. /// /// The vector is consumed by this operation. impl IntoIterator for BoxVec { type Item = T; type IntoIter = IntoIter; fn into_iter(self) -> IntoIter { IntoIter { index: 0, v: self } } } /// By-value iterator for `BoxVec`. pub struct IntoIter { index: usize, v: BoxVec, } impl Iterator for IntoIter { type Item = T; fn next(&mut self) -> Option { if self.index == self.v.len { None } else { unsafe { let index = self.index; self.index += 1; Some(ptr::read(self.v.get_unchecked_ptr(index))) } } } fn size_hint(&self) -> (usize, Option) { let len = self.v.len() - self.index; (len, Some(len)) } } impl DoubleEndedIterator for IntoIter { fn next_back(&mut self) -> Option { if self.index == self.v.len { None } else { unsafe { let new_len = self.v.len() - 1; self.v.set_len(new_len); Some(ptr::read(self.v.get_unchecked_ptr(new_len))) } } } } impl ExactSizeIterator for IntoIter {} impl Drop for IntoIter { fn drop(&mut self) { // panic safety: Set length to 0 before dropping elements. let index = self.index; let len = self.v.len(); unsafe { self.v.set_len(0); let elements = slice::from_raw_parts_mut(self.v.get_unchecked_ptr(index), len - index); ptr::drop_in_place(elements); } } } impl fmt::Debug for IntoIter where T: fmt::Debug, { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.debug_list().entries(&self.v[self.index..]).finish() } } /// A draining iterator for `BoxVec`. 
pub struct Drain<'a, T> { /// Index of tail to preserve tail_start: usize, /// Length of tail tail_len: usize, /// Current remaining range to remove iter: slice::Iter<'a, T>, vec: ptr::NonNull>, } unsafe impl<'a, T: Sync> Sync for Drain<'a, T> {} unsafe impl<'a, T: Sync> Send for Drain<'a, T> {} impl Iterator for Drain<'_, T> { type Item = T; fn next(&mut self) -> Option { self.iter .next() .map(|elt| unsafe { ptr::read(elt as *const _) }) } fn size_hint(&self) -> (usize, Option) { self.iter.size_hint() } } impl DoubleEndedIterator for Drain<'_, T> { fn next_back(&mut self) -> Option { self.iter .next_back() .map(|elt| unsafe { ptr::read(elt as *const _) }) } } impl ExactSizeIterator for Drain<'_, T> {} impl<'a, T> Drain<'a, T> { pub fn as_slice(&self) -> &'a [T] { self.iter.as_slice() } } impl Drop for Drain<'_, T> { fn drop(&mut self) { // len is currently 0 so panicking while dropping will not cause a double drop. for _ in self.by_ref() {} if self.tail_len > 0 { unsafe { let source_vec = self.vec.as_mut(); // memmove back untouched tail, update to new length let start = source_vec.len(); let tail = self.tail_start; let src = source_vec.as_ptr().add(tail); let dst = source_vec.as_mut_ptr().add(start); ptr::copy(src, dst, self.tail_len); source_vec.set_len(start + self.tail_len); } } } } struct ScopeExitGuard where F: FnMut(&Data, &mut T), { value: T, data: Data, f: F, } impl Drop for ScopeExitGuard where F: FnMut(&Data, &mut T), { fn drop(&mut self) { (self.f)(&self.data, &mut self.value) } } /// Extend the `BoxVec` with an iterator. /// /// Does not extract more items than there is space for. No error /// occurs if there are more iterator elements. impl Extend for BoxVec { fn extend>(&mut self, iter: I) { let take = self.capacity() - self.len(); unsafe { let len = self.len(); let mut ptr = raw_ptr_add(self.as_mut_ptr(), len); let end_ptr = raw_ptr_add(ptr, take); // Keep the length in a separate variable, write it back on scope // exit. 
To help the compiler with alias analysis and stuff. // We update the length to handle panic in the iteration of the // user's iterator, without dropping any elements on the floor. let mut guard = ScopeExitGuard { value: &mut self.len, data: len, f: move |&len, self_len| { **self_len = len; }, }; let mut iter = iter.into_iter(); loop { if ptr == end_ptr { break; } if let Some(elt) = iter.next() { raw_ptr_write(ptr, elt); ptr = raw_ptr_add(ptr, 1); guard.data += 1; } else { break; } } } } } /// Rawptr add but uses arithmetic distance for ZST unsafe fn raw_ptr_add(ptr: *mut T, offset: usize) -> *mut T { if mem::size_of::() == 0 { // Special case for ZST (ptr as usize).wrapping_add(offset) as _ } else { ptr.add(offset) } } unsafe fn raw_ptr_write(ptr: *mut T, value: T) { if mem::size_of::() == 0 { /* nothing */ } else { ptr::write(ptr, value) } } impl Clone for BoxVec where T: Clone, { fn clone(&self) -> Self { let mut new = BoxVec::new(self.capacity()); new.extend(self.iter().cloned()); new } fn clone_from(&mut self, rhs: &Self) { // recursive case for the common prefix let prefix = cmp::min(self.len(), rhs.len()); self[..prefix].clone_from_slice(&rhs[..prefix]); if prefix < self.len() { // rhs was shorter for _ in 0..self.len() - prefix { self.pop(); } } else { let rhs_elems = rhs[self.len()..].iter().cloned(); self.extend(rhs_elems); } } } impl PartialEq for BoxVec where T: PartialEq, { fn eq(&self, other: &Self) -> bool { **self == **other } } impl PartialEq<[T]> for BoxVec where T: PartialEq, { fn eq(&self, other: &[T]) -> bool { **self == *other } } impl Eq for BoxVec where T: Eq {} impl Borrow<[T]> for BoxVec { fn borrow(&self) -> &[T] { self } } impl BorrowMut<[T]> for BoxVec { fn borrow_mut(&mut self) -> &mut [T] { self } } impl AsRef<[T]> for BoxVec { fn as_ref(&self) -> &[T] { self } } impl AsMut<[T]> for BoxVec { fn as_mut(&mut self) -> &mut [T] { self } } impl fmt::Debug for BoxVec where T: fmt::Debug, { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result 
{ (**self).fmt(f) } } /// Error value indicating insufficient capacity #[derive(Clone, Copy, Eq, Ord, PartialEq, PartialOrd)] pub struct CapacityError { element: T, } impl CapacityError { /// Create a new `CapacityError` from `element`. pub fn new(element: T) -> CapacityError { CapacityError { element } } /// Extract the overflowing element pub fn element(self) -> T { self.element } /// Convert into a `CapacityError` that does not carry an element. pub fn simplify(self) -> CapacityError { CapacityError { element: () } } } const CAPERROR: &str = "insufficient capacity"; impl std::error::Error for CapacityError {} impl fmt::Display for CapacityError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{CAPERROR}") } } impl fmt::Debug for CapacityError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "capacity error: {CAPERROR}") } } rustpython-common-0.2.0/src/bytes.rs000064400000000000000000000031741046102023000156400ustar 00000000000000pub fn repr(b: &[u8]) -> String { repr_with(b, &[], "") } pub fn repr_with(b: &[u8], prefixes: &[&str], suffix: &str) -> String { use std::fmt::Write; let mut out_len = 0usize; let mut squote = 0; let mut dquote = 0; for &ch in b { let incr = match ch { b'\'' => { squote += 1; 1 } b'"' => { dquote += 1; 1 } b'\\' | b'\t' | b'\r' | b'\n' => 2, 0x20..=0x7e => 1, _ => 4, // \xHH }; // TODO: OverflowError out_len = out_len.checked_add(incr).unwrap(); } let (quote, num_escaped_quotes) = crate::str::choose_quotes_for_repr(squote, dquote); // we'll be adding backslashes in front of the existing inner quotes out_len += num_escaped_quotes; // 3 is for b prefix + outer quotes out_len += 3 + prefixes.iter().map(|s| s.len()).sum::() + suffix.len(); let mut res = String::with_capacity(out_len); res.extend(prefixes.iter().copied()); res.push('b'); res.push(quote); for &ch in b { match ch { b'\t' => res.push_str("\\t"), b'\n' => res.push_str("\\n"), b'\r' => res.push_str("\\r"), // printable ascii range 0x20..=0x7e 
=> { let ch = ch as char; if ch == quote || ch == '\\' { res.push('\\'); } res.push(ch); } _ => write!(res, "\\x{ch:02x}").unwrap(), } } res.push(quote); res.push_str(suffix); res } rustpython-common-0.2.0/src/cformat.rs000064400000000000000000000726311046102023000161510ustar 00000000000000//! Implementation of Printf-Style string formatting //! as per the [Python Docs](https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting). use crate::float_ops; use bitflags::bitflags; use num_bigint::{BigInt, Sign}; use num_traits::Signed; use std::{ cmp, fmt, iter::{Enumerate, Peekable}, str::FromStr, }; #[derive(Debug, PartialEq)] pub enum CFormatErrorType { UnmatchedKeyParentheses, MissingModuloSign, UnsupportedFormatChar(char), IncompleteFormat, IntTooBig, // Unimplemented, } // also contains how many chars the parsing function consumed pub type ParsingError = (CFormatErrorType, usize); #[derive(Debug, PartialEq)] pub struct CFormatError { pub typ: CFormatErrorType, // FIXME pub index: usize, } impl fmt::Display for CFormatError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { use CFormatErrorType::*; match self.typ { UnmatchedKeyParentheses => write!(f, "incomplete format key"), CFormatErrorType::IncompleteFormat => write!(f, "incomplete format"), UnsupportedFormatChar(c) => write!( f, "unsupported format character '{}' ({:#x}) at index {}", c, c as u32, self.index ), IntTooBig => write!(f, "width/precision too big"), _ => write!(f, "unexpected error parsing format string"), } } } pub type CFormatPreconversor = super::format::FormatPreconversor; #[derive(Debug, PartialEq)] pub enum CFormatCase { Lowercase, Uppercase, } #[derive(Debug, PartialEq)] pub enum CNumberType { Decimal, Octal, Hex(CFormatCase), } #[derive(Debug, PartialEq)] pub enum CFloatType { Exponent(CFormatCase), PointDecimal(CFormatCase), General(CFormatCase), } #[derive(Debug, PartialEq)] pub enum CFormatType { Number(CNumberType), Float(CFloatType), Character, 
String(CFormatPreconversor), } bitflags! { pub struct CConversionFlags: u32 { const ALTERNATE_FORM = 0b0000_0001; const ZERO_PAD = 0b0000_0010; const LEFT_ADJUST = 0b0000_0100; const BLANK_SIGN = 0b0000_1000; const SIGN_CHAR = 0b0001_0000; } } impl CConversionFlags { #[inline] pub fn sign_string(&self) -> &'static str { if self.contains(CConversionFlags::SIGN_CHAR) { "+" } else if self.contains(CConversionFlags::BLANK_SIGN) { " " } else { "" } } } #[derive(Debug, PartialEq)] pub enum CFormatQuantity { Amount(usize), FromValuesTuple, } #[derive(Debug, PartialEq)] pub struct CFormatSpec { pub mapping_key: Option, pub flags: CConversionFlags, pub min_field_width: Option, pub precision: Option, pub format_type: CFormatType, pub format_char: char, // chars_consumed: usize, } impl FromStr for CFormatSpec { type Err = ParsingError; fn from_str(text: &str) -> Result { let mut chars = text.chars().enumerate().peekable(); if chars.next().map(|x| x.1) != Some('%') { return Err((CFormatErrorType::MissingModuloSign, 1)); } CFormatSpec::parse(&mut chars) } } pub type ParseIter = Peekable>; impl CFormatSpec { pub fn parse(iter: &mut ParseIter) -> Result where T: Into + Copy, I: Iterator, { let mapping_key = parse_spec_mapping_key(iter)?; let flags = parse_flags(iter); let min_field_width = parse_quantity(iter)?; let precision = parse_precision(iter)?; consume_length(iter); let (format_type, format_char) = parse_format_type(iter)?; let precision = precision.or(match format_type { CFormatType::Float(_) => Some(CFormatQuantity::Amount(6)), _ => None, }); Ok(CFormatSpec { mapping_key, flags, min_field_width, precision, format_type, format_char, }) } fn compute_fill_string(fill_char: char, fill_chars_needed: usize) -> String { (0..fill_chars_needed) .map(|_| fill_char) .collect::() } fn fill_string( &self, string: String, fill_char: char, num_prefix_chars: Option, fill_with_precision: bool, ) -> String { let target_width = if fill_with_precision { &self.precision } else { 
&self.min_field_width }; let mut num_chars = string.chars().count(); if let Some(num_prefix_chars) = num_prefix_chars { num_chars += num_prefix_chars; } let num_chars = num_chars; let width = match target_width { Some(CFormatQuantity::Amount(width)) => cmp::max(width, &num_chars), _ => &num_chars, }; let fill_chars_needed = width.saturating_sub(num_chars); let fill_string = CFormatSpec::compute_fill_string(fill_char, fill_chars_needed); if !fill_string.is_empty() { // Don't left-adjust if precision-filling: that will always be prepending 0s to %d // arguments, the LEFT_ADJUST flag will be used by a later call to fill_string with // the 0-filled string as the string param. if !fill_with_precision && self.flags.contains(CConversionFlags::LEFT_ADJUST) { format!("{string}{fill_string}") } else { format!("{fill_string}{string}") } } else { string } } fn format_string_with_precision( &self, string: String, precision: Option<&CFormatQuantity>, ) -> String { // truncate if needed let string = match precision { Some(CFormatQuantity::Amount(precision)) if string.chars().count() > *precision => { string.chars().take(*precision).collect::() } _ => string, }; self.fill_string(string, ' ', None, false) } #[inline] pub fn format_string(&self, string: String) -> String { self.format_string_with_precision(string, self.precision.as_ref()) } #[inline] pub fn format_char(&self, ch: char) -> String { self.format_string_with_precision(ch.to_string(), Some(&CFormatQuantity::Amount(1))) } pub fn format_bytes(&self, bytes: &[u8]) -> Vec { let bytes = if let Some(CFormatQuantity::Amount(precision)) = self.precision { &bytes[..cmp::min(bytes.len(), precision)] } else { bytes }; if let Some(CFormatQuantity::Amount(width)) = self.min_field_width { let fill = cmp::max(0, width - bytes.len()); let mut v = Vec::with_capacity(bytes.len() + fill); if self.flags.contains(CConversionFlags::LEFT_ADJUST) { v.extend_from_slice(bytes); v.append(&mut vec![b' '; fill]); } else { v.append(&mut vec![b' '; 
fill]); v.extend_from_slice(bytes); } v } else { bytes.to_vec() } } pub fn format_number(&self, num: &BigInt) -> String { use CFormatCase::{Lowercase, Uppercase}; use CNumberType::*; let magnitude = num.abs(); let prefix = if self.flags.contains(CConversionFlags::ALTERNATE_FORM) { match self.format_type { CFormatType::Number(Octal) => "0o", CFormatType::Number(Hex(Lowercase)) => "0x", CFormatType::Number(Hex(Uppercase)) => "0X", _ => "", } } else { "" }; let magnitude_string: String = match self.format_type { CFormatType::Number(Decimal) => magnitude.to_str_radix(10), CFormatType::Number(Octal) => magnitude.to_str_radix(8), CFormatType::Number(Hex(Lowercase)) => magnitude.to_str_radix(16), CFormatType::Number(Hex(Uppercase)) => { let mut result = magnitude.to_str_radix(16); result.make_ascii_uppercase(); result } _ => unreachable!(), // Should not happen because caller has to make sure that this is a number }; let sign_string = match num.sign() { Sign::Minus => "-", _ => self.flags.sign_string(), }; let padded_magnitude_string = self.fill_string(magnitude_string, '0', None, true); if self.flags.contains(CConversionFlags::ZERO_PAD) { let fill_char = if !self.flags.contains(CConversionFlags::LEFT_ADJUST) { '0' } else { ' ' // '-' overrides the '0' conversion if both are given }; let signed_prefix = format!("{sign_string}{prefix}"); format!( "{}{}", signed_prefix, self.fill_string( padded_magnitude_string, fill_char, Some(signed_prefix.chars().count()), false ), ) } else { self.fill_string( format!("{sign_string}{prefix}{padded_magnitude_string}"), ' ', None, false, ) } } pub fn format_float(&self, num: f64) -> String { let sign_string = if num.is_sign_negative() && !num.is_nan() { "-" } else { self.flags.sign_string() }; let precision = match self.precision { Some(CFormatQuantity::Amount(p)) => p, _ => 6, }; let magnitude_string = match &self.format_type { CFormatType::Float(CFloatType::PointDecimal(case)) => { let case = match case { CFormatCase::Lowercase => 
float_ops::Case::Lower, CFormatCase::Uppercase => float_ops::Case::Upper, }; let magnitude = num.abs(); float_ops::format_fixed(precision, magnitude, case) } CFormatType::Float(CFloatType::Exponent(case)) => { let case = match case { CFormatCase::Lowercase => float_ops::Case::Lower, CFormatCase::Uppercase => float_ops::Case::Upper, }; let magnitude = num.abs(); float_ops::format_exponent(precision, magnitude, case) } CFormatType::Float(CFloatType::General(case)) => { let precision = if precision == 0 { 1 } else { precision }; let case = match case { CFormatCase::Lowercase => float_ops::Case::Lower, CFormatCase::Uppercase => float_ops::Case::Upper, }; let magnitude = num.abs(); float_ops::format_general( precision, magnitude, case, self.flags.contains(CConversionFlags::ALTERNATE_FORM), false, ) } _ => unreachable!(), }; if self.flags.contains(CConversionFlags::ZERO_PAD) { let fill_char = if !self.flags.contains(CConversionFlags::LEFT_ADJUST) { '0' } else { ' ' }; format!( "{}{}", sign_string, self.fill_string( magnitude_string, fill_char, Some(sign_string.chars().count()), false ) ) } else { self.fill_string(format!("{sign_string}{magnitude_string}"), ' ', None, false) } } } fn parse_spec_mapping_key(iter: &mut ParseIter) -> Result, ParsingError> where T: Into + Copy, I: Iterator, { if let Some(&(index, c)) = iter.peek() { if c.into() == '(' { iter.next().unwrap(); return match parse_text_inside_parentheses(iter) { Some(key) => Ok(Some(key)), None => Err((CFormatErrorType::UnmatchedKeyParentheses, index)), }; } } Ok(None) } fn parse_flags(iter: &mut ParseIter) -> CConversionFlags where T: Into + Copy, I: Iterator, { let mut flags = CConversionFlags::empty(); while let Some(&(_, c)) = iter.peek() { let flag = match c.into() { '#' => CConversionFlags::ALTERNATE_FORM, '0' => CConversionFlags::ZERO_PAD, '-' => CConversionFlags::LEFT_ADJUST, ' ' => CConversionFlags::BLANK_SIGN, '+' => CConversionFlags::SIGN_CHAR, _ => break, }; iter.next().unwrap(); flags |= flag; } 
flags } fn consume_length(iter: &mut ParseIter) where T: Into + Copy, I: Iterator, { if let Some(&(_, c)) = iter.peek() { let c = c.into(); if c == 'h' || c == 'l' || c == 'L' { iter.next().unwrap(); } } } fn parse_format_type(iter: &mut ParseIter) -> Result<(CFormatType, char), ParsingError> where T: Into, I: Iterator, { use CFloatType::*; use CFormatCase::{Lowercase, Uppercase}; use CNumberType::*; let (index, c) = match iter.next() { Some((index, c)) => (index, c.into()), None => { return Err(( CFormatErrorType::IncompleteFormat, iter.peek().map(|x| x.0).unwrap_or(0), )); } }; let format_type = match c { 'd' | 'i' | 'u' => CFormatType::Number(Decimal), 'o' => CFormatType::Number(Octal), 'x' => CFormatType::Number(Hex(Lowercase)), 'X' => CFormatType::Number(Hex(Uppercase)), 'e' => CFormatType::Float(Exponent(Lowercase)), 'E' => CFormatType::Float(Exponent(Uppercase)), 'f' => CFormatType::Float(PointDecimal(Lowercase)), 'F' => CFormatType::Float(PointDecimal(Uppercase)), 'g' => CFormatType::Float(General(Lowercase)), 'G' => CFormatType::Float(General(Uppercase)), 'c' => CFormatType::Character, 'r' => CFormatType::String(CFormatPreconversor::Repr), 's' => CFormatType::String(CFormatPreconversor::Str), 'b' => CFormatType::String(CFormatPreconversor::Bytes), 'a' => CFormatType::String(CFormatPreconversor::Ascii), _ => return Err((CFormatErrorType::UnsupportedFormatChar(c), index)), }; Ok((format_type, c)) } fn parse_quantity(iter: &mut ParseIter) -> Result, ParsingError> where T: Into + Copy, I: Iterator, { if let Some(&(_, c)) = iter.peek() { let c: char = c.into(); if c == '*' { iter.next().unwrap(); return Ok(Some(CFormatQuantity::FromValuesTuple)); } if let Some(i) = c.to_digit(10) { let mut num = i as i32; iter.next().unwrap(); while let Some(&(index, c)) = iter.peek() { if let Some(i) = c.into().to_digit(10) { num = num .checked_mul(10) .and_then(|num| num.checked_add(i as i32)) .ok_or((CFormatErrorType::IntTooBig, index))?; iter.next().unwrap(); } else { 
break; } } return Ok(Some(CFormatQuantity::Amount(num.unsigned_abs() as usize))); } } Ok(None) } fn parse_precision(iter: &mut ParseIter) -> Result, ParsingError> where T: Into + Copy, I: Iterator, { if let Some(&(_, c)) = iter.peek() { if c.into() == '.' { iter.next().unwrap(); return parse_quantity(iter); } } Ok(None) } fn parse_text_inside_parentheses(iter: &mut ParseIter) -> Option where T: Into, I: Iterator, { let mut counter: i32 = 1; let mut contained_text = String::new(); loop { let (_, c) = iter.next()?; let c = c.into(); match c { _ if c == '(' => { counter += 1; } _ if c == ')' => { counter -= 1; } _ => (), } if counter > 0 { contained_text.push(c); } else { break; } } Some(contained_text) } #[derive(Debug, PartialEq)] pub enum CFormatPart { Literal(T), Spec(CFormatSpec), } impl CFormatPart { #[inline] pub fn is_specifier(&self) -> bool { matches!(self, CFormatPart::Spec(_)) } #[inline] pub fn has_key(&self) -> bool { match self { CFormatPart::Spec(s) => s.mapping_key.is_some(), _ => false, } } } #[derive(Debug, PartialEq)] pub struct CFormatStrOrBytes { parts: Vec<(usize, CFormatPart)>, } impl CFormatStrOrBytes { pub fn check_specifiers(&self) -> Option<(usize, bool)> { let mut count = 0; let mut mapping_required = false; for (_, part) in &self.parts { if part.is_specifier() { let has_key = part.has_key(); if count == 0 { mapping_required = has_key; } else if mapping_required != has_key { return None; } count += 1; } } Some((count, mapping_required)) } #[inline] pub fn iter(&self) -> impl Iterator)> { self.parts.iter() } #[inline] pub fn iter_mut(&mut self) -> impl Iterator)> { self.parts.iter_mut() } } pub type CFormatBytes = CFormatStrOrBytes>; impl CFormatBytes { pub fn parse>(iter: &mut ParseIter) -> Result { let mut parts = vec![]; let mut literal = vec![]; let mut part_index = 0; while let Some((index, c)) = iter.next() { if c == b'%' { if let Some(&(_, second)) = iter.peek() { if second == b'%' { iter.next().unwrap(); literal.push(b'%'); 
continue; } else { if !literal.is_empty() { parts.push(( part_index, CFormatPart::Literal(std::mem::take(&mut literal)), )); } let spec = CFormatSpec::parse(iter).map_err(|err| CFormatError { typ: err.0, index: err.1, })?; parts.push((index, CFormatPart::Spec(spec))); if let Some(&(index, _)) = iter.peek() { part_index = index; } } } else { return Err(CFormatError { typ: CFormatErrorType::IncompleteFormat, index: index + 1, }); } } else { literal.push(c); } } if !literal.is_empty() { parts.push((part_index, CFormatPart::Literal(literal))); } Ok(Self { parts }) } pub fn parse_from_bytes(bytes: &[u8]) -> Result { let mut iter = bytes.iter().cloned().enumerate().peekable(); Self::parse(&mut iter) } } pub type CFormatString = CFormatStrOrBytes; impl FromStr for CFormatString { type Err = CFormatError; fn from_str(text: &str) -> Result { let mut iter = text.chars().enumerate().peekable(); Self::parse(&mut iter) } } impl CFormatString { pub(crate) fn parse>( iter: &mut ParseIter, ) -> Result { let mut parts = vec![]; let mut literal = String::new(); let mut part_index = 0; while let Some((index, c)) = iter.next() { if c == '%' { if let Some(&(_, second)) = iter.peek() { if second == '%' { iter.next().unwrap(); literal.push('%'); continue; } else { if !literal.is_empty() { parts.push(( part_index, CFormatPart::Literal(std::mem::take(&mut literal)), )); } let spec = CFormatSpec::parse(iter).map_err(|err| CFormatError { typ: err.0, index: err.1, })?; parts.push((index, CFormatPart::Spec(spec))); if let Some(&(index, _)) = iter.peek() { part_index = index; } } } else { return Err(CFormatError { typ: CFormatErrorType::IncompleteFormat, index: index + 1, }); } } else { literal.push(c); } } if !literal.is_empty() { parts.push((part_index, CFormatPart::Literal(literal))); } Ok(Self { parts }) } } #[cfg(test)] mod tests { use super::*; #[test] fn test_fill_and_align() { assert_eq!( "%10s" .parse::() .unwrap() .format_string("test".to_owned()), " test".to_owned() ); assert_eq!( 
"%-10s" .parse::() .unwrap() .format_string("test".to_owned()), "test ".to_owned() ); assert_eq!( "%#10x" .parse::() .unwrap() .format_number(&BigInt::from(0x1337)), " 0x1337".to_owned() ); assert_eq!( "%-#10x" .parse::() .unwrap() .format_number(&BigInt::from(0x1337)), "0x1337 ".to_owned() ); } #[test] fn test_parse_key() { let expected = Ok(CFormatSpec { mapping_key: Some("amount".to_owned()), format_type: CFormatType::Number(CNumberType::Decimal), format_char: 'd', min_field_width: None, precision: None, flags: CConversionFlags::empty(), }); assert_eq!("%(amount)d".parse::(), expected); let expected = Ok(CFormatSpec { mapping_key: Some("m((u(((l((((ti))))p)))l))e".to_owned()), format_type: CFormatType::Number(CNumberType::Decimal), format_char: 'd', min_field_width: None, precision: None, flags: CConversionFlags::empty(), }); assert_eq!( "%(m((u(((l((((ti))))p)))l))e)d".parse::(), expected ); } #[test] fn test_format_parse_key_fail() { assert_eq!( "%(aged".parse::(), Err(CFormatError { typ: CFormatErrorType::UnmatchedKeyParentheses, index: 1 }) ); } #[test] fn test_format_parse_type_fail() { assert_eq!( "Hello %n".parse::(), Err(CFormatError { typ: CFormatErrorType::UnsupportedFormatChar('n'), index: 7 }) ); } #[test] fn test_incomplete_format_fail() { assert_eq!( "Hello %".parse::(), Err(CFormatError { typ: CFormatErrorType::IncompleteFormat, index: 7 }) ); } #[test] fn test_parse_flags() { let expected = Ok(CFormatSpec { format_type: CFormatType::Number(CNumberType::Decimal), format_char: 'd', min_field_width: Some(CFormatQuantity::Amount(10)), precision: None, mapping_key: None, flags: CConversionFlags::all(), }); let parsed = "% 0 -+++###10d".parse::(); assert_eq!(parsed, expected); assert_eq!( parsed.unwrap().format_number(&BigInt::from(12)), "+12 ".to_owned() ); } #[test] fn test_parse_and_format_string() { assert_eq!( "%5.4s" .parse::() .unwrap() .format_string("Hello, World!".to_owned()), " Hell".to_owned() ); assert_eq!( "%-5.4s" .parse::() .unwrap() 
.format_string("Hello, World!".to_owned()), "Hell ".to_owned() ); } #[test] fn test_parse_and_format_unicode_string() { assert_eq!( "%.2s" .parse::() .unwrap() .format_string("❤❤❤❤❤❤❤❤".to_owned()), "❤❤".to_owned() ); } #[test] fn test_parse_and_format_number() { assert_eq!( "%05d" .parse::() .unwrap() .format_number(&BigInt::from(27)), "00027".to_owned() ); assert_eq!( "%+05d" .parse::() .unwrap() .format_number(&BigInt::from(27)), "+0027".to_owned() ); assert_eq!( "%-d" .parse::() .unwrap() .format_number(&BigInt::from(-27)), "-27".to_owned() ); assert_eq!( "% d" .parse::() .unwrap() .format_number(&BigInt::from(27)), " 27".to_owned() ); assert_eq!( "% d" .parse::() .unwrap() .format_number(&BigInt::from(-27)), "-27".to_owned() ); assert_eq!( "%08x" .parse::() .unwrap() .format_number(&BigInt::from(0x1337)), "00001337".to_owned() ); assert_eq!( "%#010x" .parse::() .unwrap() .format_number(&BigInt::from(0x1337)), "0x00001337".to_owned() ); assert_eq!( "%-#010x" .parse::() .unwrap() .format_number(&BigInt::from(0x1337)), "0x1337 ".to_owned() ); } #[test] fn test_parse_and_format_float() { assert_eq!( "%f".parse::().unwrap().format_float(1.2345), "1.234500" ); assert_eq!( "%+f".parse::().unwrap().format_float(1.2345), "+1.234500" ); assert_eq!( "% f".parse::().unwrap().format_float(1.2345), " 1.234500" ); assert_eq!( "%f".parse::().unwrap().format_float(-1.2345), "-1.234500" ); assert_eq!( "%f".parse::() .unwrap() .format_float(1.2345678901), "1.234568" ); } #[test] fn test_format_parse() { let fmt = "Hello, my name is %s and I'm %d years old"; let expected = Ok(CFormatString { parts: vec![ (0, CFormatPart::Literal("Hello, my name is ".to_owned())), ( 18, CFormatPart::Spec(CFormatSpec { format_type: CFormatType::String(CFormatPreconversor::Str), format_char: 's', mapping_key: None, min_field_width: None, precision: None, flags: CConversionFlags::empty(), }), ), (20, CFormatPart::Literal(" and I'm ".to_owned())), ( 29, CFormatPart::Spec(CFormatSpec { format_type: 
CFormatType::Number(CNumberType::Decimal), format_char: 'd', mapping_key: None, min_field_width: None, precision: None, flags: CConversionFlags::empty(), }), ), (31, CFormatPart::Literal(" years old".to_owned())), ], }); let result = fmt.parse::(); assert_eq!( result, expected, "left = {result:#?} \n\n\n right = {expected:#?}" ); } } rustpython-common-0.2.0/src/char.rs000064400000000000000000000010351046102023000154210ustar 00000000000000use unic_ucd_category::GeneralCategory; /// According to python following categories aren't printable: /// * Cc (Other, Control) /// * Cf (Other, Format) /// * Cs (Other, Surrogate) /// * Co (Other, Private Use) /// * Cn (Other, Not Assigned) /// * Zl Separator, Line ('\u2028', LINE SEPARATOR) /// * Zp Separator, Paragraph ('\u2029', PARAGRAPH SEPARATOR) /// * Zs (Separator, Space) other than ASCII space('\x20'). pub fn is_printable(c: char) -> bool { let cat = GeneralCategory::of(c); !(cat.is_other() || cat.is_separator()) } rustpython-common-0.2.0/src/cmp.rs000064400000000000000000000032361046102023000152700ustar 00000000000000use volatile::Volatile; /// Compare 2 byte slices in a way that ensures that the timing of the operation can't be used to /// glean any information about the data. #[inline(never)] #[cold] pub fn timing_safe_cmp(a: &[u8], b: &[u8]) -> bool { // we use raw pointers here to keep faithful to the C implementation and // to try to avoid any optimizations rustc might do with slices let len_a = a.len(); let a = a.as_ptr(); let len_b = b.len(); let b = b.as_ptr(); /* The volatile type declarations make sure that the compiler has no * chance to optimize and fold the code in any way that may change * the timing. 
*/ let mut result: u8 = 0; /* loop count depends on length of b */ let length: Volatile = Volatile::new(len_b); let mut left: Volatile<*const u8> = Volatile::new(std::ptr::null()); let mut right: Volatile<*const u8> = Volatile::new(b); /* don't use else here to keep the amount of CPU instructions constant, * volatile forces re-evaluation * */ if len_a == length.read() { left.write(Volatile::new(a).read()); result = 0; } if len_a != length.read() { left.write(b); result = 1; } for _ in 0..length.read() { let l = left.read(); left.write(l.wrapping_add(1)); let r = right.read(); right.write(r.wrapping_add(1)); // safety: the 0..length range will always be either: // * as long as the length of both a and b, if len_a and len_b are equal // * as long as b, and both `left` and `right` are b result |= unsafe { l.read_volatile() ^ r.read_volatile() }; } result == 0 } rustpython-common-0.2.0/src/crt_fd.rs000064400000000000000000000065671046102023000157640ustar 00000000000000//! A module implementing an io type backed by the C runtime's file descriptors, i.e. what's //! returned from libc::open, even on windows. 
use std::{cmp, ffi, io}; #[cfg(windows)] use libc::commit as fsync; #[cfg(windows)] extern "C" { #[link_name = "_chsize_s"] fn ftruncate(fd: i32, len: i64) -> i32; } #[cfg(not(windows))] use libc::{fsync, ftruncate}; // this is basically what CPython has for Py_off_t; windows uses long long // for offsets, other platforms just use off_t #[cfg(not(windows))] pub type Offset = libc::off_t; #[cfg(windows)] pub type Offset = libc::c_longlong; #[inline] fn cvt(ret: I, f: impl FnOnce(I) -> T) -> io::Result { if ret < I::zero() { Err(crate::os::errno()) } else { Ok(f(ret)) } } const MAX_RW: usize = if cfg!(any(windows, target_vendor = "apple")) { i32::MAX as usize } else { isize::MAX as usize }; #[derive(Copy, Clone, PartialEq, Eq)] #[repr(transparent)] pub struct Fd(pub i32); impl Fd { pub fn open(path: &ffi::CStr, flags: i32, mode: i32) -> io::Result { cvt(unsafe { libc::open(path.as_ptr(), flags, mode) }, Fd) } #[cfg(windows)] pub fn wopen(path: &widestring::WideCStr, flags: i32, mode: i32) -> io::Result { cvt( unsafe { suppress_iph!(libc::wopen(path.as_ptr(), flags, mode)) }, Fd, ) } #[cfg(all(any(unix, target_os = "wasi"), not(target_os = "redox")))] pub fn openat(&self, path: &ffi::CStr, flags: i32, mode: i32) -> io::Result { cvt( unsafe { libc::openat(self.0, path.as_ptr(), flags, mode) }, Fd, ) } pub fn fsync(&self) -> io::Result<()> { cvt(unsafe { suppress_iph!(fsync(self.0)) }, drop) } pub fn close(&self) -> io::Result<()> { cvt(unsafe { suppress_iph!(libc::close(self.0)) }, drop) } pub fn ftruncate(&self, len: Offset) -> io::Result<()> { cvt(unsafe { suppress_iph!(ftruncate(self.0, len)) }, drop) } #[cfg(windows)] pub fn to_raw_handle(&self) -> io::Result { extern "C" { fn _get_osfhandle(fd: i32) -> libc::intptr_t; } let handle = unsafe { suppress_iph!(_get_osfhandle(self.0)) }; if handle == -1 { Err(io::Error::last_os_error()) } else { Ok(handle as _) } } } impl io::Write for &Fd { fn write(&mut self, buf: &[u8]) -> io::Result { let count = cmp::min(buf.len(), 
/// A decoding failure split at the point where the input stopped being valid.
struct DecodeError<'a> {
    /// The part of the input before the error, already known to be valid UTF-8.
    valid_prefix: &'a str,
    /// The input from the first offending byte onwards.
    rest: &'a [u8],
    /// Length of the invalid sequence, when the caller knows it.
    err_len: Option<usize>,
}

/// Splits `v` at `valid_up_to` into a [`DecodeError`].
///
/// # Safety
/// `v[..valid_up_to]` must be valid utf8 (which also requires
/// `valid_up_to <= v.len()`).
unsafe fn make_decode_err(v: &[u8], valid_up_to: usize, err_len: Option<usize>) -> DecodeError<'_> {
    // SAFETY: the caller guarantees `valid_up_to` is within `v`.
    let invalid_tail = v.get_unchecked(valid_up_to..);
    let checked_head = v.get_unchecked(..valid_up_to);
    DecodeError {
        // SAFETY: the caller guarantees `v[..valid_up_to]` is valid UTF-8.
        valid_prefix: core::str::from_utf8_unchecked(checked_head),
        rest: invalid_tail,
        err_len,
    }
}
decode_utf8_compatible( data: &[u8], errors: &E, decode: DecodeF, handle_error: ErrF, ) -> Result<(String, usize), E::Error> where DecodeF: Fn(&[u8]) -> Result<&str, DecodeError<'_>>, ErrF: Fn(&[u8], Option) -> HandleResult<'_>, { if data.is_empty() { return Ok((String::new(), 0)); } // we need to coerce the lifetime to that of the function body rather than the // anonymous input lifetime, so that we can assign it data borrowed from data_from_err let mut data = data; let mut data_from_err: E::BytesBuf; let mut out = String::with_capacity(data.len()); let mut remaining_index = 0; let mut remaining_data = data; loop { match decode(remaining_data) { Ok(decoded) => { out.push_str(decoded); remaining_index += decoded.len(); break; } Err(e) => { out.push_str(e.valid_prefix); match handle_error(e.rest, e.err_len) { HandleResult::Done => { remaining_index += e.valid_prefix.len(); break; } HandleResult::Error { err_len, reason } => { let err_idx = remaining_index + e.valid_prefix.len(); let err_range = err_idx..err_len.map_or_else(|| data.len(), |len| err_idx + len); let (replace, new_data, restart) = errors.handle_decode_error(data, err_range, reason)?; out.push_str(replace.as_ref()); if let Some(new_data) = new_data { data_from_err = new_data; data = data_from_err.as_ref(); } remaining_data = data .get(restart..) 
.ok_or_else(|| errors.error_oob_restart(restart))?; remaining_index = restart; continue; } } } } } Ok((out, remaining_index)) } pub mod utf8 { use super::*; pub const ENCODING_NAME: &str = "utf-8"; #[inline] pub fn encode(s: &str, _errors: &E) -> Result, E::Error> { Ok(s.as_bytes().to_vec()) } pub fn decode( data: &[u8], errors: &E, final_decode: bool, ) -> Result<(String, usize), E::Error> { decode_utf8_compatible( data, errors, |v| { core::str::from_utf8(v).map_err(|e| { // SAFETY: as specified in valid_up_to's documentation, input[..e.valid_up_to()] // is valid utf8 unsafe { make_decode_err(v, e.valid_up_to(), e.error_len()) } }) }, |rest, err_len| { let first_err = rest[0]; if matches!(first_err, 0x80..=0xc1 | 0xf5..=0xff) { HandleResult::Error { err_len: Some(1), reason: "invalid start byte", } } else if err_len.is_none() { // error_len() == None means unexpected eof if final_decode { HandleResult::Error { err_len, reason: "unexpected end of data", } } else { HandleResult::Done } } else if !final_decode && matches!(rest, [0xed, 0xa0..=0xbf]) { // truncated surrogate HandleResult::Done } else { HandleResult::Error { err_len, reason: "invalid continuation byte", } } }, ) } } pub mod latin_1 { use super::*; pub const ENCODING_NAME: &str = "latin-1"; const ERR_REASON: &str = "ordinal not in range(256)"; #[inline] pub fn encode(s: &str, errors: &E) -> Result, E::Error> { let full_data = s; let mut data = s; let mut char_data_index = 0; let mut out = Vec::::new(); loop { match data .char_indices() .enumerate() .find(|(_, (_, c))| !c.is_ascii()) { None => { out.extend_from_slice(data.as_bytes()); break; } Some((char_i, (byte_i, ch))) => { out.extend_from_slice(&data.as_bytes()[..byte_i]); let char_start = char_data_index + char_i; if (ch as u32) <= 255 { out.push(ch as u8); let char_restart = char_start + 1; data = crate::str::try_get_chars(full_data, char_restart..) 
.ok_or_else(|| errors.error_oob_restart(char_restart))?; char_data_index = char_restart; } else { // number of non-latin_1 chars between the first non-latin_1 char and the next latin_1 char let non_latin_1_run_length = data[byte_i..] .chars() .take_while(|c| (*c as u32) > 255) .count(); let char_range = char_start..char_start + non_latin_1_run_length; let (replace, char_restart) = errors.handle_encode_error( full_data, char_range.clone(), ERR_REASON, )?; match replace { EncodeReplace::Str(s) => { if s.as_ref().chars().any(|c| (c as u32) > 255) { return Err( errors.error_encoding(full_data, char_range, ERR_REASON) ); } out.extend_from_slice(s.as_ref().as_bytes()); } EncodeReplace::Bytes(b) => { out.extend_from_slice(b.as_ref()); } } data = crate::str::try_get_chars(full_data, char_restart..) .ok_or_else(|| errors.error_oob_restart(char_restart))?; char_data_index = char_restart; } continue; } } } Ok(out) } pub fn decode(data: &[u8], _errors: &E) -> Result<(String, usize), E::Error> { let out: String = data.iter().map(|c| *c as char).collect(); let out_len = out.len(); Ok((out, out_len)) } } pub mod ascii { use super::*; use ::ascii::AsciiStr; pub const ENCODING_NAME: &str = "ascii"; const ERR_REASON: &str = "ordinal not in range(128)"; #[inline] pub fn encode(s: &str, errors: &E) -> Result, E::Error> { let full_data = s; let mut data = s; let mut char_data_index = 0; let mut out = Vec::::new(); loop { match data .char_indices() .enumerate() .find(|(_, (_, c))| !c.is_ascii()) { None => { out.extend_from_slice(data.as_bytes()); break; } Some((char_i, (byte_i, _))) => { out.extend_from_slice(&data.as_bytes()[..byte_i]); let char_start = char_data_index + char_i; // number of non-ascii chars between the first non-ascii char and the next ascii char let non_ascii_run_length = data[byte_i..].chars().take_while(|c| !c.is_ascii()).count(); let char_range = char_start..char_start + non_ascii_run_length; let (replace, char_restart) = errors.handle_encode_error(full_data, 
/// Splits the magnitude of `value` into a mantissa in `[0.5, 1)` and a power of two.
///
/// For finite non-zero input the result `(m, e)` satisfies `m * 2^e == |value|`;
/// the sign of `value` is dropped. Zero yields `(0.0, 0)`. Infinities and NaN are
/// not given a meaningful result, so callers should filter them out first.
pub fn ufrexp(value: f64) -> (f64, i32) {
    const FRACTION_MASK: u64 = 0x000f_ffff_ffff_ffff;
    const EXPONENT_MASK: u64 = 0x7ff;
    // Biased exponent that places a mantissa in [0.5, 1).
    const HALF_BIASED_EXPONENT: u64 = 1022;
    // Scaling by 2^54 is exact and lifts every subnormal into the normal range.
    const SUBNORMAL_SHIFT: i32 = 54;

    if value == 0.0 {
        return (0.0, 0);
    }

    let bits = value.to_bits();
    let biased_exponent = ((bits >> 52) & EXPONENT_MASK) as i32;

    if biased_exponent == 0 {
        // Subnormal: there is no implicit leading one, so reusing the fraction bits
        // directly would give the wrong mantissa. Normalise first, then undo the
        // scaling in the exponent.
        let scaled = f64::from_bits(bits & FRACTION_MASK) * (1u64 << SUBNORMAL_SHIFT) as f64;
        let (mantissa, exponent) = ufrexp(scaled);
        return (mantissa, exponent - SUBNORMAL_SHIFT);
    }

    let mantissa_bits = (bits & FRACTION_MASK) | (HALF_BIASED_EXPONENT << 52);
    (f64::from_bits(mantissa_bits), biased_exponent - 1022)
}
/// /// # Examples /// /// ``` /// use num_bigint::BigInt; /// use rustpython_common::float_ops::eq_int; /// let a = 1.0f64; /// let b = BigInt::from(1); /// let c = 2.0f64; /// assert!(eq_int(a, &b)); /// assert!(!eq_int(c, &b)); /// ``` /// pub fn eq_int(value: f64, other: &BigInt) -> bool { if let (Some(self_int), Some(other_float)) = (value.to_bigint(), other.to_f64()) { value == other_float && self_int == *other } else { false } } pub fn lt_int(value: f64, other_int: &BigInt) -> bool { match (value.to_bigint(), other_int.to_f64()) { (Some(self_int), Some(other_float)) => value < other_float || self_int < *other_int, // finite float, other_int too big for float, // the result depends only on other_int’s sign (Some(_), None) => other_int.is_positive(), // infinite float must be bigger or lower than any int, depending on its sign _ if value.is_infinite() => value.is_sign_negative(), // NaN, always false _ => false, } } pub fn gt_int(value: f64, other_int: &BigInt) -> bool { match (value.to_bigint(), other_int.to_f64()) { (Some(self_int), Some(other_float)) => value > other_float || self_int > *other_int, // finite float, other_int too big for float, // the result depends only on other_int’s sign (Some(_), None) => other_int.is_negative(), // infinite float must be bigger or lower than any int, depending on its sign _ if value.is_infinite() => value.is_sign_positive(), // NaN, always false _ => false, } } pub fn parse_str(literal: &str) -> Option { parse_inner(literal.trim().as_bytes()) } pub fn parse_bytes(literal: &[u8]) -> Option { parse_inner(trim_slice(literal, |b| b.is_ascii_whitespace())) } fn trim_slice(v: &[T], mut trim: impl FnMut(&T) -> bool) -> &[T] { let mut it = v.iter(); // it.take_while_ref(&mut trim).for_each(drop); // hmm.. 
/// Returns `true` when `v` is finite and has no fractional part.
///
/// The check is exact. Comparing the distance to the nearest integer against
/// `f64::EPSILON` would wrongly accept values such as `0.9999999999999999` or
/// `1e-17`, which are representable non-integers.
pub fn is_integer(v: f64) -> bool {
    v.is_finite() && v.fract() == 0.0
}
/// Strips trailing zeros, and then a trailing decimal point, from a formatted float.
///
/// The string is returned untouched in alternate form or when it contains no `'.'`.
/// This function does NOT work with decimal commas.
fn maybe_remove_trailing_redundant_chars(s: String, alternate_form: bool) -> String {
    if alternate_form || !s.contains('.') {
        return s;
    }
    remove_trailing_decimal_point(remove_trailing_zeros(s))
}

/// Drops every `'0'` at the end of `s`.
fn remove_trailing_zeros(mut s: String) -> String {
    let kept = s.trim_end_matches('0').len();
    s.truncate(kept);
    s
}

/// Drops a single `'.'` at the end of `s`, if there is one.
fn remove_trailing_decimal_point(mut s: String) -> String {
    if s.ends_with('.') {
        s.truncate(s.len() - 1);
    }
    s
}
maybe_remove_trailing_redundant_chars( format!("{magnitude:.precision$}"), alternate_form, ) } } magnitude if magnitude.is_nan() => format_nan(case), magnitude if magnitude.is_infinite() => format_inf(case), _ => "".to_string(), } } pub fn to_string(value: f64) -> String { let lit = format!("{value:e}"); if let Some(position) = lit.find('e') { let significand = &lit[..position]; let exponent = &lit[position + 1..]; let exponent = exponent.parse::().unwrap(); if exponent < 16 && exponent > -5 { if is_integer(value) { format!("{value:.1?}") } else { value.to_string() } } else { format!("{significand}e{exponent:+#03}") } } else { let mut s = value.to_string(); s.make_ascii_lowercase(); s } } pub fn from_hex(s: &str) -> Option { if let Ok(f) = hexf_parse::parse_hexf64(s, false) { return Some(f); } match s.to_ascii_lowercase().as_str() { "nan" | "+nan" | "-nan" => Some(f64::NAN), "inf" | "infinity" | "+inf" | "+infinity" => Some(f64::INFINITY), "-inf" | "-infinity" => Some(f64::NEG_INFINITY), value => { let mut hex = String::with_capacity(value.len()); let has_0x = value.contains("0x"); let has_p = value.contains('p'); let has_dot = value.contains('.'); let mut start = 0; if !has_0x && value.starts_with('-') { hex.push_str("-0x"); start += 1; } else if !has_0x { hex.push_str("0x"); if value.starts_with('+') { start += 1; } } for (index, ch) in value.chars().enumerate() { if ch == 'p' { if has_dot { hex.push('p'); } else { hex.push_str(".p"); } } else if index >= start { hex.push(ch); } } if !has_p && has_dot { hex.push_str("p0"); } else if !has_p && !has_dot { hex.push_str(".p0") } hexf_parse::parse_hexf64(hex.as_str(), false).ok() } } } pub fn to_hex(value: f64) -> String { let (mantissa, exponent, sign) = value.integer_decode(); let sign_fmt = if sign < 0 { "-" } else { "" }; match value { value if value.is_zero() => format!("{sign_fmt}0x0.0p+0"), value if value.is_infinite() => format!("{sign_fmt}inf"), value if value.is_nan() => "nan".to_owned(), _ => { const BITS: 
i16 = 52; const FRACT_MASK: u64 = 0xf_ffff_ffff_ffff; format!( "{}{:#x}.{:013x}p{:+}", sign_fmt, mantissa >> BITS, mantissa & FRACT_MASK, exponent + BITS ) } } } pub fn div(v1: f64, v2: f64) -> Option { if v2 != 0.0 { Some(v1 / v2) } else { None } } pub fn mod_(v1: f64, v2: f64) -> Option { if v2 != 0.0 { let mut val = v1 % v2; if (val < 0.0) != (v2 < 0.0) { val += v2; } Some(val) } else { None } } pub fn floordiv(v1: f64, v2: f64) -> Option { if v2 != 0.0 { Some((v1 / v2).floor()) } else { None } } pub fn divmod(v1: f64, v2: f64) -> Option<(f64, f64)> { if v2 != 0.0 { let mut m = v1 % v2; let mut d = (v1 - m) / v2; if v2.is_sign_negative() != m.is_sign_negative() { m += v2; d -= 1.0; } Some((d, m)) } else { None } } // nextafter algorithm based off of https://gitlab.com/bronsonbdevost/next_afterf #[allow(clippy::float_cmp)] pub fn nextafter(x: f64, y: f64) -> f64 { if x == y { y } else if x.is_nan() || y.is_nan() { f64::NAN } else if x >= f64::INFINITY { f64::MAX } else if x <= f64::NEG_INFINITY { f64::MIN } else if x == 0.0 { f64::from_bits(1).copysign(y) } else { // next x after 0 if y is farther from 0 than x, otherwise next towards 0 // the sign is a separate bit in floats, so bits+1 moves away from 0 no matter the float let b = x.to_bits(); let bits = if (y > x) == (x > 0.0) { b + 1 } else { b - 1 }; let ret = f64::from_bits(bits); if ret == 0.0 { ret.copysign(x) } else { ret } } } pub fn ulp(x: f64) -> f64 { if x.is_nan() { return x; } let x = x.abs(); let x2 = nextafter(x, f64::INFINITY); if x2.is_infinite() { // special case: x is the largest positive representable float let x2 = nextafter(x, f64::NEG_INFINITY); x - x2 } else { x2 - x } } pub fn round_float_digits(x: f64, ndigits: i32) -> Option { let float = if ndigits.is_zero() { let fract = x.fract(); if (fract.abs() - 0.5).abs() < f64::EPSILON { if x.trunc() % 2.0 == 0.0 { x - fract } else { x + fract } } else { x.round() } } else { const NDIGITS_MAX: i32 = ((f64::MANTISSA_DIGITS as i32 - f64::MIN_EXP) 
as f64 * f64::consts::LOG10_2) as i32; const NDIGITS_MIN: i32 = -(((f64::MAX_EXP + 1) as f64 * f64::consts::LOG10_2) as i32); if ndigits > NDIGITS_MAX { x } else if ndigits < NDIGITS_MIN { 0.0f64.copysign(x) } else { let (y, pow1, pow2) = if ndigits >= 0 { // according to cpython: pow1 and pow2 are each safe from overflow, but // pow1*pow2 ~= pow(10.0, ndigits) might overflow let (pow1, pow2) = if ndigits > 22 { (10.0.powf((ndigits - 22) as f64), 1e22) } else { (10.0.powf(ndigits as f64), 1.0) }; let y = (x * pow1) * pow2; if !y.is_finite() { return Some(x); } (y, pow1, Some(pow2)) } else { let pow1 = 10.0.powf((-ndigits) as f64); (x / pow1, pow1, None) }; let z = y.round(); #[allow(clippy::float_cmp)] let z = if (y - z).abs() == 0.5 { 2.0 * (y / 2.0).round() } else { z }; let z = if let Some(pow2) = pow2 { // ndigits >= 0 (z / pow2) / pow1 } else { z * pow1 }; if !z.is_finite() { // overflow return None; } z } }; Some(float) } #[test] fn test_to_hex() { use rand::Rng; for _ in 0..20000 { let bytes = rand::thread_rng().gen::<[u64; 1]>(); let f = f64::from_bits(bytes[0]); if !f.is_finite() { continue; } let hex = to_hex(f); // println!("{} -> {}", f, hex); let roundtrip = hexf_parse::parse_hexf64(&hex, false).unwrap(); // println!(" -> {}", roundtrip); assert!(f == roundtrip, "{} {} {}", f, hex, roundtrip); } } #[test] fn test_remove_trailing_zeros() { assert!(remove_trailing_zeros(String::from("100")) == *"1"); assert!(remove_trailing_zeros(String::from("100.00")) == *"100."); // leave leading zeros untouched assert!(remove_trailing_zeros(String::from("001")) == *"001"); // leave strings untouched if they don't end with 0 assert!(remove_trailing_zeros(String::from("101")) == *"101"); } #[test] fn test_remove_trailing_decimal_point() { assert!(remove_trailing_decimal_point(String::from("100.")) == *"100"); assert!(remove_trailing_decimal_point(String::from("1.")) == *"1"); // leave leading decimal points untouched 
assert!(remove_trailing_decimal_point(String::from(".5")) == *".5"); } #[test] fn test_maybe_remove_trailing_redundant_chars() { assert!(maybe_remove_trailing_redundant_chars(String::from("100."), true) == *"100."); assert!(maybe_remove_trailing_redundant_chars(String::from("100."), false) == *"100"); assert!(maybe_remove_trailing_redundant_chars(String::from("1."), false) == *"1"); assert!(maybe_remove_trailing_redundant_chars(String::from("10.0"), false) == *"10"); // don't truncate integers assert!(maybe_remove_trailing_redundant_chars(String::from("1000"), false) == *"1000"); } rustpython-common-0.2.0/src/format.rs000064400000000000000000001155571046102023000160130ustar 00000000000000use crate::{float_ops, str::BorrowedStr}; use itertools::{Itertools, PeekingNext}; use num_bigint::{BigInt, Sign}; use num_traits::{cast::ToPrimitive, Signed}; use std::{cmp, str::FromStr}; trait FormatParse { fn parse(text: &str) -> (Option, &str) where Self: Sized; } #[derive(Debug, Copy, Clone, PartialEq)] pub enum FormatPreconversor { Str, Repr, Ascii, Bytes, } impl FormatParse for FormatPreconversor { fn parse(text: &str) -> (Option, &str) { let Some(preconversor) = Self::from_string(text) else { return (None, text); }; let mut chars = text.chars(); chars.next(); // Consume the bang chars.next(); // Consume one r,s,a char (Some(preconversor), chars.as_str()) } } impl FormatPreconversor { pub fn from_char(c: char) -> Option { match c { 's' => Some(FormatPreconversor::Str), 'r' => Some(FormatPreconversor::Repr), 'a' => Some(FormatPreconversor::Ascii), 'b' => Some(FormatPreconversor::Bytes), _ => None, } } fn from_string(text: &str) -> Option { let mut chars = text.chars(); if chars.next() != Some('!') { return None; } FormatPreconversor::from_char(chars.next()?) 
} } #[derive(Debug, Copy, Clone, PartialEq)] pub enum FormatAlign { Left, Right, AfterSign, Center, } impl FormatAlign { fn from_char(c: char) -> Option { match c { '<' => Some(FormatAlign::Left), '>' => Some(FormatAlign::Right), '=' => Some(FormatAlign::AfterSign), '^' => Some(FormatAlign::Center), _ => None, } } } impl FormatParse for FormatAlign { fn parse(text: &str) -> (Option, &str) { let mut chars = text.chars(); if let Some(maybe_align) = chars.next().and_then(Self::from_char) { (Some(maybe_align), chars.as_str()) } else { (None, text) } } } #[derive(Debug, Copy, Clone, PartialEq)] pub enum FormatSign { Plus, Minus, MinusOrSpace, } impl FormatParse for FormatSign { fn parse(text: &str) -> (Option, &str) { let mut chars = text.chars(); match chars.next() { Some('-') => (Some(Self::Minus), chars.as_str()), Some('+') => (Some(Self::Plus), chars.as_str()), Some(' ') => (Some(Self::MinusOrSpace), chars.as_str()), _ => (None, text), } } } #[derive(Debug, PartialEq)] pub enum FormatGrouping { Comma, Underscore, } impl FormatParse for FormatGrouping { fn parse(text: &str) -> (Option, &str) { let mut chars = text.chars(); match chars.next() { Some('_') => (Some(Self::Underscore), chars.as_str()), Some(',') => (Some(Self::Comma), chars.as_str()), _ => (None, text), } } } #[derive(Debug, PartialEq)] pub enum FormatType { String, Binary, Character, Decimal, Octal, HexLower, HexUpper, Number, ExponentLower, ExponentUpper, GeneralFormatLower, GeneralFormatUpper, FixedPointLower, FixedPointUpper, Percentage, } impl From<&FormatType> for char { fn from(from: &FormatType) -> char { match from { FormatType::String => 's', FormatType::Binary => 'b', FormatType::Character => 'c', FormatType::Decimal => 'd', FormatType::Octal => 'o', FormatType::HexLower => 'x', FormatType::HexUpper => 'X', FormatType::Number => 'n', FormatType::ExponentLower => 'e', FormatType::ExponentUpper => 'E', FormatType::GeneralFormatLower => 'g', FormatType::GeneralFormatUpper => 'G', 
FormatType::FixedPointLower => 'f', FormatType::FixedPointUpper => 'F', FormatType::Percentage => '%', } } } impl FormatParse for FormatType { fn parse(text: &str) -> (Option, &str) { let mut chars = text.chars(); match chars.next() { Some('s') => (Some(Self::String), chars.as_str()), Some('b') => (Some(Self::Binary), chars.as_str()), Some('c') => (Some(Self::Character), chars.as_str()), Some('d') => (Some(Self::Decimal), chars.as_str()), Some('o') => (Some(Self::Octal), chars.as_str()), Some('x') => (Some(Self::HexLower), chars.as_str()), Some('X') => (Some(Self::HexUpper), chars.as_str()), Some('e') => (Some(Self::ExponentLower), chars.as_str()), Some('E') => (Some(Self::ExponentUpper), chars.as_str()), Some('f') => (Some(Self::FixedPointLower), chars.as_str()), Some('F') => (Some(Self::FixedPointUpper), chars.as_str()), Some('g') => (Some(Self::GeneralFormatLower), chars.as_str()), Some('G') => (Some(Self::GeneralFormatUpper), chars.as_str()), Some('n') => (Some(Self::Number), chars.as_str()), Some('%') => (Some(Self::Percentage), chars.as_str()), _ => (None, text), } } } #[derive(Debug, PartialEq)] pub struct FormatSpec { preconversor: Option, fill: Option, align: Option, sign: Option, alternate_form: bool, width: Option, grouping_option: Option, precision: Option, format_type: Option, } fn get_num_digits(text: &str) -> usize { for (index, character) in text.char_indices() { if !character.is_ascii_digit() { return index; } } text.len() } fn parse_fill_and_align(text: &str) -> (Option, Option, &str) { let char_indices: Vec<(usize, char)> = text.char_indices().take(3).collect(); if char_indices.is_empty() { (None, None, text) } else if char_indices.len() == 1 { let (maybe_align, remaining) = FormatAlign::parse(text); (None, maybe_align, remaining) } else { let (maybe_align, remaining) = FormatAlign::parse(&text[char_indices[1].0..]); if maybe_align.is_some() { (Some(char_indices[0].1), maybe_align, remaining) } else { let (only_align, only_align_remaining) = 
FormatAlign::parse(text); (None, only_align, only_align_remaining) } } } fn parse_number(text: &str) -> Result<(Option, &str), FormatSpecError> { let num_digits: usize = get_num_digits(text); if num_digits == 0 { return Ok((None, text)); } if let Ok(num) = text[..num_digits].parse::() { Ok((Some(num), &text[num_digits..])) } else { // NOTE: this condition is different from CPython Err(FormatSpecError::DecimalDigitsTooMany) } } fn parse_alternate_form(text: &str) -> (bool, &str) { let mut chars = text.chars(); match chars.next() { Some('#') => (true, chars.as_str()), _ => (false, text), } } fn parse_zero(text: &str) -> (bool, &str) { let mut chars = text.chars(); match chars.next() { Some('0') => (true, chars.as_str()), _ => (false, text), } } fn parse_precision(text: &str) -> Result<(Option, &str), FormatSpecError> { let mut chars = text.chars(); Ok(match chars.next() { Some('.') => { let (size, remaining) = parse_number(chars.as_str())?; if let Some(size) = size { if size > i32::MAX as usize { return Err(FormatSpecError::PrecisionTooBig); } (Some(size), remaining) } else { (None, text) } } _ => (None, text), }) } impl FormatSpec { pub fn parse(text: &str) -> Result { // get_integer in CPython let (preconversor, text) = FormatPreconversor::parse(text); let (mut fill, mut align, text) = parse_fill_and_align(text); let (sign, text) = FormatSign::parse(text); let (alternate_form, text) = parse_alternate_form(text); let (zero, text) = parse_zero(text); let (width, text) = parse_number(text)?; let (grouping_option, text) = FormatGrouping::parse(text); let (precision, text) = parse_precision(text)?; let (format_type, text) = FormatType::parse(text); if !text.is_empty() { return Err(FormatSpecError::InvalidFormatSpecifier); } if zero && fill.is_none() { fill.replace('0'); align = align.or(Some(FormatAlign::AfterSign)); } Ok(FormatSpec { preconversor, fill, align, sign, alternate_form, width, grouping_option, precision, format_type, }) } fn compute_fill_string(fill_char: 
char, fill_chars_needed: i32) -> String { (0..fill_chars_needed) .map(|_| fill_char) .collect::() } fn add_magnitude_separators_for_char( magnitude_str: String, inter: i32, sep: char, disp_digit_cnt: i32, ) -> String { // Don't add separators to the floating decimal point of numbers let mut parts = magnitude_str.splitn(2, '.'); let magnitude_int_str = parts.next().unwrap().to_string(); let dec_digit_cnt = magnitude_str.len() as i32 - magnitude_int_str.len() as i32; let int_digit_cnt = disp_digit_cnt - dec_digit_cnt; let mut result = FormatSpec::separate_integer(magnitude_int_str, inter, sep, int_digit_cnt); if let Some(part) = parts.next() { result.push_str(&format!(".{part}")) } result } fn separate_integer( magnitude_str: String, inter: i32, sep: char, disp_digit_cnt: i32, ) -> String { let magnitude_len = magnitude_str.len() as i32; let offset = (disp_digit_cnt % (inter + 1) == 0) as i32; let disp_digit_cnt = disp_digit_cnt + offset; let pad_cnt = disp_digit_cnt - magnitude_len; if pad_cnt > 0 { // separate with 0 padding let sep_cnt = disp_digit_cnt / (inter + 1); let padding = "0".repeat((pad_cnt - sep_cnt) as usize); let padded_num = format!("{padding}{magnitude_str}"); FormatSpec::insert_separator(padded_num, inter, sep, sep_cnt) } else { // separate without padding let sep_cnt = (magnitude_len - 1) / inter; FormatSpec::insert_separator(magnitude_str, inter, sep, sep_cnt) } } fn insert_separator(mut magnitude_str: String, inter: i32, sep: char, sep_cnt: i32) -> String { let magnitude_len = magnitude_str.len() as i32; for i in 1..sep_cnt + 1 { magnitude_str.insert((magnitude_len - inter * i) as usize, sep); } magnitude_str } fn validate_format(&self, default_format_type: FormatType) -> Result<(), FormatSpecError> { let format_type = self.format_type.as_ref().unwrap_or(&default_format_type); match (&self.grouping_option, format_type) { ( Some(FormatGrouping::Comma), FormatType::String | FormatType::Character | FormatType::Binary | FormatType::Octal | 
FormatType::HexLower | FormatType::HexUpper | FormatType::Number, ) => { let ch = char::from(format_type); Err(FormatSpecError::UnspecifiedFormat(',', ch)) } ( Some(FormatGrouping::Underscore), FormatType::String | FormatType::Character | FormatType::Number, ) => { let ch = char::from(format_type); Err(FormatSpecError::UnspecifiedFormat('_', ch)) } _ => Ok(()), } } fn get_separator_interval(&self) -> usize { match self.format_type { Some(FormatType::Binary) => 4, Some(FormatType::Decimal) => 3, Some(FormatType::Octal) => 4, Some(FormatType::HexLower) => 4, Some(FormatType::HexUpper) => 4, Some(FormatType::Number) => 3, Some(FormatType::FixedPointLower) | Some(FormatType::FixedPointUpper) => 3, None => 3, _ => panic!("Separators only valid for numbers!"), } } fn add_magnitude_separators(&self, magnitude_str: String, prefix: &str) -> String { match &self.grouping_option { Some(fg) => { let sep = match fg { FormatGrouping::Comma => ',', FormatGrouping::Underscore => '_', }; let inter = self.get_separator_interval().try_into().unwrap(); let magnitude_len = magnitude_str.len(); let width = self.width.unwrap_or(magnitude_len) as i32 - prefix.len() as i32; let disp_digit_cnt = cmp::max(width, magnitude_len as i32); FormatSpec::add_magnitude_separators_for_char( magnitude_str, inter, sep, disp_digit_cnt, ) } None => magnitude_str, } } pub fn format_float(&self, num: f64) -> Result { self.validate_format(FormatType::FixedPointLower)?; let precision = self.precision.unwrap_or(6); let magnitude = num.abs(); let raw_magnitude_str: Result = match self.format_type { Some(FormatType::FixedPointUpper) => Ok(float_ops::format_fixed( precision, magnitude, float_ops::Case::Upper, )), Some(FormatType::FixedPointLower) => Ok(float_ops::format_fixed( precision, magnitude, float_ops::Case::Lower, )), Some(FormatType::Decimal) | Some(FormatType::Binary) | Some(FormatType::Octal) | Some(FormatType::HexLower) | Some(FormatType::HexUpper) | Some(FormatType::String) | 
Some(FormatType::Character) => { let ch = char::from(self.format_type.as_ref().unwrap()); Err(FormatSpecError::UnknownFormatCode(ch, "float")) } Some(FormatType::Number) => Err(FormatSpecError::NotImplemented('n', "float")), Some(FormatType::GeneralFormatUpper) => { let precision = if precision == 0 { 1 } else { precision }; Ok(float_ops::format_general( precision, magnitude, float_ops::Case::Upper, false, false, )) } Some(FormatType::GeneralFormatLower) => { let precision = if precision == 0 { 1 } else { precision }; Ok(float_ops::format_general( precision, magnitude, float_ops::Case::Lower, false, false, )) } Some(FormatType::ExponentUpper) => Ok(float_ops::format_exponent( precision, magnitude, float_ops::Case::Upper, )), Some(FormatType::ExponentLower) => Ok(float_ops::format_exponent( precision, magnitude, float_ops::Case::Lower, )), Some(FormatType::Percentage) => match magnitude { magnitude if magnitude.is_nan() => Ok("nan%".to_owned()), magnitude if magnitude.is_infinite() => Ok("inf%".to_owned()), _ => Ok(format!("{:.*}%", precision, magnitude * 100.0)), }, None => match magnitude { magnitude if magnitude.is_nan() => Ok("nan".to_owned()), magnitude if magnitude.is_infinite() => Ok("inf".to_owned()), _ => match self.precision { Some(_) => { let precision = self.precision.unwrap_or(magnitude.to_string().len() - 1); Ok(float_ops::format_general( precision, magnitude, float_ops::Case::Lower, false, true, )) } None => Ok(float_ops::to_string(magnitude)), }, }, }; let format_sign = self.sign.unwrap_or(FormatSign::Minus); let sign_str = if num.is_sign_negative() && !num.is_nan() { "-" } else { match format_sign { FormatSign::Plus => "+", FormatSign::Minus => "", FormatSign::MinusOrSpace => " ", } }; let magnitude_str = self.add_magnitude_separators(raw_magnitude_str?, sign_str); self.format_sign_and_align( unsafe { &BorrowedStr::from_ascii_unchecked(magnitude_str.as_bytes()) }, sign_str, FormatAlign::Right, ) } #[inline] fn format_int_radix(&self, magnitude: 
BigInt, radix: u32) -> Result { match self.precision { Some(_) => Err(FormatSpecError::PrecisionNotAllowed), None => Ok(magnitude.to_str_radix(radix)), } } pub fn format_int(&self, num: &BigInt) -> Result { self.validate_format(FormatType::Decimal)?; let magnitude = num.abs(); let prefix = if self.alternate_form { match self.format_type { Some(FormatType::Binary) => "0b", Some(FormatType::Octal) => "0o", Some(FormatType::HexLower) => "0x", Some(FormatType::HexUpper) => "0X", _ => "", } } else { "" }; let raw_magnitude_str: Result = match self.format_type { Some(FormatType::Binary) => self.format_int_radix(magnitude, 2), Some(FormatType::Decimal) => self.format_int_radix(magnitude, 10), Some(FormatType::Octal) => self.format_int_radix(magnitude, 8), Some(FormatType::HexLower) => self.format_int_radix(magnitude, 16), Some(FormatType::HexUpper) => match self.precision { Some(_) => Err(FormatSpecError::PrecisionNotAllowed), None => { let mut result = magnitude.to_str_radix(16); result.make_ascii_uppercase(); Ok(result) } }, Some(FormatType::Number) => self.format_int_radix(magnitude, 10), Some(FormatType::String) => Err(FormatSpecError::UnknownFormatCode('s', "int")), Some(FormatType::Character) => match (self.sign, self.alternate_form) { (Some(_), _) => Err(FormatSpecError::NotAllowed("Sign")), (_, true) => Err(FormatSpecError::NotAllowed("Alternate form (#)")), (_, _) => match num.to_u32() { Some(n) if n <= 0x10ffff => Ok(std::char::from_u32(n).unwrap().to_string()), Some(_) | None => Err(FormatSpecError::CodeNotInRange), }, }, Some(FormatType::GeneralFormatUpper) | Some(FormatType::GeneralFormatLower) | Some(FormatType::FixedPointUpper) | Some(FormatType::FixedPointLower) | Some(FormatType::ExponentUpper) | Some(FormatType::ExponentLower) | Some(FormatType::Percentage) => match num.to_f64() { Some(float) => return self.format_float(float), _ => Err(FormatSpecError::UnableToConvert), }, None => self.format_int_radix(magnitude, 10), }; let format_sign = 
self.sign.unwrap_or(FormatSign::Minus); let sign_str = match num.sign() { Sign::Minus => "-", _ => match format_sign { FormatSign::Plus => "+", FormatSign::Minus => "", FormatSign::MinusOrSpace => " ", }, }; let sign_prefix = format!("{sign_str}{prefix}"); let magnitude_str = self.add_magnitude_separators(raw_magnitude_str?, &sign_prefix); self.format_sign_and_align( &BorrowedStr::from_bytes(magnitude_str.as_bytes()), &sign_prefix, FormatAlign::Right, ) } pub fn format_string(&self, s: &BorrowedStr) -> Result { self.validate_format(FormatType::String)?; match self.format_type { Some(FormatType::String) | None => self .format_sign_and_align(s, "", FormatAlign::Left) .map(|mut value| { if let Some(precision) = self.precision { value.truncate(precision); } value }), _ => { let ch = char::from(self.format_type.as_ref().unwrap()); Err(FormatSpecError::UnknownFormatCode(ch, "str")) } } } fn format_sign_and_align( &self, magnitude_str: &BorrowedStr, sign_str: &str, default_align: FormatAlign, ) -> Result { let align = self.align.unwrap_or(default_align); let num_chars = magnitude_str.char_len(); let fill_char = self.fill.unwrap_or(' '); let fill_chars_needed: i32 = self.width.map_or(0, |w| { cmp::max(0, (w as i32) - (num_chars as i32) - (sign_str.len() as i32)) }); Ok(match align { FormatAlign::Left => format!( "{}{}{}", sign_str, magnitude_str, FormatSpec::compute_fill_string(fill_char, fill_chars_needed) ), FormatAlign::Right => format!( "{}{}{}", FormatSpec::compute_fill_string(fill_char, fill_chars_needed), sign_str, magnitude_str ), FormatAlign::AfterSign => format!( "{}{}{}", sign_str, FormatSpec::compute_fill_string(fill_char, fill_chars_needed), magnitude_str ), FormatAlign::Center => { let left_fill_chars_needed = fill_chars_needed / 2; let right_fill_chars_needed = fill_chars_needed - left_fill_chars_needed; let left_fill_string = FormatSpec::compute_fill_string(fill_char, left_fill_chars_needed); let right_fill_string = 
FormatSpec::compute_fill_string(fill_char, right_fill_chars_needed); format!("{left_fill_string}{sign_str}{magnitude_str}{right_fill_string}") } }) } } #[derive(Debug, PartialEq)] pub enum FormatSpecError { DecimalDigitsTooMany, PrecisionTooBig, InvalidFormatSpecifier, UnspecifiedFormat(char, char), UnknownFormatCode(char, &'static str), PrecisionNotAllowed, NotAllowed(&'static str), UnableToConvert, CodeNotInRange, NotImplemented(char, &'static str), } #[derive(Debug, PartialEq)] pub enum FormatParseError { UnmatchedBracket, MissingStartBracket, UnescapedStartBracketInLiteral, InvalidFormatSpecifier, UnknownConversion, EmptyAttribute, MissingRightBracket, InvalidCharacterAfterRightBracket, } impl FromStr for FormatSpec { type Err = FormatSpecError; fn from_str(s: &str) -> Result { FormatSpec::parse(s) } } #[derive(Debug, PartialEq)] pub enum FieldNamePart { Attribute(String), Index(usize), StringIndex(String), } impl FieldNamePart { fn parse_part( chars: &mut impl PeekingNext, ) -> Result, FormatParseError> { chars .next() .map(|ch| match ch { '.' => { let mut attribute = String::new(); for ch in chars.peeking_take_while(|ch| *ch != '.' 
&& *ch != '[') { attribute.push(ch); } if attribute.is_empty() { Err(FormatParseError::EmptyAttribute) } else { Ok(FieldNamePart::Attribute(attribute)) } } '[' => { let mut index = String::new(); for ch in chars { if ch == ']' { return if index.is_empty() { Err(FormatParseError::EmptyAttribute) } else if let Ok(index) = index.parse::() { Ok(FieldNamePart::Index(index)) } else { Ok(FieldNamePart::StringIndex(index)) }; } index.push(ch); } Err(FormatParseError::MissingRightBracket) } _ => Err(FormatParseError::InvalidCharacterAfterRightBracket), }) .transpose() } } #[derive(Debug, PartialEq)] pub enum FieldType { Auto, Index(usize), Keyword(String), } #[derive(Debug, PartialEq)] pub struct FieldName { pub field_type: FieldType, pub parts: Vec, } impl FieldName { pub fn parse(text: &str) -> Result { let mut chars = text.chars().peekable(); let mut first = String::new(); for ch in chars.peeking_take_while(|ch| *ch != '.' && *ch != '[') { first.push(ch); } let field_type = if first.is_empty() { FieldType::Auto } else if let Ok(index) = first.parse::() { FieldType::Index(index) } else { FieldType::Keyword(first) }; let mut parts = Vec::new(); while let Some(part) = FieldNamePart::parse_part(&mut chars)? { parts.push(part) } Ok(FieldName { field_type, parts }) } } #[derive(Debug, PartialEq)] pub enum FormatPart { Field { field_name: String, preconversion_spec: Option, format_spec: String, }, Literal(String), } #[derive(Debug, PartialEq)] pub struct FormatString { pub format_parts: Vec, } impl FormatString { fn parse_literal_single(text: &str) -> Result<(char, &str), FormatParseError> { let mut chars = text.chars(); // This should never be called with an empty str let first_char = chars.next().unwrap(); // isn't this detectable only with bytes operation? 
if first_char == '{' || first_char == '}' { let maybe_next_char = chars.next(); // if we see a bracket, it has to be escaped by doubling up to be in a literal return if maybe_next_char.is_none() || maybe_next_char.unwrap() != first_char { Err(FormatParseError::UnescapedStartBracketInLiteral) } else { Ok((first_char, chars.as_str())) }; } Ok((first_char, chars.as_str())) } fn parse_literal(text: &str) -> Result<(FormatPart, &str), FormatParseError> { let mut cur_text = text; let mut result_string = String::new(); while !cur_text.is_empty() { match FormatString::parse_literal_single(cur_text) { Ok((next_char, remaining)) => { result_string.push(next_char); cur_text = remaining; } Err(err) => { return if !result_string.is_empty() { Ok((FormatPart::Literal(result_string), cur_text)) } else { Err(err) }; } } } Ok((FormatPart::Literal(result_string), "")) } fn parse_part_in_brackets(text: &str) -> Result { let parts: Vec<&str> = text.splitn(2, ':').collect(); // before the comma is a keyword or arg index, after the comma is maybe a spec. let arg_part = parts[0]; let format_spec = if parts.len() > 1 { parts[1].to_owned() } else { String::new() }; // On parts[0] can still be the preconversor (!r, !s, !a) let parts: Vec<&str> = arg_part.splitn(2, '!').collect(); // before the bang is a keyword or arg index, after the comma is maybe a conversor spec. 
let arg_part = parts[0]; let preconversion_spec = parts .get(1) .map(|conversion| { // conversions are only every one character conversion .chars() .exactly_one() .map_err(|_| FormatParseError::UnknownConversion) }) .transpose()?; Ok(FormatPart::Field { field_name: arg_part.to_owned(), preconversion_spec, format_spec, }) } fn parse_spec(text: &str) -> Result<(FormatPart, &str), FormatParseError> { let mut nested = false; let mut end_bracket_pos = None; let mut left = String::new(); // There may be one layer nesting brackets in spec for (idx, c) in text.chars().enumerate() { if idx == 0 { if c != '{' { return Err(FormatParseError::MissingStartBracket); } } else if c == '{' { if nested { return Err(FormatParseError::InvalidFormatSpecifier); } else { nested = true; left.push(c); continue; } } else if c == '}' { if nested { nested = false; left.push(c); continue; } else { end_bracket_pos = Some(idx); break; } } else { left.push(c); } } if let Some(pos) = end_bracket_pos { let (_, right) = text.split_at(pos); let format_part = FormatString::parse_part_in_brackets(&left)?; Ok((format_part, &right[1..])) } else { Err(FormatParseError::UnmatchedBracket) } } } pub trait FromTemplate<'a>: Sized { type Err; fn from_str(s: &'a str) -> Result; } impl<'a> FromTemplate<'a> for FormatString { type Err = FormatParseError; fn from_str(text: &'a str) -> Result { let mut cur_text: &str = text; let mut parts: Vec = Vec::new(); while !cur_text.is_empty() { // Try to parse both literals and bracketed format parts until we // run out of text cur_text = FormatString::parse_literal(cur_text) .or_else(|_| FormatString::parse_spec(cur_text)) .map(|(part, new_text)| { parts.push(part); new_text })?; } Ok(FormatString { format_parts: parts, }) } } #[cfg(test)] mod tests { use super::*; #[test] fn test_fill_and_align() { assert_eq!( parse_fill_and_align(" <"), (Some(' '), Some(FormatAlign::Left), "") ); assert_eq!( parse_fill_and_align(" <22"), (Some(' '), Some(FormatAlign::Left), "22") ); 
assert_eq!( parse_fill_and_align("<22"), (None, Some(FormatAlign::Left), "22") ); assert_eq!( parse_fill_and_align(" ^^"), (Some(' '), Some(FormatAlign::Center), "^") ); assert_eq!( parse_fill_and_align("==="), (Some('='), Some(FormatAlign::AfterSign), "=") ); } #[test] fn test_width_only() { let expected = Ok(FormatSpec { preconversor: None, fill: None, align: None, sign: None, alternate_form: false, width: Some(33), grouping_option: None, precision: None, format_type: None, }); assert_eq!(FormatSpec::parse("33"), expected); } #[test] fn test_fill_and_width() { let expected = Ok(FormatSpec { preconversor: None, fill: Some('<'), align: Some(FormatAlign::Right), sign: None, alternate_form: false, width: Some(33), grouping_option: None, precision: None, format_type: None, }); assert_eq!(FormatSpec::parse("<>33"), expected); } #[test] fn test_all() { let expected = Ok(FormatSpec { preconversor: None, fill: Some('<'), align: Some(FormatAlign::Right), sign: Some(FormatSign::Minus), alternate_form: true, width: Some(23), grouping_option: Some(FormatGrouping::Comma), precision: Some(11), format_type: Some(FormatType::Binary), }); assert_eq!(FormatSpec::parse("<>-#23,.11b"), expected); } #[test] fn test_format_int() { assert_eq!( FormatSpec::parse("d") .unwrap() .format_int(&BigInt::from_bytes_be(Sign::Plus, b"\x10")), Ok("16".to_owned()) ); assert_eq!( FormatSpec::parse("x") .unwrap() .format_int(&BigInt::from_bytes_be(Sign::Plus, b"\x10")), Ok("10".to_owned()) ); assert_eq!( FormatSpec::parse("b") .unwrap() .format_int(&BigInt::from_bytes_be(Sign::Plus, b"\x10")), Ok("10000".to_owned()) ); assert_eq!( FormatSpec::parse("o") .unwrap() .format_int(&BigInt::from_bytes_be(Sign::Plus, b"\x10")), Ok("20".to_owned()) ); assert_eq!( FormatSpec::parse("+d") .unwrap() .format_int(&BigInt::from_bytes_be(Sign::Plus, b"\x10")), Ok("+16".to_owned()) ); assert_eq!( FormatSpec::parse("^ 5d") .unwrap() .format_int(&BigInt::from_bytes_be(Sign::Minus, b"\x10")), Ok(" -16 ".to_owned()) ); 
assert_eq!( FormatSpec::parse("0>+#10x") .unwrap() .format_int(&BigInt::from_bytes_be(Sign::Plus, b"\x10")), Ok("00000+0x10".to_owned()) ); } #[test] fn test_format_parse() { let expected = Ok(FormatString { format_parts: vec![ FormatPart::Literal("abcd".to_owned()), FormatPart::Field { field_name: "1".to_owned(), preconversion_spec: None, format_spec: String::new(), }, FormatPart::Literal(":".to_owned()), FormatPart::Field { field_name: "key".to_owned(), preconversion_spec: None, format_spec: String::new(), }, ], }); assert_eq!(FormatString::from_str("abcd{1}:{key}"), expected); } #[test] fn test_format_parse_fail() { assert_eq!( FormatString::from_str("{s"), Err(FormatParseError::UnmatchedBracket) ); } #[test] fn test_format_parse_escape() { let expected = Ok(FormatString { format_parts: vec![ FormatPart::Literal("{".to_owned()), FormatPart::Field { field_name: "key".to_owned(), preconversion_spec: None, format_spec: String::new(), }, FormatPart::Literal("}ddfe".to_owned()), ], }); assert_eq!(FormatString::from_str("{{{key}}}ddfe"), expected); } #[test] fn test_format_invalid_specification() { assert_eq!( FormatSpec::parse("%3"), Err(FormatSpecError::InvalidFormatSpecifier) ); assert_eq!( FormatSpec::parse(".2fa"), Err(FormatSpecError::InvalidFormatSpecifier) ); assert_eq!( FormatSpec::parse("ds"), Err(FormatSpecError::InvalidFormatSpecifier) ); assert_eq!( FormatSpec::parse("x+"), Err(FormatSpecError::InvalidFormatSpecifier) ); assert_eq!( FormatSpec::parse("b4"), Err(FormatSpecError::InvalidFormatSpecifier) ); assert_eq!( FormatSpec::parse("o!"), Err(FormatSpecError::InvalidFormatSpecifier) ); assert_eq!( FormatSpec::parse("d "), Err(FormatSpecError::InvalidFormatSpecifier) ); } #[test] fn test_parse_field_name() { assert_eq!( FieldName::parse(""), Ok(FieldName { field_type: FieldType::Auto, parts: Vec::new(), }) ); assert_eq!( FieldName::parse("0"), Ok(FieldName { field_type: FieldType::Index(0), parts: Vec::new(), }) ); assert_eq!( FieldName::parse("key"), 
Ok(FieldName { field_type: FieldType::Keyword("key".to_owned()), parts: Vec::new(), }) ); assert_eq!( FieldName::parse("key.attr[0][string]"), Ok(FieldName { field_type: FieldType::Keyword("key".to_owned()), parts: vec![ FieldNamePart::Attribute("attr".to_owned()), FieldNamePart::Index(0), FieldNamePart::StringIndex("string".to_owned()) ], }) ); assert_eq!( FieldName::parse("key.."), Err(FormatParseError::EmptyAttribute) ); assert_eq!( FieldName::parse("key[]"), Err(FormatParseError::EmptyAttribute) ); assert_eq!( FieldName::parse("key["), Err(FormatParseError::MissingRightBracket) ); assert_eq!( FieldName::parse("key[0]after"), Err(FormatParseError::InvalidCharacterAfterRightBracket) ); } } rustpython-common-0.2.0/src/hash.rs000064400000000000000000000127201046102023000154320ustar 00000000000000use num_bigint::BigInt; use num_traits::ToPrimitive; use siphasher::sip::SipHasher24; use std::hash::{BuildHasher, Hash, Hasher}; pub type PyHash = i64; pub type PyUHash = u64; /// A PyHash value used to represent a missing hash value, e.g. means "not yet computed" for /// `str`'s hash cache pub const SENTINEL: PyHash = -1; /// Prime multiplier used in string and various other hashes. 
pub const MULTIPLIER: PyHash = 1_000_003; // 0xf4243 /// Numeric hashes are based on reduction modulo the prime 2**_BITS - 1 pub const BITS: usize = 61; pub const MODULUS: PyUHash = (1 << BITS) - 1; pub const INF: PyHash = 314_159; pub const NAN: PyHash = 0; pub const IMAG: PyHash = MULTIPLIER; pub const ALGO: &str = "siphash24"; pub const HASH_BITS: usize = std::mem::size_of::() * 8; // SipHasher24 takes 2 u64s as a seed pub const SEED_BITS: usize = std::mem::size_of::() * 2 * 8; // pub const CUTOFF: usize = 7; pub struct HashSecret { k0: u64, k1: u64, } impl BuildHasher for HashSecret { type Hasher = SipHasher24; fn build_hasher(&self) -> Self::Hasher { SipHasher24::new_with_keys(self.k0, self.k1) } } impl rand::distributions::Distribution for rand::distributions::Standard { fn sample(&self, rng: &mut R) -> HashSecret { HashSecret { k0: rng.gen(), k1: rng.gen(), } } } impl HashSecret { pub fn new(seed: u32) -> Self { let mut buf = [0u8; 16]; lcg_urandom(seed, &mut buf); let (left, right) = buf.split_at(8); let k0 = u64::from_le_bytes(left.try_into().unwrap()); let k1 = u64::from_le_bytes(right.try_into().unwrap()); Self { k0, k1 } } } impl HashSecret { pub fn hash_value(&self, data: &T) -> PyHash { let mut hasher = self.build_hasher(); data.hash(&mut hasher); fix_sentinel(mod_int(hasher.finish() as PyHash)) } pub fn hash_iter<'a, T: 'a, I, F, E>(&self, iter: I, hashf: F) -> Result where I: IntoIterator, F: Fn(&'a T) -> Result, { let mut hasher = self.build_hasher(); for element in iter { let item_hash = hashf(element)?; item_hash.hash(&mut hasher); } Ok(fix_sentinel(mod_int(hasher.finish() as PyHash))) } pub fn hash_bytes(&self, value: &[u8]) -> PyHash { if value.is_empty() { 0 } else { self.hash_value(value) } } pub fn hash_str(&self, value: &str) -> PyHash { self.hash_bytes(value.as_bytes()) } } #[inline] pub fn hash_float(value: f64) -> Option { // cpython _Py_HashDouble if !value.is_finite() { return if value.is_infinite() { Some(if value > 0.0 { INF } else { 
-INF }) } else { None }; } let frexp = super::float_ops::ufrexp(value); // process 28 bits at a time; this should work well both for binary // and hexadecimal floating point. let mut m = frexp.0; let mut e = frexp.1; let mut x: PyUHash = 0; while m != 0.0 { x = ((x << 28) & MODULUS) | x >> (BITS - 28); m *= 268_435_456.0; // 2**28 e -= 28; let y = m as PyUHash; // pull out integer part m -= y as f64; x += y; if x >= MODULUS { x -= MODULUS; } } // adjust for the exponent; first reduce it modulo BITS const BITS32: i32 = BITS as i32; e = if e >= 0 { e % BITS32 } else { BITS32 - 1 - ((-1 - e) % BITS32) }; x = ((x << e) & MODULUS) | x >> (BITS32 - e); Some(fix_sentinel(x as PyHash * value.signum() as PyHash)) } pub fn hash_iter_unordered<'a, T: 'a, I, F, E>(iter: I, hashf: F) -> Result where I: IntoIterator, F: Fn(&'a T) -> Result, { let mut hash: PyHash = 0; for element in iter { let item_hash = hashf(element)?; // xor is commutative and hash should be independent of order hash ^= item_hash; } Ok(fix_sentinel(mod_int(hash))) } pub fn hash_bigint(value: &BigInt) -> PyHash { let ret = match value.to_i64() { Some(i) => mod_int(i), None => (value % MODULUS).to_i64().unwrap_or_else(|| unsafe { // SAFETY: MODULUS < i64::MAX, so value % MODULUS is guaranteed to be in the range of i64 std::hint::unreachable_unchecked() }), }; fix_sentinel(ret) } #[inline(always)] pub fn fix_sentinel(x: PyHash) -> PyHash { if x == SENTINEL { -2 } else { x } } #[inline] pub fn mod_int(value: i64) -> PyHash { value % MODULUS as i64 } pub fn lcg_urandom(mut x: u32, buf: &mut [u8]) { for b in buf { x = x.wrapping_mul(214013); x = x.wrapping_add(2531011); *b = ((x >> 16) & 0xff) as u8; } } #[inline] pub fn hash_object_id_raw(p: usize) -> PyHash { // TODO: Use commented logic when below issue resolved. 
// Ref: https://github.com/RustPython/RustPython/pull/3951#issuecomment-1193108966 /* bottom 3 or 4 bits are likely to be 0; rotate y by 4 to avoid excessive hash collisions for dicts and sets */ // p.rotate_right(4) as PyHash p as PyHash } #[inline] pub fn hash_object_id(p: usize) -> PyHash { fix_sentinel(hash_object_id_raw(p)) } pub fn keyed_hash(key: u64, buf: &[u8]) -> u64 { let mut hasher = SipHasher24::new_with_keys(key, 0); buf.hash(&mut hasher); hasher.finish() } rustpython-common-0.2.0/src/lib.rs000064400000000000000000000010421046102023000152500ustar 00000000000000//! A crate to hold types and functions common to all rustpython components. #[macro_use] mod macros; pub use macros::*; pub mod atomic; pub mod borrow; pub mod boxvec; pub mod bytes; pub mod cformat; pub mod char; pub mod cmp; #[cfg(any(unix, windows, target_os = "wasi"))] pub mod crt_fd; pub mod encodings; pub mod float_ops; pub mod format; pub mod hash; pub mod linked_list; pub mod lock; pub mod os; pub mod rc; pub mod refcount; pub mod static_cell; pub mod str; #[cfg(windows)] pub mod windows; pub mod vendored { pub use ascii; } rustpython-common-0.2.0/src/linked_list.rs000064400000000000000000000277451046102023000170250ustar 00000000000000//! This module is modified from tokio::util::linked_list: https://github.com/tokio-rs/tokio/blob/master/tokio/src/util/linked_list.rs //! Tokio is licensed under the MIT license: //! //! Copyright (c) 2021 Tokio Contributors //! //! Permission is hereby granted, free of charge, to any //! person obtaining a copy of this software and associated //! documentation files (the "Software"), to deal in the //! Software without restriction, including without //! limitation the rights to use, copy, modify, merge, //! publish, distribute, sublicense, and/or sell copies of //! the Software, and to permit persons to whom the Software //! is furnished to do so, subject to the following //! conditions: //! //! The above copyright notice and this permission notice //! 
shall be included in all copies or substantial portions //! of the Software. //! //! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF //! ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED //! TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A //! PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT //! SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY //! CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION //! OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR //! IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER //! DEALINGS IN THE SOFTWARE. //! //! Original header: //! //! An intrusive double linked list of data. //! //! The data structure supports tracking pinned nodes. Most of the data //! structure's APIs are `unsafe` as they require the caller to ensure the //! specified node is actually contained by the list. #![allow(clippy::new_without_default, clippy::missing_safety_doc)] use core::cell::UnsafeCell; use core::fmt; use core::marker::{PhantomData, PhantomPinned}; use core::mem::ManuallyDrop; use core::ptr::{self, NonNull}; /// An intrusive linked list. /// /// Currently, the list is not emptied on drop. It is the caller's /// responsibility to ensure the list is empty before dropping it. pub struct LinkedList { /// Linked list head head: Option>, // /// Linked list tail // tail: Option>, /// Node type marker. _marker: PhantomData<*const L>, } unsafe impl Send for LinkedList where L::Target: Send {} unsafe impl Sync for LinkedList where L::Target: Sync {} /// Defines how a type is tracked within a linked list. /// /// In order to support storing a single type within multiple lists, accessing /// the list pointers is decoupled from the entry type. /// /// # Safety /// /// Implementations must guarantee that `Target` types are pinned in memory. In /// other words, when a node is inserted, the value will not be moved as long as /// it is stored in the list. pub unsafe trait Link { /// Handle to the list entry. 
/// /// This is usually a pointer-ish type. type Handle; /// Node type. type Target; /// Convert the handle to a raw pointer without consuming the handle. #[allow(clippy::wrong_self_convention)] fn as_raw(handle: &Self::Handle) -> NonNull; /// Convert the raw pointer to a handle unsafe fn from_raw(ptr: NonNull) -> Self::Handle; /// Return the pointers for a node unsafe fn pointers(target: NonNull) -> NonNull>; } /// Previous / next pointers. pub struct Pointers { inner: UnsafeCell>, } /// We do not want the compiler to put the `noalias` attribute on mutable /// references to this type, so the type has been made `!Unpin` with a /// `PhantomPinned` field. /// /// Additionally, we never access the `prev` or `next` fields directly, as any /// such access would implicitly involve the creation of a reference to the /// field, which we want to avoid since the fields are not `!Unpin`, and would /// hence be given the `noalias` attribute if we were to do such an access. /// As an alternative to accessing the fields directly, the `Pointers` type /// provides getters and setters for the two fields, and those are implemented /// using raw pointer casts and offsets, which is valid since the struct is /// #[repr(C)]. /// /// See this link for more information: /// #[repr(C)] struct PointersInner { /// The previous node in the list. null if there is no previous node. /// /// This field is accessed through pointer manipulation, so it is not dead code. #[allow(dead_code)] prev: Option>, /// The next node in the list. null if there is no previous node. /// /// This field is accessed through pointer manipulation, so it is not dead code. #[allow(dead_code)] next: Option>, /// This type is !Unpin due to the heuristic from: /// _pin: PhantomPinned, } unsafe impl Send for PointersInner {} unsafe impl Sync for PointersInner {} unsafe impl Send for Pointers {} unsafe impl Sync for Pointers {} // ===== impl LinkedList ===== impl LinkedList { /// Creates an empty linked list. 
pub const fn new() -> LinkedList { LinkedList { head: None, // tail: None, _marker: PhantomData, } } } impl LinkedList { /// Adds an element first in the list. pub fn push_front(&mut self, val: L::Handle) { // The value should not be dropped, it is being inserted into the list let val = ManuallyDrop::new(val); let ptr = L::as_raw(&val); assert_ne!(self.head, Some(ptr)); unsafe { L::pointers(ptr).as_mut().set_next(self.head); L::pointers(ptr).as_mut().set_prev(None); if let Some(head) = self.head { L::pointers(head).as_mut().set_prev(Some(ptr)); } self.head = Some(ptr); // if self.tail.is_none() { // self.tail = Some(ptr); // } } } // /// Removes the last element from a list and returns it, or None if it is // /// empty. // pub fn pop_back(&mut self) -> Option { // unsafe { // let last = self.tail?; // self.tail = L::pointers(last).as_ref().get_prev(); // if let Some(prev) = L::pointers(last).as_ref().get_prev() { // L::pointers(prev).as_mut().set_next(None); // } else { // self.head = None // } // L::pointers(last).as_mut().set_prev(None); // L::pointers(last).as_mut().set_next(None); // Some(L::from_raw(last)) // } // } /// Returns whether the linked list does not contain any node pub fn is_empty(&self) -> bool { self.head.is_none() // if self.head.is_some() { // return false; // } // assert!(self.tail.is_none()); // true } /// Removes the specified node from the list /// /// # Safety /// /// The caller **must** ensure that `node` is currently contained by /// `self` or not contained by any other list. 
pub unsafe fn remove(&mut self, node: NonNull) -> Option { if let Some(prev) = L::pointers(node).as_ref().get_prev() { debug_assert_eq!(L::pointers(prev).as_ref().get_next(), Some(node)); L::pointers(prev) .as_mut() .set_next(L::pointers(node).as_ref().get_next()); } else { if self.head != Some(node) { return None; } self.head = L::pointers(node).as_ref().get_next(); } if let Some(next) = L::pointers(node).as_ref().get_next() { debug_assert_eq!(L::pointers(next).as_ref().get_prev(), Some(node)); L::pointers(next) .as_mut() .set_prev(L::pointers(node).as_ref().get_prev()); } else { // // This might be the last item in the list // if self.tail != Some(node) { // return None; // } // self.tail = L::pointers(node).as_ref().get_prev(); } L::pointers(node).as_mut().set_next(None); L::pointers(node).as_mut().set_prev(None); Some(L::from_raw(node)) } // pub fn last(&self) -> Option<&L::Target> { // let tail = self.tail.as_ref()?; // unsafe { Some(&*tail.as_ptr()) } // } // === rustpython additions === pub fn iter(&self) -> impl Iterator { std::iter::successors(self.head, |node| unsafe { L::pointers(*node).as_ref().get_next() }) .map(|ptr| unsafe { ptr.as_ref() }) } } impl fmt::Debug for LinkedList { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_struct("LinkedList") .field("head", &self.head) // .field("tail", &self.tail) .finish() } } impl Default for LinkedList { fn default() -> Self { Self::new() } } // ===== impl DrainFilter ===== pub struct DrainFilter<'a, T: Link, F> { list: &'a mut LinkedList, filter: F, curr: Option>, } impl LinkedList { pub fn drain_filter(&mut self, filter: F) -> DrainFilter<'_, T, F> where F: FnMut(&mut T::Target) -> bool, { let curr = self.head; DrainFilter { curr, filter, list: self, } } } impl<'a, T, F> Iterator for DrainFilter<'a, T, F> where T: Link, F: FnMut(&mut T::Target) -> bool, { type Item = T::Handle; fn next(&mut self) -> Option { while let Some(curr) = self.curr { // safety: the pointer references data contained 
by the list self.curr = unsafe { T::pointers(curr).as_ref() }.get_next(); // safety: the value is still owned by the linked list. if (self.filter)(unsafe { &mut *curr.as_ptr() }) { return unsafe { self.list.remove(curr) }; } } None } } // ===== impl Pointers ===== impl Pointers { /// Create a new set of empty pointers pub fn new() -> Pointers { Pointers { inner: UnsafeCell::new(PointersInner { prev: None, next: None, _pin: PhantomPinned, }), } } fn get_prev(&self) -> Option> { // SAFETY: prev is the first field in PointersInner, which is #[repr(C)]. unsafe { let inner = self.inner.get(); let prev = inner as *const Option>; ptr::read(prev) } } fn get_next(&self) -> Option> { // SAFETY: next is the second field in PointersInner, which is #[repr(C)]. unsafe { let inner = self.inner.get(); let prev = inner as *const Option>; let next = prev.add(1); ptr::read(next) } } fn set_prev(&mut self, value: Option>) { // SAFETY: prev is the first field in PointersInner, which is #[repr(C)]. unsafe { let inner = self.inner.get(); let prev = inner as *mut Option>; ptr::write(prev, value); } } fn set_next(&mut self, value: Option>) { // SAFETY: next is the second field in PointersInner, which is #[repr(C)]. 
unsafe { let inner = self.inner.get(); let prev = inner as *mut Option>; let next = prev.add(1); ptr::write(next, value); } } } impl fmt::Debug for Pointers { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let prev = self.get_prev(); let next = self.get_next(); f.debug_struct("Pointers") .field("prev", &prev) .field("next", &next) .finish() } } rustpython-common-0.2.0/src/lock/cell_lock.rs000064400000000000000000000110671046102023000173710ustar 00000000000000use lock_api::{ GetThreadId, RawMutex, RawRwLock, RawRwLockDowngrade, RawRwLockRecursive, RawRwLockUpgrade, RawRwLockUpgradeDowngrade, }; use std::{cell::Cell, num::NonZeroUsize}; pub struct RawCellMutex { locked: Cell, } unsafe impl RawMutex for RawCellMutex { #[allow(clippy::declare_interior_mutable_const)] const INIT: Self = RawCellMutex { locked: Cell::new(false), }; type GuardMarker = lock_api::GuardNoSend; #[inline] fn lock(&self) { if self.is_locked() { deadlock("", "Mutex") } self.locked.set(true) } #[inline] fn try_lock(&self) -> bool { if self.is_locked() { false } else { self.locked.set(true); true } } unsafe fn unlock(&self) { self.locked.set(false) } #[inline] fn is_locked(&self) -> bool { self.locked.get() } } const WRITER_BIT: usize = 0b01; const ONE_READER: usize = 0b10; pub struct RawCellRwLock { state: Cell, } impl RawCellRwLock { #[inline] fn is_exclusive(&self) -> bool { self.state.get() & WRITER_BIT != 0 } } unsafe impl RawRwLock for RawCellRwLock { #[allow(clippy::declare_interior_mutable_const)] const INIT: Self = RawCellRwLock { state: Cell::new(0), }; type GuardMarker = ::GuardMarker; #[inline] fn lock_shared(&self) { if !self.try_lock_shared() { deadlock("sharedly ", "RwLock") } } #[inline] fn try_lock_shared(&self) -> bool { // TODO: figure out whether this is realistic; could maybe help // debug deadlocks from 2+ read() in the same thread? 
// if self.is_locked() { // false // } else { // self.state.set(ONE_READER); // true // } self.try_lock_shared_recursive() } #[inline] unsafe fn unlock_shared(&self) { self.state.set(self.state.get() - ONE_READER) } #[inline] fn lock_exclusive(&self) { if !self.try_lock_exclusive() { deadlock("exclusively ", "RwLock") } self.state.set(WRITER_BIT) } #[inline] fn try_lock_exclusive(&self) -> bool { if self.is_locked() { false } else { self.state.set(WRITER_BIT); true } } unsafe fn unlock_exclusive(&self) { self.state.set(0) } fn is_locked(&self) -> bool { self.state.get() != 0 } } unsafe impl RawRwLockDowngrade for RawCellRwLock { unsafe fn downgrade(&self) { self.state.set(ONE_READER); } } unsafe impl RawRwLockUpgrade for RawCellRwLock { #[inline] fn lock_upgradable(&self) { if !self.try_lock_upgradable() { deadlock("upgradably+sharedly ", "RwLock") } } #[inline] fn try_lock_upgradable(&self) -> bool { // defer to normal -- we can always try to upgrade self.try_lock_shared() } #[inline] unsafe fn unlock_upgradable(&self) { self.unlock_shared() } #[inline] unsafe fn upgrade(&self) { if !self.try_upgrade() { deadlock("upgrade ", "RwLock") } } #[inline] unsafe fn try_upgrade(&self) -> bool { if self.state.get() == ONE_READER { self.state.set(WRITER_BIT); true } else { false } } } unsafe impl RawRwLockUpgradeDowngrade for RawCellRwLock { #[inline] unsafe fn downgrade_upgradable(&self) { // no-op -- we're always upgradable } #[inline] unsafe fn downgrade_to_upgradable(&self) { self.state.set(ONE_READER); } } unsafe impl RawRwLockRecursive for RawCellRwLock { #[inline] fn lock_shared_recursive(&self) { if !self.try_lock_shared_recursive() { deadlock("recursively+sharedly ", "RwLock") } } #[inline] fn try_lock_shared_recursive(&self) -> bool { if self.is_exclusive() { false } else if let Some(new) = self.state.get().checked_add(ONE_READER) { self.state.set(new); true } else { false } } } #[cold] #[inline(never)] fn deadlock(lock_kind: &str, ty: &str) -> ! 
{ panic!("deadlock: tried to {lock_kind}lock a Cell{ty} twice") } pub struct SingleThreadId(()); unsafe impl GetThreadId for SingleThreadId { const INIT: Self = SingleThreadId(()); fn nonzero_thread_id(&self) -> NonZeroUsize { NonZeroUsize::new(1).unwrap() } } rustpython-common-0.2.0/src/lock/immutable_mutex.rs000064400000000000000000000050261046102023000206410ustar 00000000000000use lock_api::{MutexGuard, RawMutex}; use std::{fmt, marker::PhantomData, ops::Deref}; /// A mutex guard that has an exclusive lock, but only an immutable reference; useful if you /// need to map a mutex guard with a function that returns an `&T`. Construct using the /// [`MapImmutable`] trait. pub struct ImmutableMappedMutexGuard<'a, R: RawMutex, T: ?Sized> { raw: &'a R, data: *const T, _marker: PhantomData<(&'a T, R::GuardMarker)>, } // main constructor for ImmutableMappedMutexGuard // TODO: patch lock_api to have a MappedMutexGuard::raw method, and have this implementation be for // MappedMutexGuard impl<'a, R: RawMutex, T: ?Sized> MapImmutable<'a, R, T> for MutexGuard<'a, R, T> { fn map_immutable(s: Self, f: F) -> ImmutableMappedMutexGuard<'a, R, U> where F: FnOnce(&T) -> &U, { let raw = unsafe { MutexGuard::mutex(&s).raw() }; let data = f(&s) as *const U; std::mem::forget(s); ImmutableMappedMutexGuard { raw, data, _marker: PhantomData, } } } impl<'a, R: RawMutex, T: ?Sized> ImmutableMappedMutexGuard<'a, R, T> { pub fn map(s: Self, f: F) -> ImmutableMappedMutexGuard<'a, R, U> where F: FnOnce(&T) -> &U, { let raw = s.raw; let data = f(&s) as *const U; std::mem::forget(s); ImmutableMappedMutexGuard { raw, data, _marker: PhantomData, } } } impl<'a, R: RawMutex, T: ?Sized> Deref for ImmutableMappedMutexGuard<'a, R, T> { type Target = T; fn deref(&self) -> &Self::Target { // SAFETY: self.data is valid for the lifetime of the guard unsafe { &*self.data } } } impl<'a, R: RawMutex, T: ?Sized> Drop for ImmutableMappedMutexGuard<'a, R, T> { fn drop(&mut self) { // SAFETY: An 
ImmutableMappedMutexGuard always holds the lock unsafe { self.raw.unlock() } } } impl<'a, R: RawMutex, T: fmt::Debug + ?Sized> fmt::Debug for ImmutableMappedMutexGuard<'a, R, T> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Debug::fmt(&**self, f) } } impl<'a, R: RawMutex, T: fmt::Display + ?Sized> fmt::Display for ImmutableMappedMutexGuard<'a, R, T> { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fmt::Display::fmt(&**self, f) } } pub trait MapImmutable<'a, R: RawMutex, T: ?Sized> { fn map_immutable(s: Self, f: F) -> ImmutableMappedMutexGuard<'a, R, U> where F: FnOnce(&T) -> &U; } rustpython-common-0.2.0/src/lock/thread_mutex.rs000064400000000000000000000213171046102023000201320ustar 00000000000000use lock_api::{GetThreadId, GuardNoSend, RawMutex}; use std::{ cell::UnsafeCell, fmt, marker::PhantomData, ops::{Deref, DerefMut}, ptr::NonNull, sync::atomic::{AtomicUsize, Ordering}, }; // based off ReentrantMutex from lock_api /// A mutex type that knows when it would deadlock pub struct RawThreadMutex { owner: AtomicUsize, mutex: R, get_thread_id: G, } impl RawThreadMutex { #[allow(clippy::declare_interior_mutable_const)] pub const INIT: Self = RawThreadMutex { owner: AtomicUsize::new(0), mutex: R::INIT, get_thread_id: G::INIT, }; #[inline] fn lock_internal bool>(&self, try_lock: F) -> Option { let id = self.get_thread_id.nonzero_thread_id().get(); if self.owner.load(Ordering::Relaxed) == id { return None; } else { if !try_lock() { return Some(false); } self.owner.store(id, Ordering::Relaxed); } Some(true) } /// Blocks for the mutex to be available, and returns true if the mutex isn't already /// locked on the current thread. pub fn lock(&self) -> bool { self.lock_internal(|| { self.mutex.lock(); true }) .is_some() } /// Returns `Some(true)` if able to successfully lock without blocking, `Some(false)` /// otherwise, and `None` when the mutex is already locked on the current thread. 
pub fn try_lock(&self) -> Option { self.lock_internal(|| self.mutex.try_lock()) } /// Unlocks this mutex. The inner mutex may not be unlocked if /// this mutex was acquired previously in the current thread. /// /// # Safety /// /// This method may only be called if the mutex is held by the current thread. pub unsafe fn unlock(&self) { self.owner.store(0, Ordering::Relaxed); self.mutex.unlock(); } } unsafe impl Send for RawThreadMutex {} unsafe impl Sync for RawThreadMutex {} pub struct ThreadMutex { raw: RawThreadMutex, data: UnsafeCell, } impl ThreadMutex { pub fn new(val: T) -> Self { ThreadMutex { raw: RawThreadMutex::INIT, data: UnsafeCell::new(val), } } pub fn into_inner(self) -> T { self.data.into_inner() } } impl Default for ThreadMutex { fn default() -> Self { Self::new(T::default()) } } impl ThreadMutex { pub fn lock(&self) -> Option> { if self.raw.lock() { Some(ThreadMutexGuard { mu: self, marker: PhantomData, }) } else { None } } pub fn try_lock(&self) -> Result, TryLockThreadError> { match self.raw.try_lock() { Some(true) => Ok(ThreadMutexGuard { mu: self, marker: PhantomData, }), Some(false) => Err(TryLockThreadError::Other), None => Err(TryLockThreadError::Current), } } } // Whether ThreadMutex::try_lock failed because the mutex was already locked on another thread or // on the current thread pub enum TryLockThreadError { Other, Current, } struct LockedPlaceholder(&'static str); impl fmt::Debug for LockedPlaceholder { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.write_str(self.0) } } impl fmt::Debug for ThreadMutex { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self.try_lock() { Ok(guard) => f .debug_struct("ThreadMutex") .field("data", &&*guard) .finish(), Err(e) => { let msg = match e { TryLockThreadError::Other => "", TryLockThreadError::Current => "", }; f.debug_struct("ThreadMutex") .field("data", &LockedPlaceholder(msg)) .finish() } } } } unsafe impl Send for ThreadMutex { } unsafe impl Sync for ThreadMutex { 
} pub struct ThreadMutexGuard<'a, R: RawMutex, G: GetThreadId, T: ?Sized> { mu: &'a ThreadMutex, marker: PhantomData<(&'a mut T, GuardNoSend)>, } impl<'a, R: RawMutex, G: GetThreadId, T: ?Sized> ThreadMutexGuard<'a, R, G, T> { pub fn map &mut U>( mut s: Self, f: F, ) -> MappedThreadMutexGuard<'a, R, G, U> { let data = f(&mut s).into(); let mu = &s.mu.raw; std::mem::forget(s); MappedThreadMutexGuard { mu, data, marker: PhantomData, } } pub fn try_map Option<&mut U>>( mut s: Self, f: F, ) -> Result, Self> { if let Some(data) = f(&mut s) { let data = data.into(); let mu = &s.mu.raw; std::mem::forget(s); Ok(MappedThreadMutexGuard { mu, data, marker: PhantomData, }) } else { Err(s) } } } impl<'a, R: RawMutex, G: GetThreadId, T: ?Sized> Deref for ThreadMutexGuard<'a, R, G, T> { type Target = T; fn deref(&self) -> &T { unsafe { &*self.mu.data.get() } } } impl<'a, R: RawMutex, G: GetThreadId, T: ?Sized> DerefMut for ThreadMutexGuard<'a, R, G, T> { fn deref_mut(&mut self) -> &mut T { unsafe { &mut *self.mu.data.get() } } } impl<'a, R: RawMutex, G: GetThreadId, T: ?Sized> Drop for ThreadMutexGuard<'a, R, G, T> { fn drop(&mut self) { unsafe { self.mu.raw.unlock() } } } impl<'a, R: RawMutex, G: GetThreadId, T: ?Sized + fmt::Display> fmt::Display for ThreadMutexGuard<'a, R, G, T> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fmt::Display::fmt(&**self, f) } } impl<'a, R: RawMutex, G: GetThreadId, T: ?Sized + fmt::Debug> fmt::Debug for ThreadMutexGuard<'a, R, G, T> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fmt::Debug::fmt(&**self, f) } } pub struct MappedThreadMutexGuard<'a, R: RawMutex, G: GetThreadId, T: ?Sized> { mu: &'a RawThreadMutex, data: NonNull, marker: PhantomData<(&'a mut T, GuardNoSend)>, } impl<'a, R: RawMutex, G: GetThreadId, T: ?Sized> MappedThreadMutexGuard<'a, R, G, T> { pub fn map &mut U>( mut s: Self, f: F, ) -> MappedThreadMutexGuard<'a, R, G, U> { let data = f(&mut s).into(); let mu = s.mu; std::mem::forget(s); MappedThreadMutexGuard 
{ mu, data, marker: PhantomData, } } pub fn try_map Option<&mut U>>( mut s: Self, f: F, ) -> Result, Self> { if let Some(data) = f(&mut s) { let data = data.into(); let mu = s.mu; std::mem::forget(s); Ok(MappedThreadMutexGuard { mu, data, marker: PhantomData, }) } else { Err(s) } } } impl<'a, R: RawMutex, G: GetThreadId, T: ?Sized> Deref for MappedThreadMutexGuard<'a, R, G, T> { type Target = T; fn deref(&self) -> &T { unsafe { self.data.as_ref() } } } impl<'a, R: RawMutex, G: GetThreadId, T: ?Sized> DerefMut for MappedThreadMutexGuard<'a, R, G, T> { fn deref_mut(&mut self) -> &mut T { unsafe { self.data.as_mut() } } } impl<'a, R: RawMutex, G: GetThreadId, T: ?Sized> Drop for MappedThreadMutexGuard<'a, R, G, T> { fn drop(&mut self) { unsafe { self.mu.unlock() } } } impl<'a, R: RawMutex, G: GetThreadId, T: ?Sized + fmt::Display> fmt::Display for MappedThreadMutexGuard<'a, R, G, T> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fmt::Display::fmt(&**self, f) } } impl<'a, R: RawMutex, G: GetThreadId, T: ?Sized + fmt::Debug> fmt::Debug for MappedThreadMutexGuard<'a, R, G, T> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fmt::Debug::fmt(&**self, f) } } rustpython-common-0.2.0/src/lock.rs000064400000000000000000000035171046102023000154430ustar 00000000000000//! A module containing [`lock_api`]-based lock types that are or are not `Send + Sync` //! depending on whether the `threading` feature of this module is enabled. use lock_api::{ MappedMutexGuard, MappedRwLockReadGuard, MappedRwLockWriteGuard, Mutex, MutexGuard, RwLock, RwLockReadGuard, RwLockUpgradableReadGuard, RwLockWriteGuard, }; cfg_if::cfg_if! 
{ if #[cfg(feature = "threading")] { pub use parking_lot::{RawMutex, RawRwLock, RawThreadId}; pub use once_cell::sync::{Lazy, OnceCell}; } else { mod cell_lock; pub use cell_lock::{RawCellMutex as RawMutex, RawCellRwLock as RawRwLock, SingleThreadId as RawThreadId}; pub use once_cell::unsync::{Lazy, OnceCell}; } } mod immutable_mutex; pub use immutable_mutex::*; mod thread_mutex; pub use thread_mutex::*; pub type PyMutex = Mutex; pub type PyMutexGuard<'a, T> = MutexGuard<'a, RawMutex, T>; pub type PyMappedMutexGuard<'a, T> = MappedMutexGuard<'a, RawMutex, T>; pub type PyImmutableMappedMutexGuard<'a, T> = ImmutableMappedMutexGuard<'a, RawMutex, T>; pub type PyThreadMutex = ThreadMutex; pub type PyThreadMutexGuard<'a, T> = ThreadMutexGuard<'a, RawMutex, RawThreadId, T>; pub type PyMappedThreadMutexGuard<'a, T> = MappedThreadMutexGuard<'a, RawMutex, RawThreadId, T>; pub type PyRwLock = RwLock; pub type PyRwLockUpgradableReadGuard<'a, T> = RwLockUpgradableReadGuard<'a, RawRwLock, T>; pub type PyRwLockReadGuard<'a, T> = RwLockReadGuard<'a, RawRwLock, T>; pub type PyMappedRwLockReadGuard<'a, T> = MappedRwLockReadGuard<'a, RawRwLock, T>; pub type PyRwLockWriteGuard<'a, T> = RwLockWriteGuard<'a, RawRwLock, T>; pub type PyMappedRwLockWriteGuard<'a, T> = MappedRwLockWriteGuard<'a, RawRwLock, T>; // can add fn const_{mutex,rwlock}() if necessary, but we probably won't need to rustpython-common-0.2.0/src/macros.rs000064400000000000000000000027321046102023000157750ustar 00000000000000/// Suppress the MSVC invalid parameter handler, which by default crashes the process. Does nothing /// on non-MSVC targets. #[macro_export] macro_rules! suppress_iph { ($e:expr) => { $crate::__suppress_iph_impl!($e) }; } #[macro_export] #[doc(hidden)] #[cfg(all(windows, target_env = "msvc"))] macro_rules! 
__suppress_iph_impl { ($e:expr) => {{ let old = $crate::__macro_private::_set_thread_local_invalid_parameter_handler( $crate::__macro_private::silent_iph_handler, ); let ret = $e; $crate::__macro_private::_set_thread_local_invalid_parameter_handler(old); ret }}; } #[cfg(not(all(windows, target_env = "msvc")))] #[macro_export] #[doc(hidden)] macro_rules! __suppress_iph_impl { ($e:expr) => { $e }; } #[doc(hidden)] pub mod __macro_private { #[cfg(target_env = "msvc")] type InvalidParamHandler = extern "C" fn( *const libc::wchar_t, *const libc::wchar_t, *const libc::wchar_t, libc::c_uint, libc::uintptr_t, ); #[cfg(target_env = "msvc")] extern "C" { pub fn _set_thread_local_invalid_parameter_handler( pNew: InvalidParamHandler, ) -> InvalidParamHandler; } #[cfg(target_env = "msvc")] pub extern "C" fn silent_iph_handler( _: *const libc::wchar_t, _: *const libc::wchar_t, _: *const libc::wchar_t, _: libc::c_uint, _: libc::uintptr_t, ) { } } rustpython-common-0.2.0/src/os.rs000064400000000000000000000012061046102023000151250ustar 00000000000000// TODO: we can move more os-specific bindings/interfaces from stdlib::{os, posix, nt} to here use std::io; #[cfg(windows)] pub fn errno() -> io::Error { let err = io::Error::last_os_error(); // FIXME: probably not ideal, we need a bigger dichotomy between GetLastError and errno if err.raw_os_error() == Some(0) { extern "C" { fn _get_errno(pValue: *mut i32) -> i32; } let mut e = 0; unsafe { suppress_iph!(_get_errno(&mut e)) }; io::Error::from_raw_os_error(e) } else { err } } #[cfg(not(windows))] pub fn errno() -> io::Error { io::Error::last_os_error() } rustpython-common-0.2.0/src/rc.rs000064400000000000000000000005731046102023000151160ustar 00000000000000#[cfg(not(feature = "threading"))] use std::rc::Rc; #[cfg(feature = "threading")] use std::sync::Arc; // type aliases instead of newtypes because you can't do `fn method(self: PyRc)` with a // newtype; requires the arbitrary_self_types unstable feature #[cfg(feature = "threading")] 
pub type PyRc = Arc; #[cfg(not(feature = "threading"))] pub type PyRc = Rc; rustpython-common-0.2.0/src/refcount.rs000064400000000000000000000035631046102023000163410ustar 00000000000000use crate::atomic::{Ordering::*, PyAtomic, Radium}; /// from alloc::sync /// A soft limit on the amount of references that may be made to an `Arc`. /// /// Going above this limit will abort your program (although not /// necessarily) at _exactly_ `MAX_REFCOUNT + 1` references. const MAX_REFCOUNT: usize = isize::MAX as usize; pub struct RefCount { strong: PyAtomic, } impl Default for RefCount { fn default() -> Self { Self::new() } } impl RefCount { const MASK: usize = MAX_REFCOUNT; pub fn new() -> Self { RefCount { strong: Radium::new(1), } } #[inline] pub fn get(&self) -> usize { self.strong.load(SeqCst) } #[inline] pub fn inc(&self) { let old_size = self.strong.fetch_add(1, Relaxed); if old_size & Self::MASK == Self::MASK { std::process::abort(); } } /// Returns true if successful #[inline] pub fn safe_inc(&self) -> bool { self.strong .fetch_update(AcqRel, Acquire, |prev| (prev != 0).then_some(prev + 1)) .is_ok() } /// Decrement the reference count. Returns true when the refcount drops to 0. 
#[inline] pub fn dec(&self) -> bool { if self.strong.fetch_sub(1, Release) != 1 { return false; } PyAtomic::::fence(Acquire); true } } impl RefCount { // move these functions out and give separated type once type range is stabilized pub fn leak(&self) { debug_assert!(!self.is_leaked()); const BIT_MARKER: usize = (std::isize::MAX as usize) + 1; debug_assert_eq!(BIT_MARKER.count_ones(), 1); debug_assert_eq!(BIT_MARKER.leading_zeros(), 0); self.strong.fetch_add(BIT_MARKER, Relaxed); } pub fn is_leaked(&self) -> bool { (self.strong.load(Acquire) as isize) < 0 } } rustpython-common-0.2.0/src/static_cell.rs000064400000000000000000000063421046102023000170000ustar 00000000000000#[cfg(not(feature = "threading"))] mod non_threading { use crate::lock::OnceCell; use std::thread::LocalKey; pub struct StaticCell { inner: &'static LocalKey>, } fn leak(x: T) -> &'static T { Box::leak(Box::new(x)) } impl StaticCell { #[doc(hidden)] pub const fn _from_localkey(inner: &'static LocalKey>) -> Self { Self { inner } } pub fn get(&'static self) -> Option<&'static T> { self.inner.with(|x| x.get().copied()) } pub fn set(&'static self, value: T) -> Result<(), T> { // thread-safe because it's a unsync::OnceCell self.inner.with(|x| { if x.get().is_some() { Err(value) } else { // will never fail let _ = x.set(leak(value)); Ok(()) } }) } pub fn get_or_init(&'static self, f: F) -> &'static T where F: FnOnce() -> T, { self.inner.with(|x| *x.get_or_init(|| leak(f()))) } pub fn get_or_try_init(&'static self, f: F) -> Result<&'static T, E> where F: FnOnce() -> Result, { self.inner .with(|x| x.get_or_try_init(|| f().map(leak)).map(|&x| x)) } } #[macro_export] macro_rules! static_cell { ($($(#[$attr:meta])* $vis:vis static $name:ident: $t:ty;)+) => { $($(#[$attr])* $vis static $name: $crate::static_cell::StaticCell<$t> = { ::std::thread_local! 
{ $vis static $name: $crate::lock::OnceCell<&'static $t> = $crate::lock::OnceCell::new(); } $crate::static_cell::StaticCell::_from_localkey(&$name) };)+ }; } } #[cfg(not(feature = "threading"))] pub use non_threading::*; #[cfg(feature = "threading")] mod threading { use crate::lock::OnceCell; pub struct StaticCell { inner: OnceCell, } impl StaticCell { #[doc(hidden)] pub const fn _from_oncecell(inner: OnceCell) -> Self { Self { inner } } pub fn get(&'static self) -> Option<&'static T> { self.inner.get() } pub fn set(&'static self, value: T) -> Result<(), T> { self.inner.set(value) } pub fn get_or_init(&'static self, f: F) -> &'static T where F: FnOnce() -> T, { self.inner.get_or_init(f) } pub fn get_or_try_init(&'static self, f: F) -> Result<&'static T, E> where F: FnOnce() -> Result, { self.inner.get_or_try_init(f) } } #[macro_export] macro_rules! static_cell { ($($(#[$attr:meta])* $vis:vis static $name:ident: $t:ty;)+) => { $($(#[$attr])* $vis static $name: $crate::static_cell::StaticCell<$t> = $crate::static_cell::StaticCell::_from_oncecell($crate::lock::OnceCell::new());)+ }; } } #[cfg(feature = "threading")] pub use threading::*; rustpython-common-0.2.0/src/str.rs000064400000000000000000000345621046102023000153270ustar 00000000000000use crate::{ atomic::{PyAtomic, Radium}, hash::PyHash, }; use ascii::AsciiString; use once_cell::unsync::OnceCell; use std::{ fmt, ops::{Bound, RangeBounds}, }; #[cfg(not(target_arch = "wasm32"))] #[allow(non_camel_case_types)] pub type wchar_t = libc::wchar_t; #[cfg(target_arch = "wasm32")] #[allow(non_camel_case_types)] pub type wchar_t = u32; /// Utf8 + state.ascii (+ PyUnicode_Kind in future) #[derive(Debug, Copy, Clone, PartialEq)] pub enum PyStrKind { Ascii, Utf8, } impl std::ops::BitOr for PyStrKind { type Output = Self; fn bitor(self, other: Self) -> Self { match (self, other) { (Self::Ascii, Self::Ascii) => Self::Ascii, _ => Self::Utf8, } } } impl PyStrKind { #[inline] pub fn new_data(self) -> PyStrKindData { match self { 
PyStrKind::Ascii => PyStrKindData::Ascii, PyStrKind::Utf8 => PyStrKindData::Utf8(Radium::new(usize::MAX)), } } } #[derive(Debug)] pub enum PyStrKindData { Ascii, // uses usize::MAX as a sentinel for "uncomputed" Utf8(PyAtomic), } impl PyStrKindData { #[inline] pub fn kind(&self) -> PyStrKind { match self { PyStrKindData::Ascii => PyStrKind::Ascii, PyStrKindData::Utf8(_) => PyStrKind::Utf8, } } } pub struct BorrowedStr<'a> { bytes: &'a [u8], kind: PyStrKindData, #[allow(dead_code)] hash: PyAtomic, } impl<'a> BorrowedStr<'a> { /// # Safety /// `s` have to be an ascii string #[inline] pub unsafe fn from_ascii_unchecked(s: &'a [u8]) -> Self { debug_assert!(s.is_ascii()); Self { bytes: s, kind: PyStrKind::Ascii.new_data(), hash: PyAtomic::::new(0), } } #[inline] pub fn from_bytes(s: &'a [u8]) -> Self { let k = if s.is_ascii() { PyStrKind::Ascii.new_data() } else { PyStrKind::Utf8.new_data() }; Self { bytes: s, kind: k, hash: PyAtomic::::new(0), } } #[inline] pub fn as_str(&self) -> &str { unsafe { // SAFETY: Both PyStrKind::{Ascii, Utf8} are valid utf8 string std::str::from_utf8_unchecked(self.bytes) } } #[inline] pub fn char_len(&self) -> usize { match self.kind { PyStrKindData::Ascii => self.bytes.len(), PyStrKindData::Utf8(ref len) => match len.load(core::sync::atomic::Ordering::Relaxed) { usize::MAX => self._compute_char_len(), len => len, }, } } #[cold] fn _compute_char_len(&self) -> usize { match self.kind { PyStrKindData::Utf8(ref char_len) => { let len = self.as_str().chars().count(); // len cannot be usize::MAX, since vec.capacity() < sys.maxsize char_len.store(len, core::sync::atomic::Ordering::Relaxed); len } _ => unsafe { debug_assert!(false); // invalid for non-utf8 strings std::hint::unreachable_unchecked() }, } } } impl std::ops::Deref for BorrowedStr<'_> { type Target = str; fn deref(&self) -> &str { self.as_str() } } impl std::fmt::Display for BorrowedStr<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { self.as_str().fmt(f) } } 
/// Returns the sub-slice of `s` covering the *characters* (not bytes) selected by `range`.
///
/// Returns `None` when the range does not fit inside `s`: the start or the end lies past
/// the last character, or the end precedes the start. An empty range positioned at or
/// before the end of the string yields `Some("")`.
pub fn try_get_chars(s: &str, range: impl RangeBounds<usize>) -> Option<&str> {
    let start = match range.start_bound() {
        Bound::Included(&i) => i,
        Bound::Excluded(&i) => i.checked_add(1)?,
        Bound::Unbounded => 0,
    };
    let mut chars = s.chars();
    // Skip the first `start` characters; bail out if the string is shorter than that.
    if let Some(last_skipped) = start.checked_sub(1) {
        chars.nth(last_skipped)?;
    }
    let rest = chars.as_str();
    // Checked arithmetic: an end bound that precedes `start` must yield `None` rather than
    // panic on integer underflow (debug builds) or depend on wrap-around (release builds).
    let range_len = match range.end_bound() {
        Bound::Included(&i) => i.checked_add(1)?.checked_sub(start)?,
        Bound::Excluded(&i) => i.checked_sub(start)?,
        Bound::Unbounded => return Some(rest),
    };
    char_range_end(rest, range_len).map(|end| &rest[..end])
}

/// Like [`try_get_chars`], but panics instead of returning `None`.
///
/// # Panics
/// Panics if `range` does not fit inside `s` when measured in characters.
pub fn get_chars(s: &str, range: impl RangeBounds<usize>) -> &str {
    try_get_chars(s, range).expect("character range out of bounds")
}

/// Returns the byte offset just past the first `nchars` characters of `s`.
///
/// Yields `Some(0)` for `nchars == 0` and `None` if `s` holds fewer than `nchars`
/// characters.
#[inline]
pub fn char_range_end(s: &str, nchars: usize) -> Option<usize> {
    let Some(last_char_index) = nchars.checked_sub(1) else {
        return Some(0);
    };
    s.char_indices()
        .nth(last_char_index)
        .map(|(index, c)| index + c.len_utf8())
}

/// Left-pads `bytes` with ASCII `'0'` up to `width` bytes.
///
/// A single leading `+` or `-` stays in front of the inserted zeros. Input that is already
/// at least `width` bytes long is returned as an unchanged copy.
pub fn zfill(bytes: &[u8], width: usize) -> Vec<u8> {
    if width <= bytes.len() {
        return bytes.to_vec();
    }
    // Split off the optional sign so the padding goes between it and the digits.
    let (sign, digits) = match bytes.first() {
        Some(b'+' | b'-') => bytes.split_at(1),
        _ => (&bytes[..0], bytes),
    };
    // The result is exactly `width` bytes long, so allocate once.
    let mut filled = Vec::with_capacity(width);
    filled.extend_from_slice(sign);
    filled.resize(sign.len() + (width - bytes.len()), b'0');
    filled.extend_from_slice(digits);
    filled
}

/// Convert a string to ascii compatible, escaping unicodes into escape
/// sequences.
pub fn to_ascii(value: &str) -> AsciiString { let mut ascii = Vec::new(); for c in value.chars() { if c.is_ascii() { ascii.push(c as u8); } else { let c = c as i64; let hex = if c < 0x100 { format!("\\x{c:02x}") } else if c < 0x10000 { format!("\\u{c:04x}") } else { format!("\\U{c:08x}") }; ascii.append(&mut hex.into_bytes()); } } unsafe { AsciiString::from_ascii_unchecked(ascii) } } #[doc(hidden)] pub const fn bytes_is_ascii(x: &str) -> bool { let x = x.as_bytes(); let mut i = 0; while i < x.len() { if !x[i].is_ascii() { return false; } i += 1; } true } pub mod levenshtein { use std::{cell::RefCell, thread_local}; pub const MOVE_COST: usize = 2; const CASE_COST: usize = 1; const MAX_STRING_SIZE: usize = 40; fn substitution_cost(mut a: u8, mut b: u8) -> usize { if (a & 31) != (b & 31) { return MOVE_COST; } if a == b { return 0; } if (b'A'..=b'Z').contains(&a) { a += b'a' - b'A'; } if (b'A'..=b'Z').contains(&b) { b += b'a' - b'A'; } if a == b { CASE_COST } else { MOVE_COST } } pub fn levenshtein_distance(a: &str, b: &str, max_cost: usize) -> usize { thread_local! 
{ static BUFFER: RefCell<[usize; MAX_STRING_SIZE]> = RefCell::new([0usize; MAX_STRING_SIZE]); } if a == b { return 0; } let (mut a_bytes, mut b_bytes) = (a.as_bytes(), b.as_bytes()); let (mut a_begin, mut a_end) = (0usize, a.len()); let (mut b_begin, mut b_end) = (0usize, b.len()); while a_end > 0 && b_end > 0 && (a_bytes[a_begin] == b_bytes[b_begin]) { a_begin += 1; b_begin += 1; a_end -= 1; b_end -= 1; } while a_end > 0 && b_end > 0 && (a_bytes[a_begin + a_end - 1] == b_bytes[b_begin + b_end - 1]) { a_end -= 1; b_end -= 1; } if a_end == 0 || b_end == 0 { return (a_end + b_end) * MOVE_COST; } if a_end > MAX_STRING_SIZE || b_end > MAX_STRING_SIZE { return max_cost + 1; } if b_end < a_end { std::mem::swap(&mut a_bytes, &mut b_bytes); std::mem::swap(&mut a_begin, &mut b_begin); std::mem::swap(&mut a_end, &mut b_end); } if (b_end - a_end) * MOVE_COST > max_cost { return max_cost + 1; } BUFFER.with(|buffer| { let mut buffer = buffer.borrow_mut(); for i in 0..a_end { buffer[i] = (i + 1) * MOVE_COST; } let mut result = 0usize; for (b_index, b_code) in b_bytes[b_begin..(b_begin + b_end)].iter().enumerate() { result = b_index * MOVE_COST; let mut distance = result; let mut minimum = usize::MAX; for (a_index, a_code) in a_bytes[a_begin..(a_begin + a_end)].iter().enumerate() { let substitute = distance + substitution_cost(*b_code, *a_code); distance = buffer[a_index]; let insert_delete = usize::min(result, distance) + MOVE_COST; result = usize::min(insert_delete, substitute); buffer[a_index] = result; if result < minimum { minimum = result; } } if minimum > max_cost { return max_cost + 1; } } result }) } } #[macro_export] macro_rules! 
ascii { ($x:literal) => {{ const _: () = { ["not ascii"][!$crate::str::bytes_is_ascii($x) as usize]; }; unsafe { $crate::vendored::ascii::AsciiStr::from_ascii_unchecked($x.as_bytes()) } }}; } /// Get a Display-able type that formats to the python `repr()` of the string value #[inline] pub fn repr(s: &str) -> Repr<'_> { Repr { s, info: OnceCell::new(), } } #[derive(Debug, Copy, Clone)] #[non_exhaustive] pub struct ReprOverflowError; impl fmt::Display for ReprOverflowError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.write_str("string is too long to generate repr") } } #[derive(Copy, Clone)] struct ReprInfo { dquoted: bool, out_len: usize, } impl ReprInfo { fn get(s: &str) -> Result { let mut out_len = 0usize; let mut squote = 0; let mut dquote = 0; for ch in s.chars() { let incr = match ch { '\'' => { squote += 1; 1 } '"' => { dquote += 1; 1 } '\\' | '\t' | '\r' | '\n' => 2, ch if ch < ' ' || ch as u32 == 0x7f => 4, // \xHH ch if ch.is_ascii() => 1, ch if crate::char::is_printable(ch) => { // max = std::cmp::max(ch, max); ch.len_utf8() } ch if (ch as u32) < 0x100 => 4, // \xHH ch if (ch as u32) < 0x10000 => 6, // \uHHHH _ => 10, // \uHHHHHHHH }; out_len += incr; if out_len > std::isize::MAX as usize { return Err(ReprOverflowError); } } let (quote, num_escaped_quotes) = choose_quotes_for_repr(squote, dquote); // we'll be adding backslashes in front of the existing inner quotes out_len += num_escaped_quotes; // start and ending quotes out_len += 2; let dquoted = quote == '"'; Ok(ReprInfo { dquoted, out_len }) } } pub struct Repr<'a> { s: &'a str, // the tuple is dquouted, out_len info: OnceCell>, } impl Repr<'_> { fn get_info(&self) -> Result { *self.info.get_or_init(|| ReprInfo::get(self.s)) } /// Same as `::to_string()`, but checks for a possible OverflowError. 
pub fn to_string_checked(&self) -> Result { let info = self.get_info()?; let mut repr = String::with_capacity(info.out_len); self._fmt(&mut repr, info).unwrap(); Ok(repr) } fn _fmt(&self, repr: &mut W, info: ReprInfo) -> fmt::Result { let s = self.s; let in_len = s.len(); let ReprInfo { dquoted, out_len } = info; let quote = if dquoted { '"' } else { '\'' }; // if we don't need to escape anything we can just copy let unchanged = out_len == in_len; repr.write_char(quote)?; if unchanged { repr.write_str(s)?; } else { for ch in s.chars() { match ch { '\n' => repr.write_str("\\n"), '\t' => repr.write_str("\\t"), '\r' => repr.write_str("\\r"), // these 2 branches *would* be handled below, but we shouldn't have to do a // unicodedata lookup just for ascii characters '\x20'..='\x7e' => { // printable ascii range if ch == quote || ch == '\\' { repr.write_char('\\')?; } repr.write_char(ch) } ch if ch.is_ascii() => { write!(repr, "\\x{:02x}", ch as u8) } ch if crate::char::is_printable(ch) => repr.write_char(ch), '\0'..='\u{ff}' => { write!(repr, "\\x{:02x}", ch as u32) } '\0'..='\u{ffff}' => { write!(repr, "\\u{:04x}", ch as u32) } _ => { write!(repr, "\\U{:08x}", ch as u32) } }?; } } repr.write_char(quote) } } impl fmt::Display for Repr<'_> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let info = self.get_info().unwrap(); self._fmt(f, info) } } /// returns the outer quotes to use and the number of quotes that need to be escaped pub(crate) fn choose_quotes_for_repr(num_squotes: usize, num_dquotes: usize) -> (char, usize) { // always use squote unless we have squotes but no dquotes let use_dquote = num_squotes > 0 && num_dquotes == 0; if use_dquote { ('"', num_dquotes) } else { ('\'', num_squotes) } } #[cfg(test)] mod tests { use super::*; #[test] fn test_get_chars() { let s = "0123456789"; assert_eq!(get_chars(s, 3..7), "3456"); assert_eq!(get_chars(s, 3..7), &s[3..7]); let s = "0유니코드 문자열9"; assert_eq!(get_chars(s, 3..7), "코드 문"); let s = "0😀😃😄😁😆😅😂🤣9"; 
assert_eq!(get_chars(s, 3..7), "😄😁😆😅"); } } rustpython-common-0.2.0/src/windows.rs000064400000000000000000000013751046102023000162050ustar 00000000000000use std::{ ffi::{OsStr, OsString}, os::windows::ffi::{OsStrExt, OsStringExt}, }; pub trait ToWideString { fn to_wide(&self) -> Vec; fn to_wides_with_nul(&self) -> Vec; } impl ToWideString for T where T: AsRef, { fn to_wide(&self) -> Vec { self.as_ref().encode_wide().collect() } fn to_wides_with_nul(&self) -> Vec { self.as_ref().encode_wide().chain(Some(0)).collect() } } pub trait FromWideString where Self: Sized, { fn from_wides_until_nul(wide: &[u16]) -> Self; } impl FromWideString for OsString { fn from_wides_until_nul(wide: &[u16]) -> OsString { let len = wide.iter().take_while(|&&c| c != 0).count(); OsString::from_wide(&wide[..len]) } }