diff --git a/CHANGELOG.md b/CHANGELOG.md index fb96ae2b..252e7e0a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ - Added `Buffer::pixels()` for accessing the buffer's pixel data. - Added `Buffer::pixel_rows()` for iterating over rows of the buffer data. - Added `Buffer::pixels_iter()` for iterating over each pixel with its associated `x`/`y` coordinate. +- Added `Buffer::byte_stride()` for pixel buffers whose rows are aligned and may contain padding bytes at the end. Prefer to use the above helpers instead of accessing pixel data directly. - **Breaking:** Removed generic type parameters `D` and `W` from `Buffer<'_>` struct. - **Breaking:** Removed `Deref[Mut]` implementation on `Buffer<'_>`. Use `Buffer::pixels()` instead. - **Breaking:** Add `Pixel` struct, and use that for pixels instead of `u32`. diff --git a/examples/animation.rs b/examples/animation.rs index c793b4b2..7ae3b6c8 100644 --- a/examples/animation.rs +++ b/examples/animation.rs @@ -23,7 +23,7 @@ fn main() { let window = util::make_window(event_loop, |w| w); let old_size = (0, 0); - let frames = pre_render_frames(0, 0); + let frames = pre_render_frames(0, 0, 0); (window, old_size, frames) }, @@ -64,7 +64,7 @@ fn main() { let size = (buffer.width().get(), buffer.height().get()); if size != *old_size { *old_size = size; - *frames = pre_render_frames(size.0, size.1); + *frames = pre_render_frames(buffer.byte_stride().get() / 4, size.0, size.1); } let frame = &frames[((elapsed * 60.0).round() as usize).clamp(0, 59)]; @@ -94,11 +94,11 @@ fn main() { util::run_app(event_loop, app); } -fn pre_render_frames(width: u32, height: u32) -> Vec> { +fn pre_render_frames(stride: u32, width: u32, height: u32) -> Vec> { let render = |frame_id| { let elapsed = ((frame_id as f64) / (60.0)) * 2.0 * PI; - let coords = (0..height).flat_map(|x| (0..width).map(move |y| (x, y))); + let coords = (0..height).flat_map(|x| (0..stride).map(move |y| (x, y))); coords .map(|(x, y)| { let y = (y as f64) / (height as f64); diff --git a/examples/fruit.rs b/examples/fruit.rs index e8164d6b..4b36bc1b 100644 --- a/examples/fruit.rs +++ b/examples/fruit.rs @@ -52,10 +52,10 @@ fn main() { }; let mut buffer = surface.buffer_mut().unwrap(); - let width = fruit.width(); + let stride = buffer.byte_stride().get() / 4; for (x, y, pixel) in fruit.pixels() { let pixel = Pixel::new_rgb(pixel.0[0], pixel.0[1], pixel.0[2]); - buffer.pixels()[(y * width + x) as usize] = pixel; + buffer.pixels()[(y * stride + x) as usize] = pixel; } buffer.present().unwrap(); diff --git a/src/backend_dispatch.rs b/src/backend_dispatch.rs index 98fa9049..30a8f6d4 100644 --- a/src/backend_dispatch.rs +++ b/src/backend_dispatch.rs @@ -146,6 +146,16 @@ macro_rules! make_dispatch { } impl BufferInterface for BufferDispatch<'_> { + #[inline] + fn byte_stride(&self) -> NonZeroU32 { + match self { + $( + $(#[$attr])* + Self::$name(inner) => inner.byte_stride(), + )* + } + } + #[inline] fn width(&self) -> NonZeroU32 { match self { diff --git a/src/backend_interface.rs b/src/backend_interface.rs index 1d0f8b53..54822acd 100644 --- a/src/backend_interface.rs +++ b/src/backend_interface.rs @@ -35,6 +35,7 @@ pub(crate) trait SurfaceInterface NonZeroU32; fn width(&self) -> NonZeroU32; fn height(&self) -> NonZeroU32; fn pixels_mut(&mut self) -> &mut [Pixel]; diff --git a/src/backends/android.rs b/src/backends/android.rs index 18916772..8a521b8d 100644 --- a/src/backends/android.rs +++ b/src/backends/android.rs @@ -100,7 +100,7 @@ impl SurfaceInterface for Android } let buffer = - vec![Pixel::default(); native_window_buffer.width() * native_window_buffer.height()]; + vec![Pixel::default(); native_window_buffer.stride() * native_window_buffer.height()]; Ok(BufferImpl { native_window_buffer, @@ -124,6 +124,10 @@ pub struct BufferImpl<'a> { unsafe impl Send for BufferImpl<'_> {} impl BufferInterface for BufferImpl<'_> { + fn byte_stride(&self) -> NonZeroU32 { + NonZeroU32::new(self.native_window_buffer.stride() as u32 * 4).unwrap() + } + fn width(&self) -> NonZeroU32 { NonZeroU32::new(self.native_window_buffer.width() as u32).unwrap() } @@ -144,27 +148,19 @@ impl BufferInterface for BufferImpl<'_> { // TODO: This function is pretty slow this way fn present(mut self) -> Result<(), SoftBufferError> { - let input_lines = self.buffer.chunks(self.native_window_buffer.width()); - for (output, input) in self - .native_window_buffer - .lines() - // Unreachable as we checked before that this is a valid, mappable format - .unwrap() - .zip(input_lines) - { - // .lines() removed the stride - assert_eq!(output.len(), input.len() * 4); - - // Write RGB(A) to the output. - // TODO: Use `slice::write_copy_of_slice` once stable and in MSRV. - // TODO(madsmtm): Verify that this compiles down to an efficient copy. - for (i, pixel) in input.iter().enumerate() { - output[i * 4].write(pixel.r); - output[i * 4 + 1].write(pixel.g); - output[i * 4 + 2].write(pixel.b); - output[i * 4 + 3].write(pixel.a); - } + // Unreachable as we checked before that this is a valid, mappable format + let native_buffer = self.native_window_buffer.bytes().unwrap(); + + // Write RGB(A) to the output. + // TODO: Use `slice::write_copy_of_slice` once stable and in MSRV. + // TODO(madsmtm): Verify that this compiles down to an efficient copy. + for (pixel, output) in self.buffer.iter().zip(native_buffer.chunks_mut(4)) { + output[0].write(pixel.r); + output[1].write(pixel.g); + output[2].write(pixel.b); + output[3].write(pixel.a); } + Ok(()) } diff --git a/src/backends/cg.rs b/src/backends/cg.rs index f4d675e1..1048596f 100644 --- a/src/backends/cg.rs +++ b/src/backends/cg.rs @@ -259,8 +259,9 @@ impl SurfaceInterface for CGImpl< } fn buffer_mut(&mut self) -> Result, SoftBufferError> { + let buffer_size = util::byte_stride(self.width as u32) as usize * self.height / 4; Ok(BufferImpl { - buffer: util::PixelBuffer(vec![Pixel::default(); self.width * self.height]), + buffer: util::PixelBuffer(vec![Pixel::default(); buffer_size]), width: self.width, height: self.height, color_space: &self.color_space, @@ -279,6 +280,10 @@ pub struct BufferImpl<'a> { } impl BufferInterface for BufferImpl<'_> { + fn byte_stride(&self) -> NonZeroU32 { + NonZeroU32::new(util::byte_stride(self.width as u32)).unwrap() + } + fn width(&self) -> NonZeroU32 { NonZeroU32::new(self.width as u32).unwrap() } @@ -338,7 +343,7 @@ impl BufferInterface for BufferImpl<'_> { self.height, 8, 32, - self.width * 4, + util::byte_stride(self.width as u32) as usize, Some(self.color_space), bitmap_info, Some(&data_provider), diff --git a/src/backends/kms.rs b/src/backends/kms.rs index b3a95c26..91184cb6 100644 --- a/src/backends/kms.rs +++ b/src/backends/kms.rs @@ -308,6 +308,10 @@ impl Drop for KmsImpl { } impl BufferInterface for BufferImpl<'_> { + fn byte_stride(&self) -> NonZeroU32 { + NonZeroU32::new(self.width().get() * 4).unwrap() + } + fn width(&self) -> NonZeroU32 { self.size.0 } diff --git a/src/backends/orbital.rs b/src/backends/orbital.rs index 7a86303d..43e8faea 100644 --- a/src/backends/orbital.rs +++ b/src/backends/orbital.rs @@ -153,6 +153,10 @@ pub struct BufferImpl<'a> { } impl BufferInterface for BufferImpl<'_> { + fn byte_stride(&self) -> NonZeroU32 { + NonZeroU32::new(self.width().get() * 4).unwrap() + } + fn width(&self) -> NonZeroU32 { NonZeroU32::new(self.width as u32).unwrap() } diff --git a/src/backends/wayland/buffer.rs b/src/backends/wayland/buffer.rs index dc611b92..60a1bb0e 100644 --- a/src/backends/wayland/buffer.rs +++ b/src/backends/wayland/buffer.rs @@ -15,6 +15,7 @@ use wayland_client::{ }; use super::State; +use crate::util; use crate::Pixel; #[cfg(any(target_os = "linux", target_os = "freebsd"))] @@ -99,7 +100,7 @@ impl WaylandBuffer { 0, width, height, - width * 4, + util::byte_stride(width as u32) as i32, // This is documented as `0xXXRRGGBB` on a little-endian machine, which means a byte // order of `[B, G, R, X]`. wl_shm::Format::Xrgb8888, @@ -141,7 +142,7 @@ impl WaylandBuffer { 0, width, height, - width * 4, + util::byte_stride(width as u32) as i32, wl_shm::Format::Xrgb8888, &self.qh, self.released.clone(), @@ -161,7 +162,7 @@ impl WaylandBuffer { } fn len(&self) -> usize { - self.width as usize * self.height as usize + util::byte_stride(self.width as u32) as usize * self.height as usize / 4 } #[inline] diff --git a/src/backends/wayland/mod.rs b/src/backends/wayland/mod.rs index 1682f76f..ad3ce9a1 100644 --- a/src/backends/wayland/mod.rs +++ b/src/backends/wayland/mod.rs @@ -1,7 +1,7 @@ use crate::{ backend_interface::*, error::{InitError, SwResultExt}, - Pixel, Rect, SoftBufferError, + util, Pixel, Rect, SoftBufferError, }; use raw_window_handle::{HasDisplayHandle, HasWindowHandle, RawDisplayHandle, RawWindowHandle}; use std::{ @@ -219,6 +219,10 @@ pub struct BufferImpl<'a> { } impl BufferInterface for BufferImpl<'_> { + fn byte_stride(&self) -> NonZeroU32 { + NonZeroU32::new(util::byte_stride(self.width as u32)).unwrap() + } + fn width(&self) -> NonZeroU32 { NonZeroU32::new(self.width as u32).unwrap() } diff --git a/src/backends/web.rs b/src/backends/web.rs index beb6de7f..f3eea049 100644 --- a/src/backends/web.rs +++ b/src/backends/web.rs @@ -312,6 +312,10 @@ pub struct BufferImpl<'a> { } impl BufferInterface for BufferImpl<'_> { + fn byte_stride(&self) -> NonZeroU32 { + NonZeroU32::new(self.width().get() * 4).unwrap() + } + fn width(&self) -> NonZeroU32 { self.size .expect("must set size of surface before calling `width()` on the buffer") diff --git a/src/backends/win32.rs b/src/backends/win32.rs index bf002d79..df20fdfd 100644 --- a/src/backends/win32.rs +++ b/src/backends/win32.rs @@ -8,7 +8,7 @@ use raw_window_handle::{HasDisplayHandle, HasWindowHandle, RawWindowHandle}; use std::io; use std::marker::PhantomData; -use std::mem; +use std::mem::size_of; use std::num::{NonZeroI32, NonZeroU32}; use std::ptr::{self, NonNull}; use std::slice; @@ -55,7 +55,7 @@ impl Buffer { // Create a new bitmap info struct. let bitmap_info = BitmapInfo { bmi_header: Gdi::BITMAPINFOHEADER { - biSize: mem::size_of::() as u32, + biSize: size_of::() as u32, biWidth: width.get(), biHeight: -height.get(), biPlanes: 1, @@ -116,12 +116,9 @@ impl Buffer { #[inline] fn pixels_mut(&mut self) -> &mut [Pixel] { - unsafe { - slice::from_raw_parts_mut( - self.pixels.as_ptr(), - i32::from(self.width) as usize * i32::from(self.height) as usize, - ) - } + let num_bytes = + byte_stride(self.width.get() as u32, 32) as usize * self.height.get() as usize; + unsafe { slice::from_raw_parts_mut(self.pixels.as_ptr(), num_bytes / size_of::()) } } } @@ -250,6 +247,11 @@ pub struct BufferImpl<'a> { } impl BufferInterface for BufferImpl<'_> { + fn byte_stride(&self) -> NonZeroU32 { + let width = self.buffer.width.get() as u32; + NonZeroU32::new(byte_stride(width, 32)).unwrap() + } + fn width(&self) -> NonZeroU32 { self.buffer.width.try_into().unwrap() } @@ -467,3 +469,10 @@ impl From for OnlyUsedFromOrigin { Self(t) } } + +#[inline] +fn byte_stride(width: u32, bit_count: u32) -> u32 { + // + // When `bit_count == 32`, this is always just equal to `width * 4`. + ((width * bit_count + 31) & !31) >> 3 +} diff --git a/src/backends/x11.rs b/src/backends/x11.rs index c4c060c8..17a8f55f 100644 --- a/src/backends/x11.rs +++ b/src/backends/x11.rs @@ -328,10 +328,13 @@ impl SurfaceInterface fo height, }))?; + let byte_stride = width.get() * 4; + let num_bytes = byte_stride as usize * height.get() as usize; + if self.size != Some((width, height)) { self.buffer_presented = false; self.buffer - .resize(self.display.connection(), width.get(), height.get()) + .resize(self.display.connection(), num_bytes) .swbuf_err("Failed to resize X11 buffer")?; // We successfully resized the buffer. @@ -416,6 +419,10 @@ pub struct BufferImpl<'a> { } impl BufferInterface for BufferImpl<'_> { + fn byte_stride(&self) -> NonZeroU32 { + NonZeroU32::new(self.width().get() * 4).unwrap() + } + fn width(&self) -> NonZeroU32 { self.size.unwrap().0.into() } @@ -547,20 +554,12 @@ impl BufferInterface for BufferImpl<'_> { } impl Buffer { - /// Resize the buffer to the given size. - fn resize( - &mut self, - conn: &impl Connection, - width: u16, - height: u16, - ) -> Result<(), PushBufferError> { + /// Resize the buffer to the given number of bytes. + fn resize(&mut self, conn: &impl Connection, num_bytes: usize) -> Result<(), PushBufferError> { match self { - Buffer::Shm(ref mut shm) => shm.alloc_segment(conn, total_len(width, height)), + Buffer::Shm(ref mut shm) => shm.alloc_segment(conn, num_bytes), Buffer::Wire(wire) => { - wire.resize( - total_len(width, height) / size_of::(), - Pixel::default(), - ); + wire.resize(num_bytes / size_of::(), Pixel::default()); Ok(()) } } @@ -996,15 +995,3 @@ impl> PushResultExt for Result { self.map_err(Into::into) } } - -/// Get the length that a slice needs to be to hold a buffer of the given dimensions. -#[inline(always)] -fn total_len(width: u16, height: u16) -> usize { - let width: usize = width.into(); - let height: usize = height.into(); - - width - .checked_mul(height) - .and_then(|len| len.checked_mul(size_of::())) - .unwrap_or_else(|| panic!("Dimensions are too large: ({} x {})", width, height)) -} diff --git a/src/lib.rs b/src/lib.rs index be8169a4..fc96bd00 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -114,7 +114,14 @@ impl Surface { /// to have the buffer fill the entire window. Use your windowing library to find the size /// of the window. pub fn resize(&mut self, width: NonZeroU32, height: NonZeroU32) -> Result<(), SoftBufferError> { - self.surface_impl.resize(width, height) + if u32::MAX / 4 < width.get() { + // Stride would be too large. + return Err(SoftBufferError::SizeOutOfRange { width, height }); + } + + self.surface_impl.resize(width, height)?; + + Ok(()) } /// Copies the window contents into a buffer. @@ -140,11 +147,21 @@ impl Surface { /// sending another frame. pub fn buffer_mut(&mut self) -> Result, SoftBufferError> { let mut buffer_impl = self.surface_impl.buffer_mut()?; + + debug_assert_eq!( + buffer_impl.byte_stride().get() % 4, + 0, + "stride must be a multiple of 4" + ); debug_assert_eq!( - buffer_impl.height().get() as usize * buffer_impl.width().get() as usize, - buffer_impl.pixels_mut().len(), + buffer_impl.height().get() as usize * buffer_impl.byte_stride().get() as usize, + buffer_impl.pixels_mut().len() * 4, "buffer must be sized correctly" ); + debug_assert!( + buffer_impl.width().get() * 4 <= buffer_impl.byte_stride().get(), + "width * 4 must be less than or equal to stride" + ); Ok(Buffer { buffer_impl, @@ -235,6 +252,23 @@ pub struct Buffer<'a> { } impl Buffer<'_> { + /// The number of bytes wide each row in the buffer is. + /// + /// On some platforms, the buffer is slightly larger than `width * height * 4`, usually for + /// performance reasons to align each row such that they are never split across cache lines. + /// + /// In your code, prefer to use [`Buffer::pixel_rows`] (which handles this correctly), or + /// failing that, make sure to chunk rows by the stride instead of the width. + /// + /// This is guaranteed to be `>= width * 4`, and is guaranteed to be a multiple of 4. + #[doc(alias = "pitch")] // SDL and Direct3D + #[doc(alias = "bytes_per_row")] // WebGPU + #[doc(alias = "row_stride")] + #[doc(alias = "stride")] + pub fn byte_stride(&self) -> NonZeroU32 { + self.buffer_impl.byte_stride() + } + /// The amount of pixels wide the buffer is. pub fn width(&self) -> NonZeroU32 { self.buffer_impl.width() @@ -291,7 +325,7 @@ impl Buffer<'_> { impl Buffer<'_> { /// Get a mutable reference to the buffer's pixels. /// - /// The size of the returned slice is `buffer.width() * buffer.height()`. + /// The size of the returned slice is `buffer.byte_stride() * buffer.height() / 4`. /// /// # Examples /// @@ -308,7 +342,7 @@ impl Buffer<'_> { /// Iterate over each row of pixels. /// - /// Each slice returned from the iterator has a length of `buffer.width()`. + /// Each slice returned from the iterator has a length of `buffer.byte_stride() / 4`. /// /// # Examples /// @@ -360,11 +394,11 @@ impl Buffer<'_> { pub fn pixel_rows( &mut self, ) -> impl DoubleEndedIterator + ExactSizeIterator { - let width = self.width().get() as usize; + let pixel_stride = self.byte_stride().get() as usize / 4; let pixels = self.pixels(); - assert_eq!(pixels.len() % width, 0, "buffer must be multiple of width"); - // NOTE: This won't panic because `width` is `NonZeroU32` - pixels.chunks_mut(width) + assert_eq!(pixels.len() % pixel_stride, 0, "must be multiple of stride"); + // NOTE: This won't panic because `pixel_stride` came from `NonZeroU32` + pixels.chunks_mut(pixel_stride) } /// Iterate over each pixel in the data. diff --git a/src/pixel.rs b/src/pixel.rs index 5c3949ad..3d60424a 100644 --- a/src/pixel.rs +++ b/src/pixel.rs @@ -77,10 +77,10 @@ /// let height = buffer.height().get(); /// /// // Use fast, zero-copy implementation when possible, and fall back to slower version when not. -/// if PixelFormat::Rgbx.is_default() { +/// if PixelFormat::Rgbx.is_default() && buffer.byte_stride().get() == width * 4 { /// // SAFETY: `Pixel` can be reinterpreted as `[u8; 4]`. /// let pixels = unsafe { std::mem::transmute::<&mut [Pixel], &mut [[u8; 4]]>(buffer.pixels()) }; -/// // CORRECTNESS: We just checked that the format is RGBX. +/// // CORRECTNESS: We just checked that the format is RGBX, and that `stride == width * 4`. /// render(pixels, width, height); /// } else { /// // Render into temporary buffer. @@ -88,8 +88,10 @@ /// render(&mut temporary, width, height); /// /// // And copy from temporary buffer to actual pixel data. -/// for (tmp, actual) in temporary.iter_mut().zip(buffer.pixels()) { -/// *actual = Pixel::new_rgb(tmp[0], tmp[1], tmp[2]); +/// for (tmp_row, actual_row) in temporary.chunks(width as usize).zip(buffer.pixel_rows()) { +/// for (tmp, actual) in tmp_row.iter().zip(actual_row) { +/// *actual = Pixel::new_rgb(tmp[0], tmp[1], tmp[2]); +/// } /// } /// } /// diff --git a/src/util.rs b/src/util.rs index a21e2cac..8b9747ad 100644 --- a/src/util.rs +++ b/src/util.rs @@ -65,3 +65,19 @@ impl ops::DerefMut for PixelBuffer { &mut self.0 } } + +/// Compute the byte stride desired by Softbuffer when a platform can use any stride. +/// +/// TODO(madsmtm): This should take the pixel format / bit depth as input after: +/// +#[inline] +pub(crate) fn byte_stride(width: u32) -> u32 { + let row_alignment = if cfg!(debug_assertions) { + 16 // Use a higher alignment to help users catch issues with their stride calculations. + } else { + 4 // At least 4 is necessary for `Buffer` to return `&mut [u32]`. + }; + // TODO: Use `next_multiple_of` when in MSRV. + let mask = row_alignment * 4 - 1; + ((width * 32 + mask) & !mask) >> 3 +}