From cb8cf9a6c01e90070bea3ae80918c7c90a504e58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E3=81=82=E3=81=99=E3=81=B1=E3=82=8B?= Date: Wed, 27 May 2026 19:05:30 +0900 Subject: [PATCH 1/4] feat: add GPU-capable raster surfaces --- tellur-core/src/composite.rs | 14 +- tellur-core/src/layer.rs | 10 +- tellur-core/src/layout.rs | 21 +-- tellur-core/src/raster.rs | 150 +++++++++++++++++- tellur-core/src/render_context.rs | 60 ++++++- tellur-live/src/server.rs | 9 +- .../examples/raster_layer_to_png.rs | 2 +- tellur-renderer/examples/scene_to_png.rs | 2 +- tellur-renderer/examples/text_to_png.rs | 2 +- tellur-renderer/src/outline.rs | 34 ++-- tellur-renderer/src/rasterize.rs | 8 +- tellur-renderer/src/render_context.rs | 37 ++++- tellur-renderer/src/shadow.rs | 32 ++-- tellur-renderer/src/video.rs | 2 + 14 files changed, 289 insertions(+), 94 deletions(-) diff --git a/tellur-core/src/composite.rs b/tellur-core/src/composite.rs index 80e96ca..663b08f 100644 --- a/tellur-core/src/composite.rs +++ b/tellur-core/src/composite.rs @@ -16,7 +16,7 @@ //! source pixels skip the write entirely and fully-opaque ones go //! through a 4-byte copy. -use crate::raster::{PixelFormat, RasterImage, Resolution}; +use crate::raster::{CpuRasterImage, PixelFormat, Resolution}; /// Source-over composites `src` onto `dst` at pixel offset /// `(offset_x, offset_y)`. 
Both buffers hold 8-bit straight-alpha RGBA @@ -28,7 +28,7 @@ use crate::raster::{PixelFormat, RasterImage, Resolution}; pub fn composite_at( dst: &mut [u8], dst_size: Resolution, - src: &RasterImage, + src: &CpuRasterImage, offset_x: i32, offset_y: i32, ) { @@ -130,16 +130,10 @@ fn blend_row(dst: &mut [u8], src: &[u8]) { #[cfg(test)] mod tests { use super::*; - use bytes::Bytes; - fn image(width: u32, height: u32, pixels: Vec) -> RasterImage { + fn image(width: u32, height: u32, pixels: Vec) -> CpuRasterImage { assert_eq!(pixels.len(), (width * height * 4) as usize); - RasterImage { - width, - height, - format: PixelFormat::Rgba8, - pixels: Bytes::from(pixels), - } + CpuRasterImage::new(width, height, PixelFormat::Rgba8, pixels) } /// Straight-alpha Porter-Duff source-over carried out in `f64`, used diff --git a/tellur-core/src/layer.rs b/tellur-core/src/layer.rs index a03db7d..fa10f0c 100644 --- a/tellur-core/src/layer.rs +++ b/tellur-core/src/layer.rs @@ -19,8 +19,6 @@ //! source-over compositing it onto the output at the corresponding pixel //! offset. -use bytes::Bytes; - use crate::composite::composite_at; use crate::geometry::{Constraints, Rect, Transform, Vec2}; use crate::placement::Placed; @@ -265,15 +263,11 @@ pub(crate) fn composite_children( // Route the child render through the context so cache lookups // can intercept it before the underlying `render` runs. let image = ctx.render(*child, *child_size, Resolution::new(child_px_w, child_px_h)); + let image = ctx.readback(image); composite_at(&mut accum, target, &image, offset_x, offset_y); } - RasterImage { - width: target.width, - height: target.height, - format: PixelFormat::Rgba8, - pixels: Bytes::from(accum), - } + RasterImage::cpu(target.width, target.height, PixelFormat::Rgba8, accum) } /// Smallest axis-aligned rectangle containing both `a` and `b`. 
diff --git a/tellur-core/src/layout.rs b/tellur-core/src/layout.rs index 3a300cf..3c64262 100644 --- a/tellur-core/src/layout.rs +++ b/tellur-core/src/layout.rs @@ -663,8 +663,6 @@ pub mod raster { //! Raster equivalents of the vector layout containers. Same shape //! and semantics; operate on `Box`. - use bytes::Bytes; - use std::hash::{Hash, Hasher}; use super::{ @@ -1097,12 +1095,12 @@ pub mod raster { _ctx: &mut dyn RenderContext, ) -> RasterImage { let bytes = (target.width as usize) * (target.height as usize) * 4; - RasterImage { - width: target.width, - height: target.height, - format: PixelFormat::Rgba8, - pixels: Bytes::from(vec![0u8; bytes]), - } + RasterImage::cpu( + target.width, + target.height, + PixelFormat::Rgba8, + vec![0u8; bytes], + ) } } @@ -1136,12 +1134,7 @@ pub mod raster { buf.push(b); buf.push(a); } - RasterImage { - width: target.width, - height: target.height, - format: PixelFormat::Rgba8, - pixels: Bytes::from(buf), - } + RasterImage::cpu(target.width, target.height, PixelFormat::Rgba8, buf) } } diff --git a/tellur-core/src/raster.rs b/tellur-core/src/raster.rs index cee9d42..6977e99 100644 --- a/tellur-core/src/raster.rs +++ b/tellur-core/src/raster.rs @@ -1,6 +1,8 @@ use std::any::Any; +use std::fmt; use std::hash::{Hash, Hasher}; use std::io::Write; +use std::sync::Arc; use bytes::Bytes; use thiserror::Error; @@ -10,13 +12,132 @@ use crate::geometry::{Constraints, Rect, Vec2}; use crate::render_context::RenderContext; #[derive(Debug, Clone)] -pub struct RasterImage { +pub enum RasterImage { + Cpu(CpuRasterImage), + Gpu(GpuSurface), +} + +impl RasterImage { + pub fn cpu(width: u32, height: u32, format: PixelFormat, pixels: impl Into) -> Self { + Self::Cpu(CpuRasterImage { + width, + height, + format, + pixels: pixels.into(), + }) + } + + pub fn width(&self) -> u32 { + match self { + Self::Cpu(image) => image.width, + Self::Gpu(surface) => surface.width, + } + } + + pub fn height(&self) -> u32 { + match self { + Self::Cpu(image) => 
image.height, + Self::Gpu(surface) => surface.height, + } + } + + pub fn format(&self) -> PixelFormat { + match self { + Self::Cpu(image) => image.format, + Self::Gpu(surface) => surface.format, + } + } + + pub fn as_cpu(&self) -> Option<&CpuRasterImage> { + match self { + Self::Cpu(image) => Some(image), + Self::Gpu(_) => None, + } + } + + pub fn into_cpu(self) -> Result { + match self { + Self::Cpu(image) => Ok(image), + Self::Gpu(_) => Err(self), + } + } +} + +impl From for RasterImage { + fn from(image: CpuRasterImage) -> Self { + Self::Cpu(image) + } +} + +impl From for RasterImage { + fn from(surface: GpuSurface) -> Self { + Self::Gpu(surface) + } +} + +#[derive(Debug, Clone)] +pub struct CpuRasterImage { pub width: u32, pub height: u32, pub format: PixelFormat, pub pixels: Bytes, } +/// Backend-owned GPU image handle. +/// +/// `tellur-core` deliberately keeps this opaque: concrete backends can store a +/// `wgpu::Texture`, texture view, command-graph node, or another device-local +/// handle behind the `Arc`, while core remains dependency-free. 
+#[derive(Clone)] +pub struct GpuSurface { + pub width: u32, + pub height: u32, + pub format: PixelFormat, + backend: &'static str, + handle: Arc, +} + +impl GpuSurface { + pub fn new( + width: u32, + height: u32, + format: PixelFormat, + backend: &'static str, + handle: Arc, + ) -> Self { + Self { + width, + height, + format, + backend, + handle, + } + } + + pub fn backend(&self) -> &'static str { + self.backend + } + + pub fn handle(&self) -> &(dyn Any + Send + Sync) { + self.handle.as_ref() + } + + pub fn downcast_handle(&self) -> Option<&T> { + self.handle.as_ref().downcast_ref::() + } +} + +impl fmt::Debug for GpuSurface { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("GpuSurface") + .field("width", &self.width) + .field("height", &self.height) + .field("format", &self.format) + .field("backend", &self.backend) + .finish_non_exhaustive() + } +} + #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum PixelFormat { /// 8-bit per channel sRGB with straight (non-premultiplied) alpha. @@ -111,11 +232,22 @@ pub enum PngExportError { UnsupportedFormat(PixelFormat), #[error("pixel buffer size mismatch: expected {expected} bytes, got {actual}")] SizeMismatch { expected: usize, actual: usize }, + #[error("PNG export requires a CPU image; got GPU surface from backend {backend}")] + GpuSurface { backend: &'static str }, #[error("PNG encoding failed: {0}")] Encode(#[from] png::EncodingError), } -impl RasterImage { +impl CpuRasterImage { + pub fn new(width: u32, height: u32, format: PixelFormat, pixels: impl Into) -> Self { + Self { + width, + height, + format, + pixels: pixels.into(), + } + } + /// Encodes the image as PNG and writes it to `writer`. /// /// Only `PixelFormat::Rgba8` is currently supported. HDR formats require @@ -141,3 +273,17 @@ impl RasterImage { Ok(()) } } + +impl RasterImage { + /// Encodes a CPU image as PNG and writes it to `writer`. 
+ /// + /// GPU images must be read back through the active render context first. + pub fn export_png(&self, writer: W) -> Result<(), PngExportError> { + match self { + Self::Cpu(image) => image.export_png(writer), + Self::Gpu(surface) => Err(PngExportError::GpuSurface { + backend: surface.backend(), + }), + } + } +} diff --git a/tellur-core/src/render_context.rs b/tellur-core/src/render_context.rs index edbab9f..2ba6e35 100644 --- a/tellur-core/src/render_context.rs +++ b/tellur-core/src/render_context.rs @@ -12,8 +12,28 @@ //! pay for cache bookkeeping. The renderer crate provides a caching //! implementation on top of this trait. +use std::any::Any; + use crate::geometry::Vec2; -use crate::raster::{RasterComponent, RasterImage, Resolution}; +use crate::raster::{CpuRasterImage, RasterComponent, RasterImage, Resolution}; + +/// How aggressively a render context should try to keep work on the GPU. +/// +/// This is a policy signal, not a guarantee. Components should ask the context +/// for GPU hooks only when this prefers GPU work, and every hook is optional so +/// CPU fallback remains the default behavior. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)] +pub enum GpuPreference { + #[default] + Disabled, + PreferGpu, +} + +impl GpuPreference { + pub const fn prefers_gpu(self) -> bool { + matches!(self, Self::PreferGpu) + } +} /// Drives raster component rendering and provides a hook for caching. /// @@ -22,6 +42,23 @@ use crate::raster::{RasterComponent, RasterImage, Resolution}; /// `ctx.render(&*child, size, target)`) so the context can intercept and /// reuse previously-produced results. pub trait RenderContext { + /// Exposes the concrete context for backend-specific rendering paths. + /// + /// Components should still branch on [`RenderContext::prefers_gpu`] first; + /// downcasting is only for the implementation detail of talking to a + /// concrete GPU backend when one is present. 
+ fn as_any_mut(&mut self) -> &mut dyn Any; + + /// Whether components should try optional GPU paths before falling back to + /// CPU rendering. + fn gpu_preference(&self) -> GpuPreference { + GpuPreference::Disabled + } + + fn prefers_gpu(&self) -> bool { + self.gpu_preference().prefers_gpu() + } + /// Renders `component` at the given logical `size` into a /// `target`-sized pixel buffer, possibly returning a cached result /// from a previous identical request. @@ -31,6 +68,23 @@ pub trait RenderContext { size: Vec2, target: Resolution, ) -> RasterImage; + + /// Reads a rendered image back into CPU memory. + /// + /// GPU contexts that return `RasterImage::Gpu` must override this. The + /// default handles the CPU fallback path and treats an unresolved GPU image + /// as a backend bug. + fn readback(&mut self, image: RasterImage) -> CpuRasterImage { + match image { + RasterImage::Cpu(image) => image, + RasterImage::Gpu(surface) => { + panic!( + "render context returned a GPU image for backend '{}' but did not implement readback", + surface.backend() + ) + } + } + } } /// A `RenderContext` that performs no caching. 
Every call goes straight @@ -39,6 +93,10 @@ pub trait RenderContext { pub struct PassThrough; impl RenderContext for PassThrough { + fn as_any_mut(&mut self) -> &mut dyn Any { + self + } + fn render( &mut self, component: &dyn RasterComponent, diff --git a/tellur-live/src/server.rs b/tellur-live/src/server.rs index 1d14e52..958e19f 100644 --- a/tellur-live/src/server.rs +++ b/tellur-live/src/server.rs @@ -11,7 +11,8 @@ use std::sync::{ use std::thread; use std::time::{Duration, Instant}; -use tellur_core::raster::{PixelFormat, RasterImage, Resolution}; +use tellur_core::raster::{CpuRasterImage, PixelFormat, Resolution}; +use tellur_core::render_context::RenderContext; use tellur_core::time::TimelineTime; use tellur_renderer::CachingRenderContext; @@ -444,6 +445,7 @@ impl PreviewApp { &mut self.ctx, ) .ok_or("timeline did not produce a frame")?; + let image = self.ctx.readback(image); let render_time = render_start.elapsed(); if image.format != PixelFormat::Rgba8 { return Err(format!("h264 stream requires Rgba8, got {:?}", image.format).into()); @@ -535,6 +537,7 @@ impl PreviewApp { &mut self.ctx, ) .ok_or("timeline did not produce a frame")?; + let image = self.ctx.readback(image); let render_time = render_start.elapsed(); let after = self.ctx.metrics(); @@ -571,7 +574,7 @@ struct VideoStreamSetup { } struct VideoFrame { - image: RasterImage, + image: CpuRasterImage, render_time: Duration, cache_hits: u64, cache_misses: u64, @@ -781,7 +784,7 @@ struct RenderedFrame { } struct RenderedImage { - image: RasterImage, + image: CpuRasterImage, stats: FrameRenderStats, total_start: Instant, } diff --git a/tellur-renderer/examples/raster_layer_to_png.rs b/tellur-renderer/examples/raster_layer_to_png.rs index 4d80f67..06f1686 100644 --- a/tellur-renderer/examples/raster_layer_to_png.rs +++ b/tellur-renderer/examples/raster_layer_to_png.rs @@ -70,5 +70,5 @@ fn main() { let file = File::create(path).expect("create output file"); image.export_png(file).expect("export PNG"); - 
println!("Wrote {} ({}x{})", path, image.width, image.height); + println!("Wrote {} ({}x{})", path, image.width(), image.height()); } diff --git a/tellur-renderer/examples/scene_to_png.rs b/tellur-renderer/examples/scene_to_png.rs index 03139df..6b8de2c 100644 --- a/tellur-renderer/examples/scene_to_png.rs +++ b/tellur-renderer/examples/scene_to_png.rs @@ -48,5 +48,5 @@ fn main() { let file = File::create(path).expect("create output file"); image.export_png(file).expect("export PNG"); - println!("Wrote {} ({}x{})", path, image.width, image.height); + println!("Wrote {} ({}x{})", path, image.width(), image.height()); } diff --git a/tellur-renderer/examples/text_to_png.rs b/tellur-renderer/examples/text_to_png.rs index aec9e20..182a079 100644 --- a/tellur-renderer/examples/text_to_png.rs +++ b/tellur-renderer/examples/text_to_png.rs @@ -53,5 +53,5 @@ fn main() { let out = "/tmp/text.png"; let file = File::create(out).expect("create output file"); image.export_png(file).expect("export PNG"); - println!("Wrote {} ({}x{})", out, image.width, image.height); + println!("Wrote {} ({}x{})", out, image.width(), image.height()); } diff --git a/tellur-renderer/src/outline.rs b/tellur-renderer/src/outline.rs index 589dcbe..86f8afa 100644 --- a/tellur-renderer/src/outline.rs +++ b/tellur-renderer/src/outline.rs @@ -10,12 +10,11 @@ use std::hash::{Hash, Hasher}; -use bytes::Bytes; use tellur_core::color::Color; use tellur_core::composite::composite_at; use tellur_core::dyn_compare::hash_f32; use tellur_core::geometry::{Constraints, Rect, Vec2}; -use tellur_core::raster::{PixelFormat, RasterComponent, RasterImage, Resolution}; +use tellur_core::raster::{CpuRasterImage, PixelFormat, RasterComponent, RasterImage, Resolution}; use tellur_core::render_context::RenderContext; pub struct Outline { @@ -75,6 +74,7 @@ impl RasterComponent for Outline { size, Resolution::new(child_px_w, child_px_h), ); + let child_image = ctx.readback(child_image); // Dilate the child alpha by `width` 
logical units and subtract // the original alpha so only the ring outside the child @@ -109,31 +109,26 @@ impl RasterComponent for Outline { let child_px_y = (child_local_y * sy).round() as i32; composite_at(&mut accum, target, &child_image, child_px_x, child_px_y); - RasterImage { - width: target.width, - height: target.height, - format: PixelFormat::Rgba8, - pixels: Bytes::from(accum), - } + RasterImage::cpu(target.width, target.height, PixelFormat::Rgba8, accum) } } fn blank_image(target: Resolution) -> RasterImage { let bytes = (target.width as usize) * (target.height as usize) * 4; - RasterImage { - width: target.width, - height: target.height, - format: PixelFormat::Rgba8, - pixels: Bytes::from(vec![0u8; bytes]), - } + RasterImage::cpu( + target.width, + target.height, + PixelFormat::Rgba8, + vec![0u8; bytes], + ) } fn make_outline( - image: &RasterImage, + image: &CpuRasterImage, width_px_x: u32, width_px_y: u32, color: Color, -) -> RasterImage { +) -> CpuRasterImage { assert_eq!(image.format, PixelFormat::Rgba8); let pad_x = width_px_x as usize; let pad_y = width_px_y as usize; @@ -181,12 +176,7 @@ fn make_outline( out.push(a); } - RasterImage { - width: out_w as u32, - height: out_h as u32, - format: PixelFormat::Rgba8, - pixels: Bytes::from(out), - } + CpuRasterImage::new(out_w as u32, out_h as u32, PixelFormat::Rgba8, out) } /// Morphological dilation by an axis-aligned ellipse with semi-axes diff --git a/tellur-renderer/src/rasterize.rs b/tellur-renderer/src/rasterize.rs index 483a05a..98883ac 100644 --- a/tellur-renderer/src/rasterize.rs +++ b/tellur-renderer/src/rasterize.rs @@ -1,6 +1,5 @@ use std::hash::Hash; -use bytes::Bytes; use tellur_core::color::Color; use tellur_core::geometry::{Constraints, Rect, Transform, Vec2}; use tellur_core::raster::{PixelFormat, RasterComponent, RasterImage, Resolution}; @@ -57,12 +56,7 @@ fn rasterize(graphic: &VectorGraphic, width: u32, height: u32) -> RasterImage { straight.extend_from_slice(&[c.red(), c.green(), 
c.blue(), c.alpha()]); } - RasterImage { - width, - height, - format: PixelFormat::Rgba8, - pixels: Bytes::from(straight), - } + RasterImage::cpu(width, height, PixelFormat::Rgba8, straight) } /// Transform that maps the graphic's local coordinate space diff --git a/tellur-renderer/src/render_context.rs b/tellur-renderer/src/render_context.rs index a76b11e..3aec951 100644 --- a/tellur-renderer/src/render_context.rs +++ b/tellur-renderer/src/render_context.rs @@ -23,8 +23,8 @@ use lru::LruCache; use sysinfo::System; use tellur_core::dyn_compare::DynEq; use tellur_core::geometry::Vec2; -use tellur_core::raster::{RasterComponent, RasterImage, Resolution}; -use tellur_core::render_context::RenderContext; +use tellur_core::raster::{PixelFormat, RasterComponent, RasterImage, Resolution}; +use tellur_core::render_context::{GpuPreference, RenderContext}; /// Default cache size in bytes (1 GiB) when constructed with /// [`CachingRenderContext::new`]. @@ -213,6 +213,13 @@ fn format_bytes(b: u64) -> String { } } +fn pixel_stride(format: PixelFormat) -> usize { + match format { + PixelFormat::Rgba8 => 4, + PixelFormat::Rgba16Float => 8, + } +} + /// A render context that memoizes `RasterImage` outputs. /// /// Construct one per export / preview session and pass it into @@ -232,6 +239,7 @@ pub struct CachingRenderContext { pressure_skips: u64, oversize_skips: u64, per_type: HashMap, + gpu_preference: GpuPreference, // Running total of every `ctx.render` call's inclusive duration. 
// A `render` invocation snapshots this on entry and re-reads it on // exit to derive how much time was spent inside nested child @@ -259,10 +267,20 @@ impl CachingRenderContext { pressure_skips: 0, oversize_skips: 0, per_type: HashMap::new(), + gpu_preference: GpuPreference::Disabled, total_render_time: Duration::ZERO, } } + pub fn with_gpu_preference(mut self, gpu_preference: GpuPreference) -> Self { + self.gpu_preference = gpu_preference; + self + } + + pub fn set_gpu_preference(&mut self, gpu_preference: GpuPreference) { + self.gpu_preference = gpu_preference; + } + /// Current memory footprint of cached images, in bytes. pub fn current_bytes(&self) -> usize { self.cur_bytes @@ -325,7 +343,12 @@ impl CachingRenderContext { } fn image_bytes(image: &RasterImage) -> usize { - image.pixels.len() + match image { + RasterImage::Cpu(image) => image.pixels.len(), + RasterImage::Gpu(surface) => { + (surface.width as usize) * (surface.height as usize) * pixel_stride(surface.format) + } + } } /// Evict least-recently-used entries until `needed` more bytes fit @@ -366,6 +389,14 @@ impl Default for CachingRenderContext { } impl RenderContext for CachingRenderContext { + fn as_any_mut(&mut self) -> &mut dyn std::any::Any { + self + } + + fn gpu_preference(&self) -> GpuPreference { + self.gpu_preference + } + fn render( &mut self, component: &dyn RasterComponent, diff --git a/tellur-renderer/src/shadow.rs b/tellur-renderer/src/shadow.rs index 6211806..4177ce0 100644 --- a/tellur-renderer/src/shadow.rs +++ b/tellur-renderer/src/shadow.rs @@ -8,12 +8,11 @@ use std::hash::{Hash, Hasher}; -use bytes::Bytes; use tellur_core::color::Color; use tellur_core::composite::composite_at; use tellur_core::dyn_compare::hash_f32; use tellur_core::geometry::{Constraints, Rect, Vec2}; -use tellur_core::raster::{PixelFormat, RasterComponent, RasterImage, Resolution}; +use tellur_core::raster::{CpuRasterImage, PixelFormat, RasterComponent, RasterImage, Resolution}; use 
tellur_core::render_context::RenderContext; pub struct DropShadow { @@ -97,6 +96,7 @@ impl RasterComponent for DropShadow { size, Resolution::new(child_px_w, child_px_h), ); + let child_image = ctx.readback(child_image); // Build a padded shadow image whose alpha is a blurred copy of // the child's alpha, tinted with `color`. Padding equals the @@ -128,26 +128,21 @@ impl RasterComponent for DropShadow { let child_px_y = (child_local_y * sy).round() as i32; composite_at(&mut accum, target, &child_image, child_px_x, child_px_y); - RasterImage { - width: target.width, - height: target.height, - format: PixelFormat::Rgba8, - pixels: Bytes::from(accum), - } + RasterImage::cpu(target.width, target.height, PixelFormat::Rgba8, accum) } } fn blank_image(target: Resolution) -> RasterImage { let bytes = (target.width as usize) * (target.height as usize) * 4; - RasterImage { - width: target.width, - height: target.height, - format: PixelFormat::Rgba8, - pixels: Bytes::from(vec![0u8; bytes]), - } + RasterImage::cpu( + target.width, + target.height, + PixelFormat::Rgba8, + vec![0u8; bytes], + ) } -fn make_shadow(image: &RasterImage, blur_radius: u32, color: Color) -> RasterImage { +fn make_shadow(image: &CpuRasterImage, blur_radius: u32, color: Color) -> CpuRasterImage { assert_eq!(image.format, PixelFormat::Rgba8); let pad = (blur_radius as f32 * BLUR_EXTENT_MULTIPLIER).round() as usize; let in_w = image.width as usize; @@ -185,12 +180,7 @@ fn make_shadow(image: &RasterImage, blur_radius: u32, color: Color) -> RasterIma out.push(a); } - RasterImage { - width: out_w as u32, - height: out_h as u32, - format: PixelFormat::Rgba8, - pixels: Bytes::from(out), - } + CpuRasterImage::new(out_w as u32, out_h as u32, PixelFormat::Rgba8, out) } fn box_blur_3pass(buf: &mut [u8], w: usize, h: usize, radius: usize) { diff --git a/tellur-renderer/src/video.rs b/tellur-renderer/src/video.rs index 049afce..c938384 100644 --- a/tellur-renderer/src/video.rs +++ b/tellur-renderer/src/video.rs @@ 
-34,6 +34,7 @@ use std::time::{Duration, Instant}; use indicatif::{MultiProgress, ProgressBar, ProgressState, ProgressStyle}; use tellur_core::raster::{PixelFormat, Resolution}; +use tellur_core::render_context::RenderContext; use tellur_core::time::TimelineTime; use tellur_core::timeline::Timeline; use thiserror::Error; @@ -221,6 +222,7 @@ impl FfmpegEncoder { let build_start = Instant::now(); let image = tl.build(t, self.resolution, &mut ctx); + let image = ctx.readback(image); build_time += build_start.elapsed(); if image.format != PixelFormat::Rgba8 { From 51135a36ba69a1d1cfebbc8dd65d155c673b7a4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E3=81=82=E3=81=99=E3=81=B1=E3=82=8B?= Date: Wed, 27 May 2026 20:01:46 +0900 Subject: [PATCH 2/4] feat: add GPU raster effects backend --- Cargo.lock | 842 +++++++++++++++++++- flake.nix | 6 + tellur-core/src/layer.rs | 44 +- tellur-core/src/raster.rs | 4 + tellur-core/src/render_context.rs | 72 +- tellur-live/src/main.rs | 11 +- tellur-live/src/server.rs | 37 +- tellur-renderer/Cargo.toml | 3 + tellur-renderer/src/gpu.rs | 1051 +++++++++++++++++++++++++ tellur-renderer/src/lib.rs | 1 + tellur-renderer/src/outline.rs | 40 +- tellur-renderer/src/render_context.rs | 55 +- tellur-renderer/src/shadow.rs | 51 +- 13 files changed, 2155 insertions(+), 62 deletions(-) create mode 100644 tellur-renderer/src/gpu.rs diff --git a/Cargo.lock b/Cargo.lock index c840591..01f2e8d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,12 +8,33 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + [[package]] name = "allocator-api2" version = "0.2.21" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + [[package]] name = "arrayref" version = "0.3.9" @@ -26,12 +47,54 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" +[[package]] +name = "ash" +version = "0.37.3+1.3.251" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39e9c3835d686b0a6084ab4234fcd1b07dbf6e4767dce60874b12356a25ecd4a" +dependencies = [ + "libloading 0.7.4", +] + +[[package]] +name = "autocfg" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2032f911046de80f0a198e0901378627c33f59ea0ac00e363d481118bd70a53" + +[[package]] +name = "bit-set" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1" +dependencies = [ + "bit-vec", +] + +[[package]] +name = "bit-vec" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + [[package]] name = "bitflags" version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" +[[package]] +name = "block" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0d8c1fef690941d3e7788d328517591fecc684c084084702d6ff1641e993699a" + [[package]] name = "bumpalo" version = "3.20.3" @@ -50,12 +113,69 @@ version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" +[[package]] +name = "cc" +version = "1.2.62" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1dce859f0832a7d088c4f1119888ab94ef4b5d6795d1ce05afb7fe159d79f98" +dependencies = [ + "find-msvc-tools", + "shlex", +] + [[package]] name = "cfg-if" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" +[[package]] +name = "cfg_aliases" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e" + +[[package]] +name = "codespan-reporting" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e" +dependencies = [ + "termcolor", + "unicode-width 0.1.14", +] + +[[package]] +name = "com" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e17887fd17353b65b1b2ef1c526c83e26cd72e74f598a8dc1bee13a48f3d9f6" +dependencies = [ + "com_macros", +] + +[[package]] +name = "com_macros" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d375883580a668c7481ea6631fc1a8863e33cc335bf56bfad8d7e6d4b04b13a5" +dependencies = [ + "com_macros_support", + "proc-macro2", + "syn 1.0.109", +] + +[[package]] +name = "com_macros_support" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad899a1087a9296d5644792d7cb72b8e34c1bec8e7d4fbc002230169a6e8710c" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + 
[[package]] name = "console" version = "0.16.3" @@ -64,16 +184,37 @@ checksum = "d64e8af5551369d19cf50138de61f1c42074ab970f74e99be916646777f8fc87" dependencies = [ "encode_unicode", "libc", - "unicode-width", + "unicode-width 0.2.2", "windows-sys", ] +[[package]] +name = "core-foundation" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "core-foundation-sys" version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "core-graphics-types" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45390e6114f68f718cc7a830514a96f903cccd70d02a8f6d9f643ac4ba45afaf" +dependencies = [ + "bitflags 1.3.2", + "core-foundation", + "libc", +] + [[package]] name = "core_maths" version = "0.1.1" @@ -92,13 +233,24 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "d3d12" +version = "0.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e3d747f100290a1ca24b752186f61f6637e1deffe3bf6320de6fcb29510a307" +dependencies = [ + "bitflags 2.11.1", + "libloading 0.8.9", + "winapi", +] + [[package]] name = "dlib" version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab8ecd87370524b461f8557c119c405552c396ed91fc0a8eec68679eab26f94a" dependencies = [ - "libloading", + "libloading 0.8.9", ] [[package]] @@ -122,6 +274,12 @@ dependencies = [ "simd-adler32", ] +[[package]] +name = "find-msvc-tools" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" + [[package]] name = "flate2" version = "1.1.9" @@ -170,6 +328,33 @@ dependencies = [ "ttf-parser", ] +[[package]] 
+name = "foreign-types" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d737d9aa519fb7b749cbc3b962edcf310a8dd1f4b67c91c4f83975dbdd17d965" +dependencies = [ + "foreign-types-macros", + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-macros" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "foreign-types-shared" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa9a19cbb55df58761df49b23516a86d432839add4af60fc256da840f66ed35b" + [[package]] name = "futures-core" version = "0.3.32" @@ -194,6 +379,100 @@ dependencies = [ "slab", ] +[[package]] +name = "gl_generator" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a95dfc23a2b4a9a2f5ab41d194f8bfda3cabec42af4e39f08c339eb2a0c124d" +dependencies = [ + "khronos_api", + "log", + "xml-rs", +] + +[[package]] +name = "glow" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd348e04c43b32574f2de31c8bb397d96c9fcfa1371bd4ca6d8bdc464ab121b1" +dependencies = [ + "js-sys", + "slotmap", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "glutin_wgl_sys" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c8098adac955faa2d31079b65dc48841251f69efd3ac25477903fc424362ead" +dependencies = [ + "gl_generator", +] + +[[package]] +name = "gpu-alloc" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbcd2dba93594b227a1f57ee09b8b9da8892c34d55aa332e034a228d0fe6a171" +dependencies = [ + "bitflags 2.11.1", + "gpu-alloc-types", +] + +[[package]] +name = "gpu-alloc-types" +version = "0.3.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "98ff03b468aa837d70984d55f5d3f846f6ec31fe34bbb97c4f85219caeee1ca4" +dependencies = [ + "bitflags 2.11.1", +] + +[[package]] +name = "gpu-allocator" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f56f6318968d03c18e1bcf4857ff88c61157e9da8e47c5f29055d60e1228884" +dependencies = [ + "log", + "presser", + "thiserror 1.0.69", + "winapi", + "windows 0.52.0", +] + +[[package]] +name = "gpu-descriptor" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc11df1ace8e7e564511f53af41f3e42ddc95b56fd07b3f4445d2a6048bc682c" +dependencies = [ + "bitflags 2.11.1", + "gpu-descriptor-types", + "hashbrown 0.14.5", +] + +[[package]] +name = "gpu-descriptor-types" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bf0b36e6f090b7e1d8a4b49c0cb81c1f8376f72198c65dd3ad9ff3556b8b78c" +dependencies = [ + "bitflags 2.11.1", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", + "allocator-api2", +] + [[package]] name = "hashbrown" version = "0.15.5" @@ -205,6 +484,43 @@ dependencies = [ "foldhash", ] +[[package]] +name = "hashbrown" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" + +[[package]] +name = "hassle-rs" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af2a7e73e1f34c48da31fb668a907f250794837e08faa144fd24f0b8b741e890" +dependencies = [ + "bitflags 2.11.1", + "com", + "libc", + "libloading 0.8.9", + "thiserror 1.0.69", + "widestring", + "winapi", +] + +[[package]] +name = "hexf-parse" +version = "0.2.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfa686283ad6dd069f105e5ab091b04c62850d3e4cf5d67debad1933f55023df" + +[[package]] +name = "indexmap" +version = "2.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" +dependencies = [ + "equivalent", + "hashbrown 0.17.1", +] + [[package]] name = "indicatif" version = "0.18.4" @@ -213,11 +529,39 @@ checksum = "25470f23803092da7d239834776d653104d551bc4d7eacaf31e6837854b8e9eb" dependencies = [ "console", "portable-atomic", - "unicode-width", + "unicode-width 0.2.2", "unit-prefix", "web-time", ] +[[package]] +name = "jni-sys" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41a652e1f9b6e0275df1f15b32661cf0d4b78d4d87ddec5e0c3c20f097433258" +dependencies = [ + "jni-sys 0.4.1", +] + +[[package]] +name = "jni-sys" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6377a88cb3910bee9b0fa88d4f42e1d2da8e79915598f65fb0c7ee14c878af2" +dependencies = [ + "jni-sys-macros", +] + +[[package]] +name = "jni-sys-macros" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38c0b942f458fe50cdac086d2f946512305e5631e720728f2a61aabcd47a6264" +dependencies = [ + "quote", + "syn 2.0.117", +] + [[package]] name = "js-sys" version = "0.3.99" @@ -230,12 +574,39 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "khronos-egl" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6aae1df220ece3c0ada96b8153459b67eebe9ae9212258bb0134ae60416fdf76" +dependencies = [ + "libc", + "libloading 0.8.9", + "pkg-config", +] + +[[package]] +name = "khronos_api" +version = "3.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2db585e1d738fc771bf08a151420d3ed193d9d895a36df7f6f8a9456b911ddc" + [[package]] name = "libc" 
version = "0.2.186" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" +[[package]] +name = "libloading" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b67380fd3b2fbe7527a606e18729d21c6f3951633d0500574c4dc22d2d638b9f" +dependencies = [ + "cfg-if", + "winapi", +] + [[package]] name = "libloading" version = "0.8.9" @@ -252,6 +623,15 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" +[[package]] +name = "lock_api" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" +dependencies = [ + "scopeguard", +] + [[package]] name = "log" version = "0.4.29" @@ -264,7 +644,16 @@ version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" dependencies = [ - "hashbrown", + "hashbrown 0.15.5", +] + +[[package]] +name = "malloc_buf" +version = "0.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62bb907fe88d54d8d9ce32a3cceab4218ed2f6b7d35617cafe9adf84e43919cb" +dependencies = [ + "libc", ] [[package]] @@ -282,6 +671,21 @@ dependencies = [ "libc", ] +[[package]] +name = "metal" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c43f73953f8cbe511f021b58f18c3ce1c3d1ae13fe953293e13345bf83217f25" +dependencies = [ + "bitflags 2.11.1", + "block", + "core-graphics-types", + "foreign-types", + "log", + "objc", + "paste", +] + [[package]] name = "miniz_oxide" version = "0.8.9" @@ -292,6 +696,35 @@ dependencies = [ "simd-adler32", ] +[[package]] +name = "naga" +version = "0.19.2" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "50e3524642f53d9af419ab5e8dd29d3ba155708267667c2f3f06c88c9e130843" +dependencies = [ + "bit-set", + "bitflags 2.11.1", + "codespan-reporting", + "hexf-parse", + "indexmap", + "log", + "num-traits", + "rustc-hash", + "spirv", + "termcolor", + "thiserror 1.0.69", + "unicode-xid", +] + +[[package]] +name = "ndk-sys" +version = "0.5.0+25.2.9519653" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c196769dd60fd4f363e11d948139556a344e79d451aeb2fa2fd040738ef7691" +dependencies = [ + "jni-sys 0.3.1", +] + [[package]] name = "ntapi" version = "0.4.3" @@ -301,12 +734,69 @@ dependencies = [ "winapi", ] +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + +[[package]] +name = "objc" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "915b1b472bc21c53464d6c8461c9d3af805ba1ef837e1cac254428f4a77177b1" +dependencies = [ + "malloc_buf", + "objc_exception", +] + +[[package]] +name = "objc_exception" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad970fb455818ad6cba4c122ad012fae53ae8b4795f86378bce65e4f6bab2ca4" +dependencies = [ + "cc", +] + [[package]] name = "once_cell" version = "1.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" +[[package]] +name = "parking_lot" +version = "0.12.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-link", +] + +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + [[package]] name = "pin-project-lite" version = "0.2.17" @@ -325,19 +815,31 @@ version = "0.18.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60769b8b31b2a9f263dae2776c37b1b28ae246943cf719eb6946a1db05128a61" dependencies = [ - "bitflags", + "bitflags 2.11.1", "crc32fast", "fdeflate", "flate2", "miniz_oxide", ] +[[package]] +name = "pollster" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22686f4785f02a4fcc856d3b3bb19bf6c8160d103f7a99cc258bddd0251dc7f2" + [[package]] name = "portable-atomic" version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" +[[package]] +name = "presser" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8cf8e6a8aa66ce33f63993ffc4ea4271eb5b0530a9002db8455ea6050c77bfa" + [[package]] name = "proc-macro2" version = "1.0.106" @@ -347,6 +849,12 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "profiling" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d595e54a326bc53c1c197b32d295e14b169e3cfeaa8dc82b529f947fba6bcf5" + [[package]] name = "quote" version = "1.0.45" @@ -356,12 +864,45 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "range-alloc" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca45419789ae5a7899559e9512e58ca889e41f04f1f2445e9f4b290ceccd1d08" + +[[package]] +name = "raw-window-handle" +version = "0.6.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "20675572f6f24e9e76ef639bc5552774ed45f1c30e2951e1e99c59888861c539" + +[[package]] +name = "redox_syscall" +version = "0.5.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" +dependencies = [ + "bitflags 2.11.1", +] + +[[package]] +name = "renderdoc-sys" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19b30a45b0cd0bcca8037f3d0dc3421eaf95327a17cad11964fb8179b4fc4832" + [[package]] name = "roxmltree" version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c20b6793b5c2fa6553b250154b78d6d0db37e72700ae35fad9387a46f487c97" +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + [[package]] name = "rustversion" version = "1.0.22" @@ -374,7 +915,7 @@ version = "0.20.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fd3c7c96f8a08ee34eff8857b11b49b07d71d1c3f4e88f8a88d4c9e9f90b1702" dependencies = [ - "bitflags", + "bitflags 2.11.1", "bytemuck", "core_maths", "log", @@ -386,6 +927,18 @@ dependencies = [ "unicode-script", ] +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "simd-adler32" version = "0.3.9" @@ -413,12 +966,38 @@ version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +[[package]] 
+name = "spirv" +version = "0.3.0+sdk-1.3.268.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eda41003dc44290527a59b13432d4a0379379fa074b70174882adfbdfd917844" +dependencies = [ + "bitflags 2.11.1", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "strict-num" version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6637bab7722d379c8b41ba849228d680cc12d0a45ba1fa2b48f2a30577a06731" +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + [[package]] name = "syn" version = "2.0.117" @@ -440,7 +1019,7 @@ dependencies = [ "libc", "memchr", "ntapi", - "windows", + "windows 0.57.0", ] [[package]] @@ -453,7 +1032,7 @@ dependencies = [ "png", "rustybuzz", "tellur-macros", - "thiserror", + "thiserror 2.0.18", ] [[package]] @@ -470,21 +1049,42 @@ version = "0.1.0" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] name = "tellur-renderer" version = "0.1.0" dependencies = [ + "bytemuck", "bytes", "console", "indicatif", "lru", + "pollster", "sysinfo", "tellur-core", - "thiserror", + "thiserror 2.0.18", "tiny-skia", + "wgpu", +] + +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "thiserror" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" +dependencies = [ + "thiserror-impl 
1.0.69", ] [[package]] @@ -493,7 +1093,18 @@ version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" dependencies = [ - "thiserror-impl", + "thiserror-impl 2.0.18", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.69" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", ] [[package]] @@ -504,7 +1115,7 @@ checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -587,12 +1198,24 @@ version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "383ad40bb927465ec0ce7720e033cb4ca06912855fc35db31b5755d0de75b1ee" +[[package]] +name = "unicode-width" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" + [[package]] name = "unicode-width" version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" +[[package]] +name = "unicode-xid" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" + [[package]] name = "unit-prefix" version = "0.5.2" @@ -618,6 +1241,16 @@ dependencies = [ "wasm-bindgen-shared", ] +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.72" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9473dbd2991ae90b6291c3c32c30c6187ac49aa32f9905d1cce280ec1e110b0f" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "wasm-bindgen-macro" version = "0.2.122" @@ -637,7 +1270,7 @@ dependencies = [ 
"bumpalo", "proc-macro2", "quote", - "syn", + "syn 2.0.117", "wasm-bindgen-shared", ] @@ -650,6 +1283,16 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "web-sys" +version = "0.3.99" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d621441cfc37b84979402712047321980c178f299193a3589d05b99e8763436" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "web-time" version = "1.1.0" @@ -660,6 +1303,119 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "wgpu" +version = "0.19.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cbd7311dbd2abcfebaabf1841a2824ed7c8be443a0f29166e5d3c6a53a762c01" +dependencies = [ + "arrayvec", + "cfg-if", + "cfg_aliases", + "js-sys", + "log", + "naga", + "parking_lot", + "profiling", + "raw-window-handle", + "smallvec", + "static_assertions", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "wgpu-core", + "wgpu-hal", + "wgpu-types", +] + +[[package]] +name = "wgpu-core" +version = "0.19.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28b94525fc99ba9e5c9a9e24764f2bc29bad0911a7446c12f446a8277369bf3a" +dependencies = [ + "arrayvec", + "bit-vec", + "bitflags 2.11.1", + "cfg_aliases", + "codespan-reporting", + "indexmap", + "log", + "naga", + "once_cell", + "parking_lot", + "profiling", + "raw-window-handle", + "rustc-hash", + "smallvec", + "thiserror 1.0.69", + "web-sys", + "wgpu-hal", + "wgpu-types", +] + +[[package]] +name = "wgpu-hal" +version = "0.19.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfabcfc55fd86611a855816326b2d54c3b2fd7972c27ce414291562650552703" +dependencies = [ + "android_system_properties", + "arrayvec", + "ash", + "bit-set", + "bitflags 2.11.1", + "block", + "cfg_aliases", + "core-graphics-types", + "d3d12", + "glow", + "glutin_wgl_sys", + "gpu-alloc", + "gpu-allocator", + "gpu-descriptor", + "hassle-rs", + "js-sys", + "khronos-egl", + "libc", + 
"libloading 0.8.9", + "log", + "metal", + "naga", + "ndk-sys", + "objc", + "once_cell", + "parking_lot", + "profiling", + "range-alloc", + "raw-window-handle", + "renderdoc-sys", + "rustc-hash", + "smallvec", + "thiserror 1.0.69", + "wasm-bindgen", + "web-sys", + "wgpu-types", + "winapi", +] + +[[package]] +name = "wgpu-types" +version = "0.19.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b671ff9fb03f78b46ff176494ee1ebe7d603393f42664be55b64dc8d53969805" +dependencies = [ + "bitflags 2.11.1", + "js-sys", + "web-sys", +] + +[[package]] +name = "widestring" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72069c3113ab32ab29e5584db3c6ec55d416895e60715417b5b883a357c3e471" + [[package]] name = "winapi" version = "0.3.9" @@ -676,19 +1432,47 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +[[package]] +name = "winapi-util" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" +dependencies = [ + "windows-sys", +] + [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be" +dependencies = [ + "windows-core 0.52.0", + "windows-targets", +] + [[package]] name = "windows" version = "0.57.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "12342cb4d8e3b046f3d80effd474a7a02447231330ef77d71daa6fbc40681143" dependencies = [ - "windows-core", + "windows-core 0.57.0", + "windows-targets", +] + +[[package]] 
+name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ "windows-targets", ] @@ -712,7 +1496,7 @@ checksum = "9107ddc059d5b6fbfbffdfa7a7fe3e22a226def0b2608f72e9d552763d3e1ad7" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -723,7 +1507,7 @@ checksum = "29bee4b38ea3cde66011baa44dba677c432a78593e202392d1e9070cf2a7fca7" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 2.0.117", ] [[package]] @@ -814,6 +1598,12 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "xml-rs" +version = "0.8.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ae8337f8a065cfc972643663ea4279e04e7256de865aa66fe25cec5fb912d3f" + [[package]] name = "yeslogic-fontconfig-sys" version = "6.0.1" @@ -824,3 +1614,23 @@ dependencies = [ "once_cell", "pkg-config", ] + +[[package]] +name = "zerocopy" +version = "0.8.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.48" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] diff --git a/flake.nix b/flake.nix index e2c19de..660bc3f 100644 --- a/flake.nix +++ b/flake.nix @@ -35,7 +35,13 @@ pkgs.pkg-config pkgs.mold pkgs.fontconfig + pkgs.vulkan-loader + pkgs.vulkan-tools ]; + + LD_LIBRARY_PATH = pkgs.lib.optionalString pkgs.stdenv.isLinux ( + pkgs.lib.makeLibraryPath [ pkgs.vulkan-loader ] + ); }; } ); diff --git a/tellur-core/src/layer.rs 
b/tellur-core/src/layer.rs index fa10f0c..bcf4a03 100644 --- a/tellur-core/src/layer.rs +++ b/tellur-core/src/layer.rs @@ -23,7 +23,7 @@ use crate::composite::composite_at; use crate::geometry::{Constraints, Rect, Transform, Vec2}; use crate::placement::Placed; use crate::raster::{PixelFormat, RasterComponent, RasterImage, Resolution}; -use crate::render_context::RenderContext; +use crate::render_context::{CompositeInput, RenderContext}; use crate::vector::{Group, Node, VectorComponent, VectorGraphic}; #[derive(PartialEq, Hash)] @@ -245,12 +245,48 @@ pub(crate) fn composite_children( placed: &[(Vec2, Vec2, &dyn RasterComponent)], ctx: &mut dyn RenderContext, ) -> RasterImage { - let pixel_count = (target.width as usize) * (target.height as usize); - let mut accum = vec![0u8; pixel_count * 4]; - let scale_x = target.width as f32 / paint_rect.size.0; let scale_y = target.height as f32 / paint_rect.size.1; + let gpu_available = ctx.prefers_gpu() && ctx.gpu_backend().is_some(); + + if gpu_available { + let mut rendered = Vec::with_capacity(placed.len()); + for (position, child_size, child) in placed { + let bounds = child.paint_bounds(*child_size); + let child_px_w = (bounds.size.0 * scale_x).round().max(1.0) as u32; + let child_px_h = (bounds.size.1 * scale_y).round().max(1.0) as u32; + let paint_x = position.0 + bounds.origin.0 - paint_rect.origin.0; + let paint_y = position.1 + bounds.origin.1 - paint_rect.origin.1; + let offset_x = (paint_x * scale_x).round() as i32; + let offset_y = (paint_y * scale_y).round() as i32; + let image = ctx.render(*child, *child_size, Resolution::new(child_px_w, child_px_h)); + rendered.push((image, offset_x, offset_y)); + } + + let inputs: Vec> = rendered + .iter() + .map(|(image, offset_x, offset_y)| CompositeInput { + image, + offset_x: *offset_x, + offset_y: *offset_y, + }) + .collect(); + if let Some(gpu) = ctx.gpu_backend() { + if let Some(image) = gpu.composite(target, &inputs) { + return image; + } + } + + let mut accum = 
vec![0u8; (target.width as usize) * (target.height as usize) * 4]; + for (image, offset_x, offset_y) in rendered { + let image = ctx.readback(image); + composite_at(&mut accum, target, &image, offset_x, offset_y); + } + + return RasterImage::cpu(target.width, target.height, PixelFormat::Rgba8, accum); + } + let mut accum = vec![0u8; (target.width as usize) * (target.height as usize) * 4]; for (position, child_size, child) in placed { let bounds = child.paint_bounds(*child_size); let child_px_w = (bounds.size.0 * scale_x).round().max(1.0) as u32; diff --git a/tellur-core/src/raster.rs b/tellur-core/src/raster.rs index 6977e99..bf372f1 100644 --- a/tellur-core/src/raster.rs +++ b/tellur-core/src/raster.rs @@ -122,6 +122,10 @@ impl GpuSurface { self.handle.as_ref() } + pub fn handle_arc(&self) -> Arc { + Arc::clone(&self.handle) + } + pub fn downcast_handle(&self) -> Option<&T> { self.handle.as_ref().downcast_ref::() } diff --git a/tellur-core/src/render_context.rs b/tellur-core/src/render_context.rs index 2ba6e35..75fbdc8 100644 --- a/tellur-core/src/render_context.rs +++ b/tellur-core/src/render_context.rs @@ -14,6 +14,7 @@ use std::any::Any; +use crate::color::Color; use crate::geometry::Vec2; use crate::raster::{CpuRasterImage, RasterComponent, RasterImage, Resolution}; @@ -24,14 +25,15 @@ use crate::raster::{CpuRasterImage, RasterComponent, RasterImage, Resolution}; /// CPU fallback remains the default behavior. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)] pub enum GpuPreference { - #[default] Disabled, + #[default] + Auto, PreferGpu, } impl GpuPreference { pub const fn prefers_gpu(self) -> bool { - matches!(self, Self::PreferGpu) + matches!(self, Self::Auto | Self::PreferGpu) } } @@ -59,6 +61,15 @@ pub trait RenderContext { self.gpu_preference().prefers_gpu() } + /// Returns a device backend for GPU-capable components. 
+ /// + /// The backend exposes generic raster primitives rather than per-component + /// hooks; `Layer`, `DropShadow`, `Outline`, and future elements decide + /// inside their own `render` implementation whether those primitives apply. + fn gpu_backend(&mut self) -> Option<&mut dyn GpuRasterBackend> { + None + } + /// Renders `component` at the given logical `size` into a /// `target`-sized pixel buffer, possibly returning a cached result /// from a previous identical request. @@ -77,16 +88,67 @@ pub trait RenderContext { fn readback(&mut self, image: RasterImage) -> CpuRasterImage { match image { RasterImage::Cpu(image) => image, - RasterImage::Gpu(surface) => { + image @ RasterImage::Gpu(_) => { + let backend = match &image { + RasterImage::Gpu(surface) => surface.backend(), + RasterImage::Cpu(_) => unreachable!(), + }; + if let Some(gpu) = self.gpu_backend() { + if let Some(image) = gpu.readback(image) { + return image; + } + } panic!( - "render context returned a GPU image for backend '{}' but did not implement readback", - surface.backend() + "render context returned a GPU image for backend '{backend}' but did not implement readback", ) } } } } +pub struct CompositeInput<'a> { + pub image: &'a RasterImage, + pub offset_x: i32, + pub offset_y: i32, +} + +pub struct DropShadowInput<'a> { + pub child: &'a RasterImage, + pub target: Resolution, + pub child_offset_x: i32, + pub child_offset_y: i32, + pub shadow_offset_x: i32, + pub shadow_offset_y: i32, + pub blur_radius: u32, + pub color: Color, +} + +pub struct OutlineInput<'a> { + pub child: &'a RasterImage, + pub target: Resolution, + pub child_offset_x: i32, + pub child_offset_y: i32, + pub outline_offset_x: i32, + pub outline_offset_y: i32, + pub radius_x: u32, + pub radius_y: u32, + pub color: Color, +} + +pub trait GpuRasterBackend { + fn composite( + &mut self, + target: Resolution, + inputs: &[CompositeInput<'_>], + ) -> Option; + + fn drop_shadow(&mut self, input: DropShadowInput<'_>) -> Option; + + fn 
outline(&mut self, input: OutlineInput<'_>) -> Option; + + fn readback(&mut self, image: RasterImage) -> Option; +} + /// A `RenderContext` that performs no caching. Every call goes straight /// through to the component's `render` method. Useful for tests and any /// caller that wants to opt out of memoization. diff --git a/tellur-live/src/main.rs b/tellur-live/src/main.rs index c64f37a..742eb10 100644 --- a/tellur-live/src/main.rs +++ b/tellur-live/src/main.rs @@ -5,6 +5,7 @@ use std::path::{Path, PathBuf}; use std::time::Duration; use tellur_core::raster::Resolution; +use tellur_core::render_context::GpuPreference; use tellur_live::{serve, AutoBuildOptions, ServerOptions}; fn main() -> Result<(), Box> { @@ -31,6 +32,7 @@ fn parse_args(mut args: impl Iterator) -> Result = None; @@ -75,6 +77,12 @@ fn parse_args(mut args: impl Iterator) -> Result { verbose = true; } + "--gpu" => { + gpu_preference = GpuPreference::PreferGpu; + } + "--no-gpu" => { + gpu_preference = GpuPreference::Disabled; + } "--watch" => { auto_build_requested = true; } @@ -135,6 +143,7 @@ fn parse_args(mut args: impl Iterator) -> Result, path: PathBuf) { } fn usage() -> String { - "usage: tellur-live serve (--plugin | -p --example ) [--host 127.0.0.1] [--port 4317] [--bind 127.0.0.1:4317] [--fps 30] [--verbose] [--watch] [--watch-path ] [--build-manifest ]".to_owned() + "usage: tellur-live serve (--plugin | -p --example ) [--host 127.0.0.1] [--port 4317] [--bind 127.0.0.1:4317] [--fps 30] [--gpu|--no-gpu] [--verbose] [--watch] [--watch-path ] [--build-manifest ]".to_owned() } diff --git a/tellur-live/src/server.rs b/tellur-live/src/server.rs index 958e19f..ca7be63 100644 --- a/tellur-live/src/server.rs +++ b/tellur-live/src/server.rs @@ -12,7 +12,7 @@ use std::thread; use std::time::{Duration, Instant}; use tellur_core::raster::{CpuRasterImage, PixelFormat, Resolution}; -use tellur_core::render_context::RenderContext; +use tellur_core::render_context::{GpuPreference, RenderContext}; use 
tellur_core::time::TimelineTime; use tellur_renderer::CachingRenderContext; @@ -27,6 +27,7 @@ pub struct ServerOptions { pub bind: String, pub resolution: Resolution, pub fps: u32, + pub gpu_preference: GpuPreference, pub verbose: bool, pub auto_build: Option, } @@ -46,10 +47,8 @@ pub fn serve(options: ServerOptions) -> Result<(), Box> { .unwrap_or_default(), auto_build.example ); - if !options.plugin_path.is_file() { - eprintln!("plugin is missing; running initial release build"); - run_release_build_once(auto_build).map_err(|e| -> Box { e.into() })?; - } + eprintln!("running initial release build"); + run_release_build_once(auto_build).map_err(|e| -> Box { e.into() })?; } let compile_state = options @@ -60,7 +59,7 @@ pub fn serve(options: ServerOptions) -> Result<(), Box> { let app = Arc::new(Mutex::new(PreviewApp { plugin: HotReloadPlugin::new(options.plugin_path), - ctx: CachingRenderContext::new(), + ctx: CachingRenderContext::new().with_gpu_preference(options.gpu_preference), resolution: options.resolution, fps: options.fps, verbose: options.verbose, @@ -458,6 +457,9 @@ impl PreviewApp { cache_hits: after.hits.saturating_sub(before.hits), cache_misses: after.misses.saturating_sub(before.misses), bytes_cached: after.bytes_cached, + gpu_available: after.gpu_available, + gpu_ops: after.gpu.total_ops().saturating_sub(before.gpu.total_ops()), + gpu_readbacks: after.gpu.readbacks.saturating_sub(before.gpu.readbacks), }) } @@ -555,6 +557,9 @@ impl PreviewApp { cache_hits: after.hits.saturating_sub(before.hits), cache_misses: after.misses.saturating_sub(before.misses), bytes_cached: after.bytes_cached, + gpu_available: after.gpu_available, + gpu_ops: after.gpu.total_ops().saturating_sub(before.gpu.total_ops()), + gpu_readbacks: after.gpu.readbacks.saturating_sub(before.gpu.readbacks), }, total_start, }) @@ -579,6 +584,9 @@ struct VideoFrame { cache_hits: u64, cache_misses: u64, bytes_cached: usize, + gpu_available: bool, + gpu_ops: u64, + gpu_readbacks: u64, } fn 
handle_video_stream( @@ -738,7 +746,7 @@ fn handle_video_stream( let frame = app.render_video_rgba(&setup.timeline_id, seconds, setup.resolution)?; if app.verbose { println!( - "video timeline={} t={:.3}s size={}x{} fps={} gop={} render={:.2}ms bytes={} cache_delta={}h/{}m cache_size={}", + "video timeline={} t={:.3}s size={}x{} fps={} gop={} render={:.2}ms bytes={} cache_delta={}h/{}m cache_size={} gpu_available={} gpu_ops={} gpu_readbacks={}", setup.timeline_id, seconds, setup.resolution.width, @@ -750,6 +758,9 @@ fn handle_video_stream( frame.cache_hits, frame.cache_misses, format_bytes(frame.bytes_cached as u64), + frame.gpu_available, + frame.gpu_ops, + frame.gpu_readbacks, ); } frame.image @@ -801,6 +812,9 @@ struct FrameRenderStats { cache_hits: u64, cache_misses: u64, bytes_cached: usize, + gpu_available: bool, + gpu_ops: u64, + gpu_readbacks: u64, } impl FrameRenderStats { @@ -818,6 +832,10 @@ impl FrameRenderStats { ("X-Tellur-Height", self.resolution.height.to_string()), ("X-Tellur-Cache-Hits", self.cache_hits.to_string()), ("X-Tellur-Cache-Misses", self.cache_misses.to_string()), + ("X-Tellur-GPU-Available", self.gpu_available.to_string()), + ("X-Tellur-GPU-Active", (self.gpu_ops > 0).to_string()), + ("X-Tellur-GPU-Ops", self.gpu_ops.to_string()), + ("X-Tellur-GPU-Readbacks", self.gpu_readbacks.to_string()), ] } } @@ -846,7 +864,7 @@ impl FrameFormat { fn log_frame_stats(stats: &FrameRenderStats) { println!( - "frame timeline={} t={:.3}s size={}x{} format={} render={:.2}ms encode={:.2}ms total={:.2}ms bytes={} cache_delta={}h/{}m cache_size={}", + "frame timeline={} t={:.3}s size={}x{} format={} render={:.2}ms encode={:.2}ms total={:.2}ms bytes={} cache_delta={}h/{}m cache_size={} gpu_available={} gpu_ops={} gpu_readbacks={}", stats.timeline_id, stats.seconds, stats.resolution.width, @@ -859,6 +877,9 @@ fn log_frame_stats(stats: &FrameRenderStats) { stats.cache_hits, stats.cache_misses, format_bytes(stats.bytes_cached as u64), + stats.gpu_available, + 
stats.gpu_ops, + stats.gpu_readbacks, ); } diff --git a/tellur-renderer/Cargo.toml b/tellur-renderer/Cargo.toml index 8c265de..de0a266 100644 --- a/tellur-renderer/Cargo.toml +++ b/tellur-renderer/Cargo.toml @@ -4,11 +4,14 @@ version = "0.1.0" edition = "2021" [dependencies] +bytemuck = "1.25.0" bytes = "1.11.1" console = "0.16.3" indicatif = "0.18.4" lru = "0.12" +pollster = "0.3" sysinfo = { version = "0.32", default-features = false, features = ["system"] } tellur-core = { path = "../tellur-core" } thiserror = "2.0.18" tiny-skia = "0.12.0" +wgpu = "0.19" diff --git a/tellur-renderer/src/gpu.rs b/tellur-renderer/src/gpu.rs new file mode 100644 index 0000000..6b27145 --- /dev/null +++ b/tellur-renderer/src/gpu.rs @@ -0,0 +1,1051 @@ +use std::borrow::Cow; +use std::sync::Arc; + +use tellur_core::color::Color; +use tellur_core::raster::{CpuRasterImage, GpuSurface, PixelFormat, RasterImage, Resolution}; +use tellur_core::render_context::{ + CompositeInput, DropShadowInput, GpuRasterBackend, OutlineInput, +}; +use wgpu::util::DeviceExt; + +const BACKEND: &str = "tellur-wgpu-buffer-v1"; +const WORKGROUP: u32 = 16; + +pub struct GpuRenderer { + device: wgpu::Device, + queue: wgpu::Queue, + composite_pipeline: wgpu::ComputePipeline, + copy_alpha_pipeline: wgpu::ComputePipeline, + blur_pipeline: wgpu::ComputePipeline, + shadow_pipeline: wgpu::ComputePipeline, + outline_pipeline: wgpu::ComputePipeline, + stats: GpuRenderStats, +} + +#[derive(Debug, Clone, Copy, Default)] +pub struct GpuRenderStats { + pub composites: u64, + pub drop_shadows: u64, + pub outlines: u64, + pub readbacks: u64, +} + +impl GpuRenderStats { + pub fn total_ops(self) -> u64 { + self.composites + self.drop_shadows + self.outlines + } +} + +struct GpuBufferImage { + width: u32, + height: u32, + format: PixelFormat, + buffer: wgpu::Buffer, +} + +#[repr(C)] +#[derive(Clone, Copy)] +struct CompositeParams { + dst_w: u32, + dst_h: u32, + src_w: u32, + src_h: u32, + offset_x: i32, + offset_y: i32, + _pad0: 
u32, + _pad1: u32, +} + +unsafe impl bytemuck::Zeroable for CompositeParams {} +unsafe impl bytemuck::Pod for CompositeParams {} + +#[repr(C)] +#[derive(Clone, Copy)] +struct CopyAlphaParams { + src_w: u32, + src_h: u32, + out_w: u32, + out_h: u32, + pad_x: u32, + pad_y: u32, + _pad0: u32, + _pad1: u32, +} + +unsafe impl bytemuck::Zeroable for CopyAlphaParams {} +unsafe impl bytemuck::Pod for CopyAlphaParams {} + +#[repr(C)] +#[derive(Clone, Copy)] +struct BlurParams { + width: u32, + height: u32, + radius: u32, + horizontal: u32, +} + +unsafe impl bytemuck::Zeroable for BlurParams {} +unsafe impl bytemuck::Pod for BlurParams {} + +#[repr(C)] +#[derive(Clone, Copy)] +struct ColorCompositeParams { + dst_w: u32, + dst_h: u32, + src_w: u32, + src_h: u32, + offset_x: i32, + offset_y: i32, + r: u32, + g: u32, + b: u32, + a: u32, + radius_x: u32, + radius_y: u32, +} + +unsafe impl bytemuck::Zeroable for ColorCompositeParams {} +unsafe impl bytemuck::Pod for ColorCompositeParams {} + +impl GpuRenderer { + pub fn new() -> Result { + pollster::block_on(Self::new_async()) + } + + async fn new_async() -> Result { + let instance = wgpu::Instance::default(); + let adapter = instance + .request_adapter(&wgpu::RequestAdapterOptions { + power_preference: wgpu::PowerPreference::HighPerformance, + compatible_surface: None, + force_fallback_adapter: false, + }) + .await + .ok_or("no GPU adapter available")?; + let (device, queue) = adapter + .request_device( + &wgpu::DeviceDescriptor { + label: Some("tellur-gpu-device"), + required_features: wgpu::Features::empty(), + required_limits: wgpu::Limits::downlevel_defaults(), + }, + None, + ) + .await + .map_err(|e| format!("failed to create GPU device: {e}"))?; + + Ok(Self { + composite_pipeline: compute_pipeline( + &device, + "tellur-composite", + &format!("{COMMON_WGSL}{COMPOSITE_SHADER}"), + ), + copy_alpha_pipeline: compute_pipeline(&device, "tellur-copy-alpha", COPY_ALPHA_SHADER), + blur_pipeline: compute_pipeline(&device, 
"tellur-box-blur", BLUR_SHADER), + shadow_pipeline: compute_pipeline( + &device, + "tellur-shadow-composite", + &format!("{COMMON_WGSL}{SHADOW_SHADER}"), + ), + outline_pipeline: compute_pipeline( + &device, + "tellur-outline-composite", + &format!("{COMMON_WGSL}{OUTLINE_SHADER}"), + ), + device, + queue, + stats: GpuRenderStats::default(), + }) + } + + pub fn stats(&self) -> GpuRenderStats { + self.stats + } + + fn upload(&self, image: &CpuRasterImage) -> Option> { + if image.format != PixelFormat::Rgba8 { + return None; + } + let buffer = self.device.create_buffer(&wgpu::BufferDescriptor { + label: Some("tellur-gpu-upload"), + size: image.pixels.len() as u64, + usage: wgpu::BufferUsages::STORAGE + | wgpu::BufferUsages::COPY_DST + | wgpu::BufferUsages::COPY_SRC, + mapped_at_creation: false, + }); + self.queue.write_buffer(&buffer, 0, &image.pixels); + Some(Arc::new(GpuBufferImage { + width: image.width, + height: image.height, + format: image.format, + buffer, + })) + } + + fn image_ref(&self, image: &RasterImage) -> Option> { + match image { + RasterImage::Cpu(image) => self.upload(image), + RasterImage::Gpu(surface) if surface.backend() == BACKEND => { + Arc::downcast::(surface.handle_arc()).ok() + } + RasterImage::Gpu(_) => None, + } + } + + fn raster_image(&self, image: Arc) -> RasterImage { + RasterImage::Gpu(GpuSurface::new( + image.width, + image.height, + image.format, + BACKEND, + image, + )) + } + + fn empty_image(&self, resolution: Resolution) -> Arc { + let len = (resolution.width as usize) * (resolution.height as usize) * 4; + let buffer = self.device.create_buffer(&wgpu::BufferDescriptor { + label: Some("tellur-gpu-target"), + size: len as u64, + usage: wgpu::BufferUsages::STORAGE + | wgpu::BufferUsages::COPY_DST + | wgpu::BufferUsages::COPY_SRC, + mapped_at_creation: false, + }); + self.queue.write_buffer(&buffer, 0, &vec![0u8; len]); + Arc::new(GpuBufferImage { + width: resolution.width, + height: resolution.height, + format: PixelFormat::Rgba8, + 
buffer, + }) + } + + fn alpha_image(&self, width: u32, height: u32) -> GpuBufferImage { + let len = (width as usize) * (height as usize) * 4; + let buffer = self.device.create_buffer(&wgpu::BufferDescriptor { + label: Some("tellur-gpu-alpha"), + size: len as u64, + usage: wgpu::BufferUsages::STORAGE + | wgpu::BufferUsages::COPY_DST + | wgpu::BufferUsages::COPY_SRC, + mapped_at_creation: false, + }); + GpuBufferImage { + width, + height, + format: PixelFormat::Rgba8, + buffer, + } + } + + fn composite_one( + &self, + encoder: &mut wgpu::CommandEncoder, + dst: &GpuBufferImage, + src: &GpuBufferImage, + offset_x: i32, + offset_y: i32, + ) { + let params = CompositeParams { + dst_w: dst.width, + dst_h: dst.height, + src_w: src.width, + src_h: src.height, + offset_x, + offset_y, + _pad0: 0, + _pad1: 0, + }; + dispatch_three_buffer( + &self.device, + encoder, + &self.composite_pipeline, + &dst.buffer, + &src.buffer, + ¶ms, + src.width, + src.height, + ); + } + + fn copy_alpha( + &self, + encoder: &mut wgpu::CommandEncoder, + src: &GpuBufferImage, + alpha: &GpuBufferImage, + pad_x: u32, + pad_y: u32, + ) { + let params = CopyAlphaParams { + src_w: src.width, + src_h: src.height, + out_w: alpha.width, + out_h: alpha.height, + pad_x, + pad_y, + _pad0: 0, + _pad1: 0, + }; + dispatch_three_buffer( + &self.device, + encoder, + &self.copy_alpha_pipeline, + &src.buffer, + &alpha.buffer, + ¶ms, + alpha.width, + alpha.height, + ); + } + + fn blur_alpha( + &self, + encoder: &mut wgpu::CommandEncoder, + a: &GpuBufferImage, + b: &GpuBufferImage, + radius: u32, + ) { + if radius == 0 { + return; + } + for _ in 0..3 { + self.blur_pass(encoder, a, b, radius, true); + self.blur_pass(encoder, b, a, radius, false); + } + } + + fn blur_pass( + &self, + encoder: &mut wgpu::CommandEncoder, + src: &GpuBufferImage, + dst: &GpuBufferImage, + radius: u32, + horizontal: bool, + ) { + let params = BlurParams { + width: src.width, + height: src.height, + radius, + horizontal: u32::from(horizontal), 
+ }; + dispatch_three_buffer( + &self.device, + encoder, + &self.blur_pipeline, + &src.buffer, + &dst.buffer, + ¶ms, + src.width, + src.height, + ); + } + + fn composite_shadow_alpha( + &self, + encoder: &mut wgpu::CommandEncoder, + dst: &GpuBufferImage, + alpha: &GpuBufferImage, + offset_x: i32, + offset_y: i32, + color: Color, + ) { + let [r, g, b, a] = color_u8(color); + let params = ColorCompositeParams { + dst_w: dst.width, + dst_h: dst.height, + src_w: alpha.width, + src_h: alpha.height, + offset_x, + offset_y, + r, + g, + b, + a, + radius_x: 0, + radius_y: 0, + }; + dispatch_three_buffer( + &self.device, + encoder, + &self.shadow_pipeline, + &dst.buffer, + &alpha.buffer, + ¶ms, + alpha.width, + alpha.height, + ); + } + + fn composite_outline_alpha( + &self, + encoder: &mut wgpu::CommandEncoder, + dst: &GpuBufferImage, + alpha: &GpuBufferImage, + offset_x: i32, + offset_y: i32, + radius_x: u32, + radius_y: u32, + color: Color, + ) { + let [r, g, b, a] = color_u8(color); + let params = ColorCompositeParams { + dst_w: dst.width, + dst_h: dst.height, + src_w: alpha.width, + src_h: alpha.height, + offset_x, + offset_y, + r, + g, + b, + a, + radius_x, + radius_y, + }; + dispatch_three_buffer( + &self.device, + encoder, + &self.outline_pipeline, + &dst.buffer, + &alpha.buffer, + ¶ms, + alpha.width, + alpha.height, + ); + } +} + +impl GpuRasterBackend for GpuRenderer { + fn composite( + &mut self, + target: Resolution, + inputs: &[CompositeInput<'_>], + ) -> Option { + let target_image = self.empty_image(target); + let mut encoder = self + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor { + label: Some("tellur-gpu-composite"), + }); + + for input in inputs { + let src = self.image_ref(input.image)?; + if src.format != PixelFormat::Rgba8 { + return None; + } + self.composite_one( + &mut encoder, + &target_image, + &src, + input.offset_x, + input.offset_y, + ); + } + + self.queue.submit(Some(encoder.finish())); + self.stats.composites = 
self.stats.composites.saturating_add(1); + Some(self.raster_image(target_image)) + } + + fn drop_shadow(&mut self, input: DropShadowInput<'_>) -> Option { + let child = self.image_ref(input.child)?; + if child.format != PixelFormat::Rgba8 { + return None; + } + let pad = input.blur_radius.saturating_mul(3); + let shadow_w = child.width.checked_add(pad.checked_mul(2)?)?; + let shadow_h = child.height.checked_add(pad.checked_mul(2)?)?; + let alpha_a = self.alpha_image(shadow_w, shadow_h); + let alpha_b = self.alpha_image(shadow_w, shadow_h); + let target = self.empty_image(input.target); + + let mut encoder = self + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor { + label: Some("tellur-gpu-drop-shadow"), + }); + self.copy_alpha(&mut encoder, &child, &alpha_a, pad, pad); + self.blur_alpha(&mut encoder, &alpha_a, &alpha_b, input.blur_radius); + self.composite_shadow_alpha( + &mut encoder, + &target, + &alpha_a, + input.shadow_offset_x, + input.shadow_offset_y, + input.color, + ); + self.composite_one( + &mut encoder, + &target, + &child, + input.child_offset_x, + input.child_offset_y, + ); + + self.queue.submit(Some(encoder.finish())); + self.stats.drop_shadows = self.stats.drop_shadows.saturating_add(1); + Some(self.raster_image(target)) + } + + fn outline(&mut self, input: OutlineInput<'_>) -> Option { + let child = self.image_ref(input.child)?; + if child.format != PixelFormat::Rgba8 { + return None; + } + let outline_w = child.width.checked_add(input.radius_x.checked_mul(2)?)?; + let outline_h = child.height.checked_add(input.radius_y.checked_mul(2)?)?; + let alpha = self.alpha_image(outline_w, outline_h); + let target = self.empty_image(input.target); + + let mut encoder = self + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor { + label: Some("tellur-gpu-outline"), + }); + self.copy_alpha(&mut encoder, &child, &alpha, input.radius_x, input.radius_y); + self.composite_outline_alpha( + &mut encoder, + &target, + &alpha, + 
input.outline_offset_x, + input.outline_offset_y, + input.radius_x, + input.radius_y, + input.color, + ); + self.composite_one( + &mut encoder, + &target, + &child, + input.child_offset_x, + input.child_offset_y, + ); + + self.queue.submit(Some(encoder.finish())); + self.stats.outlines = self.stats.outlines.saturating_add(1); + Some(self.raster_image(target)) + } + + fn readback(&mut self, image: RasterImage) -> Option { + match image { + RasterImage::Cpu(image) => Some(image), + RasterImage::Gpu(surface) if surface.backend() == BACKEND => { + let image = Arc::downcast::(surface.handle_arc()).ok()?; + let byte_len = (image.width as usize) * (image.height as usize) * 4; + let staging = self.device.create_buffer(&wgpu::BufferDescriptor { + label: Some("tellur-gpu-readback"), + size: byte_len as u64, + usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ, + mapped_at_creation: false, + }); + let mut encoder = + self.device + .create_command_encoder(&wgpu::CommandEncoderDescriptor { + label: Some("tellur-gpu-readback"), + }); + encoder.copy_buffer_to_buffer(&image.buffer, 0, &staging, 0, byte_len as u64); + self.queue.submit(Some(encoder.finish())); + + let slice = staging.slice(..); + let (tx, rx) = std::sync::mpsc::channel(); + slice.map_async(wgpu::MapMode::Read, move |result| { + let _ = tx.send(result); + }); + self.device.poll(wgpu::Maintain::Wait); + rx.recv().ok()?.ok()?; + + let data = { + let mapped = slice.get_mapped_range(); + mapped.to_vec() + }; + staging.unmap(); + self.stats.readbacks = self.stats.readbacks.saturating_add(1); + Some(CpuRasterImage::new( + image.width, + image.height, + image.format, + data, + )) + } + RasterImage::Gpu(_) => None, + } + } +} + +fn compute_pipeline( + device: &wgpu::Device, + label: &'static str, + source: &str, +) -> wgpu::ComputePipeline { + let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor { + label: Some(label), + source: wgpu::ShaderSource::Wgsl(Cow::Owned(source.to_owned())), + }); 
+ device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor { + label: Some(label), + layout: None, + module: &shader, + entry_point: "main", + }) +} + +fn dispatch_three_buffer( + device: &wgpu::Device, + encoder: &mut wgpu::CommandEncoder, + pipeline: &wgpu::ComputePipeline, + a: &wgpu::Buffer, + b: &wgpu::Buffer, + params: &P, + width: u32, + height: u32, +) { + let params = device.create_buffer_init(&wgpu::util::BufferInitDescriptor { + label: Some("tellur-gpu-params"), + contents: bytemuck::bytes_of(params), + usage: wgpu::BufferUsages::STORAGE, + }); + let layout = pipeline.get_bind_group_layout(0); + let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { + label: Some("tellur-gpu-bind-group"), + layout: &layout, + entries: &[ + wgpu::BindGroupEntry { + binding: 0, + resource: a.as_entire_binding(), + }, + wgpu::BindGroupEntry { + binding: 1, + resource: b.as_entire_binding(), + }, + wgpu::BindGroupEntry { + binding: 2, + resource: params.as_entire_binding(), + }, + ], + }); + + let mut pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { + label: Some("tellur-gpu-pass"), + timestamp_writes: None, + }); + pass.set_pipeline(pipeline); + pass.set_bind_group(0, &bind_group, &[]); + pass.dispatch_workgroups(div_ceil(width, WORKGROUP), div_ceil(height, WORKGROUP), 1); +} + +fn div_ceil(n: u32, d: u32) -> u32 { + if n == 0 { + 0 + } else { + 1 + (n - 1) / d + } +} + +fn color_u8(color: Color) -> [u32; 4] { + [ + (color.r * 255.0).round().clamp(0.0, 255.0) as u32, + (color.g * 255.0).round().clamp(0.0, 255.0) as u32, + (color.b * 255.0).round().clamp(0.0, 255.0) as u32, + (color.a * 255.0).round().clamp(0.0, 255.0) as u32, + ] +} + +const COMMON_WGSL: &str = r#" +fn unpack_rgba(px: u32) -> vec4 { + return vec4( + px & 255u, + (px >> 8u) & 255u, + (px >> 16u) & 255u, + (px >> 24u) & 255u, + ); +} + +fn pack_rgba(c: vec4) -> u32 { + return (c.x & 255u) | ((c.y & 255u) << 8u) | ((c.z & 255u) << 16u) | ((c.w & 255u) << 24u); +} + +fn 
blend_over(dst_px: u32, src_px: u32) -> u32 { + let s = unpack_rgba(src_px); + let sa = s.w; + if (sa == 0u) { + return dst_px; + } + if (sa == 255u) { + return src_px; + } + + let d = unpack_rgba(dst_px); + let inv_sa = 255u - sa; + let out_a_x255 = sa * 255u + d.w * inv_sa; + let half = out_a_x255 / 2u; + let out_r = (s.x * sa * 255u + d.x * d.w * inv_sa + half) / out_a_x255; + let out_g = (s.y * sa * 255u + d.y * d.w * inv_sa + half) / out_a_x255; + let out_b = (s.z * sa * 255u + d.z * d.w * inv_sa + half) / out_a_x255; + let out_a = (out_a_x255 + 127u) / 255u; + return pack_rgba(vec4(out_r, out_g, out_b, out_a)); +} +"#; + +const COMPOSITE_SHADER: &str = r#" +struct Params { + dst_w: u32, + dst_h: u32, + src_w: u32, + src_h: u32, + offset_x: i32, + offset_y: i32, + pad0: u32, + pad1: u32, +} + +@group(0) @binding(0) var dst: array; +@group(0) @binding(1) var src: array; +@group(0) @binding(2) var params: Params; + +@compute @workgroup_size(16, 16) +fn main(@builtin(global_invocation_id) id: vec3) { + let x = id.x; + let y = id.y; + if (x >= params.src_w || y >= params.src_h) { + return; + } + let dx = i32(x) + params.offset_x; + let dy = i32(y) + params.offset_y; + if (dx < 0 || dy < 0 || dx >= i32(params.dst_w) || dy >= i32(params.dst_h)) { + return; + } + let sidx = y * params.src_w + x; + let didx = u32(dy) * params.dst_w + u32(dx); + dst[didx] = blend_over(dst[didx], src[sidx]); +} +"#; + +const COPY_ALPHA_SHADER: &str = r#" +struct Params { + src_w: u32, + src_h: u32, + out_w: u32, + out_h: u32, + pad_x: u32, + pad_y: u32, + pad0: u32, + pad1: u32, +} + +@group(0) @binding(0) var src: array; +@group(0) @binding(1) var alpha: array; +@group(0) @binding(2) var params: Params; + +@compute @workgroup_size(16, 16) +fn main(@builtin(global_invocation_id) id: vec3) { + let x = id.x; + let y = id.y; + if (x >= params.out_w || y >= params.out_h) { + return; + } + let out_idx = y * params.out_w + x; + if (x < params.pad_x || y < params.pad_y) { + alpha[out_idx] = 
0u; + return; + } + let sx = x - params.pad_x; + let sy = y - params.pad_y; + if (sx >= params.src_w || sy >= params.src_h) { + alpha[out_idx] = 0u; + return; + } + let px = src[sy * params.src_w + sx]; + alpha[out_idx] = (px >> 24u) & 255u; +} +"#; + +const BLUR_SHADER: &str = r#" +struct Params { + width: u32, + height: u32, + radius: u32, + horizontal: u32, +} + +@group(0) @binding(0) var src: array; +@group(0) @binding(1) var dst: array; +@group(0) @binding(2) var params: Params; + +@compute @workgroup_size(16, 16) +fn main(@builtin(global_invocation_id) id: vec3) { + let x = id.x; + let y = id.y; + if (x >= params.width || y >= params.height) { + return; + } + + var sum = 0u; + var count = 0u; + if (params.horizontal != 0u) { + let start = select(0u, x - params.radius, x >= params.radius); + let end = min(params.width - 1u, x + params.radius); + var ix = start; + loop { + sum = sum + src[y * params.width + ix]; + count = count + 1u; + if (ix >= end) { + break; + } + ix = ix + 1u; + } + } else { + let start = select(0u, y - params.radius, y >= params.radius); + let end = min(params.height - 1u, y + params.radius); + var iy = start; + loop { + sum = sum + src[iy * params.width + x]; + count = count + 1u; + if (iy >= end) { + break; + } + iy = iy + 1u; + } + } + dst[y * params.width + x] = sum / count; +} +"#; + +const SHADOW_SHADER: &str = r#" +struct Params { + dst_w: u32, + dst_h: u32, + src_w: u32, + src_h: u32, + offset_x: i32, + offset_y: i32, + r: u32, + g: u32, + b: u32, + a: u32, + radius_x: u32, + radius_y: u32, +} + +@group(0) @binding(0) var dst: array; +@group(0) @binding(1) var alpha: array; +@group(0) @binding(2) var params: Params; + +@compute @workgroup_size(16, 16) +fn main(@builtin(global_invocation_id) id: vec3) { + let x = id.x; + let y = id.y; + if (x >= params.src_w || y >= params.src_h) { + return; + } + let dx = i32(x) + params.offset_x; + let dy = i32(y) + params.offset_y; + if (dx < 0 || dy < 0 || dx >= i32(params.dst_w) || dy >= 
i32(params.dst_h)) { + return; + } + let a = (alpha[y * params.src_w + x] * params.a + 127u) / 255u; + if (a == 0u) { + return; + } + let src = pack_rgba(vec4(params.r, params.g, params.b, a)); + let didx = u32(dy) * params.dst_w + u32(dx); + dst[didx] = blend_over(dst[didx], src); +} +"#; + +const OUTLINE_SHADER: &str = r#" +struct Params { + dst_w: u32, + dst_h: u32, + src_w: u32, + src_h: u32, + offset_x: i32, + offset_y: i32, + r: u32, + g: u32, + b: u32, + a: u32, + radius_x: u32, + radius_y: u32, +} + +@group(0) @binding(0) var dst: array; +@group(0) @binding(1) var alpha: array; +@group(0) @binding(2) var params: Params; + +@compute @workgroup_size(16, 16) +fn main(@builtin(global_invocation_id) id: vec3) { + let x = id.x; + let y = id.y; + if (x >= params.src_w || y >= params.src_h) { + return; + } + + let rx = i32(params.radius_x); + let ry = i32(params.radius_y); + let rx2 = max(rx * rx, 1); + let ry2 = max(ry * ry, 1); + let limit = rx2 * ry2; + var m = 0u; + var oy = -ry; + loop { + var ox = -rx; + loop { + if (ox * ox * ry2 + oy * oy * rx2 <= limit) { + let sx = i32(x) + ox; + let sy = i32(y) + oy; + if (sx >= 0 && sy >= 0 && sx < i32(params.src_w) && sy < i32(params.src_h)) { + m = max(m, alpha[u32(sy) * params.src_w + u32(sx)]); + } + } + if (ox >= rx) { + break; + } + ox = ox + 1; + } + if (oy >= ry) { + break; + } + oy = oy + 1; + } + + let orig = alpha[y * params.src_w + x]; + let ring = select(0u, m - orig, m > orig); + let a = (ring * params.a + 127u) / 255u; + if (a == 0u) { + return; + } + let dx = i32(x) + params.offset_x; + let dy = i32(y) + params.offset_y; + if (dx < 0 || dy < 0 || dx >= i32(params.dst_w) || dy >= i32(params.dst_h)) { + return; + } + let src = pack_rgba(vec4(params.r, params.g, params.b, a)); + let didx = u32(dy) * params.dst_w + u32(dx); + dst[didx] = blend_over(dst[didx], src); +} +"#; + +#[cfg(test)] +mod tests { + use super::*; + use tellur_core::composite::composite_at; + use tellur_core::render_context::{ + 
CompositeInput, DropShadowInput, GpuRasterBackend, OutlineInput, + }; + + fn gpu_or_skip() -> Option { + match GpuRenderer::new() { + Ok(gpu) => Some(gpu), + Err(err) => { + eprintln!("skipping GPU smoke test: {err}"); + None + } + } + } + + fn image(width: u32, height: u32, pixels: &[u8]) -> RasterImage { + RasterImage::cpu(width, height, PixelFormat::Rgba8, pixels.to_vec()) + } + + fn readback(gpu: &mut GpuRenderer, image: RasterImage) -> CpuRasterImage { + GpuRasterBackend::readback(gpu, image).expect("GPU image should read back") + } + + #[test] + #[ignore = "requires a GPU adapter"] + fn composite_matches_cpu_blend() { + let Some(mut gpu) = gpu_or_skip() else { + return; + }; + let src = image( + 2, + 1, + &[ + 100, 0, 0, 128, // + 0, 255, 0, 255, + ], + ); + let target = Resolution::new(3, 2); + let input = CompositeInput { + image: &src, + offset_x: 1, + offset_y: 1, + }; + + let rendered = GpuRasterBackend::composite(&mut gpu, target, &[input]).unwrap(); + let rendered = readback(&mut gpu, rendered); + + let mut expected = vec![0u8; 3 * 2 * 4]; + let src = match src { + RasterImage::Cpu(src) => src, + RasterImage::Gpu(_) => unreachable!(), + }; + composite_at(&mut expected, target, &src, 1, 1); + assert_eq!(rendered.pixels.as_ref(), expected.as_slice()); + } + + #[test] + #[ignore = "requires a GPU adapter"] + fn drop_shadow_composites_shadow_then_child() { + let Some(mut gpu) = gpu_or_skip() else { + return; + }; + let child = image(1, 1, &[200, 10, 20, 255]); + let input = DropShadowInput { + child: &child, + target: Resolution::new(3, 1), + child_offset_x: 1, + child_offset_y: 0, + shadow_offset_x: 0, + shadow_offset_y: 0, + blur_radius: 0, + color: Color::rgba_u8(1, 2, 3, 128), + }; + + let rendered = GpuRasterBackend::drop_shadow(&mut gpu, input).unwrap(); + let rendered = readback(&mut gpu, rendered); + + assert_eq!( + rendered.pixels.as_ref(), + &[ + 1, 2, 3, 128, // + 200, 10, 20, 255, // + 0, 0, 0, 0, + ] + ); + } + + #[test] + #[ignore = "requires 
a GPU adapter"] + fn outline_dilates_child_alpha() { + let Some(mut gpu) = gpu_or_skip() else { + return; + }; + let child = image(1, 1, &[9, 8, 7, 255]); + let input = OutlineInput { + child: &child, + target: Resolution::new(3, 3), + child_offset_x: 1, + child_offset_y: 1, + outline_offset_x: 0, + outline_offset_y: 0, + radius_x: 1, + radius_y: 1, + color: Color::rgba_u8(1, 2, 3, 255), + }; + + let rendered = GpuRasterBackend::outline(&mut gpu, input).unwrap(); + let rendered = readback(&mut gpu, rendered); + + assert_eq!( + rendered.pixels.as_ref(), + &[ + 0, 0, 0, 0, 1, 2, 3, 255, 0, 0, 0, 0, // + 1, 2, 3, 255, 9, 8, 7, 255, 1, 2, 3, 255, // + 0, 0, 0, 0, 1, 2, 3, 255, 0, 0, 0, 0, + ] + ); + } +} diff --git a/tellur-renderer/src/lib.rs b/tellur-renderer/src/lib.rs index d875abf..1c312c0 100644 --- a/tellur-renderer/src/lib.rs +++ b/tellur-renderer/src/lib.rs @@ -1,3 +1,4 @@ +pub mod gpu; pub mod outline; pub mod rasterize; pub mod render_context; diff --git a/tellur-renderer/src/outline.rs b/tellur-renderer/src/outline.rs index 86f8afa..d6b024d 100644 --- a/tellur-renderer/src/outline.rs +++ b/tellur-renderer/src/outline.rs @@ -15,7 +15,7 @@ use tellur_core::composite::composite_at; use tellur_core::dyn_compare::hash_f32; use tellur_core::geometry::{Constraints, Rect, Vec2}; use tellur_core::raster::{CpuRasterImage, PixelFormat, RasterComponent, RasterImage, Resolution}; -use tellur_core::render_context::RenderContext; +use tellur_core::render_context::{OutlineInput, RenderContext}; pub struct Outline { /// Stroke width on the outside of the child, in logical units. 
@@ -63,6 +63,7 @@ impl RasterComponent for Outline { } let sx = target.width as f32 / paint.size.0; let sy = target.height as f32 / paint.size.1; + let gpu_available = ctx.prefers_gpu() && ctx.gpu_backend().is_some(); // Render the child through the context so its output is memoized // independently of the outline — matches the shadow component's @@ -74,7 +75,6 @@ impl RasterComponent for Outline { size, Resolution::new(child_px_w, child_px_h), ); - let child_image = ctx.readback(child_image); // Dilate the child alpha by `width` logical units and subtract // the original alpha so only the ring outside the child @@ -85,9 +85,6 @@ impl RasterComponent for Outline { // past the buffer edge and get clipped. let width_px_x = (self.width.max(0.0) * sx).round() as u32; let width_px_y = (self.width.max(0.0) * sy).round() as u32; - let outline_image = make_outline(&child_image, width_px_x, width_px_y, self.color); - - let mut accum = vec![0u8; (target.width as usize) * (target.height as usize) * 4]; let pad_lu_x = width_px_x as f32 / sx; let pad_lu_y = width_px_y as f32 / sy; @@ -95,6 +92,35 @@ impl RasterComponent for Outline { let outline_local_y = (child_paint.origin.1 - pad_lu_y) - paint.origin.1; let outline_px_x = (outline_local_x * sx).round() as i32; let outline_px_y = (outline_local_y * sy).round() as i32; + let child_local_x = child_paint.origin.0 - paint.origin.0; + let child_local_y = child_paint.origin.1 - paint.origin.1; + let child_px_x = (child_local_x * sx).round() as i32; + let child_px_y = (child_local_y * sy).round() as i32; + + if gpu_available { + let input = OutlineInput { + child: &child_image, + target, + child_offset_x: child_px_x, + child_offset_y: child_px_y, + outline_offset_x: outline_px_x, + outline_offset_y: outline_px_y, + radius_x: width_px_x, + radius_y: width_px_y, + color: self.color, + }; + if let Some(gpu) = ctx.gpu_backend() { + if let Some(image) = gpu.outline(input) { + return image; + } + } + } + + let child_image = 
ctx.readback(child_image); + let outline_image = make_outline(&child_image, width_px_x, width_px_y, self.color); + + let mut accum = vec![0u8; (target.width as usize) * (target.height as usize) * 4]; + composite_at( &mut accum, target, @@ -103,10 +129,6 @@ impl RasterComponent for Outline { outline_px_y, ); - let child_local_x = child_paint.origin.0 - paint.origin.0; - let child_local_y = child_paint.origin.1 - paint.origin.1; - let child_px_x = (child_local_x * sx).round() as i32; - let child_px_y = (child_local_y * sy).round() as i32; composite_at(&mut accum, target, &child_image, child_px_x, child_px_y); RasterImage::cpu(target.width, target.height, PixelFormat::Rgba8, accum) diff --git a/tellur-renderer/src/render_context.rs b/tellur-renderer/src/render_context.rs index 3aec951..c54b2b9 100644 --- a/tellur-renderer/src/render_context.rs +++ b/tellur-renderer/src/render_context.rs @@ -24,7 +24,9 @@ use sysinfo::System; use tellur_core::dyn_compare::DynEq; use tellur_core::geometry::Vec2; use tellur_core::raster::{PixelFormat, RasterComponent, RasterImage, Resolution}; -use tellur_core::render_context::{GpuPreference, RenderContext}; +use tellur_core::render_context::{GpuPreference, GpuRasterBackend, RenderContext}; + +use crate::gpu::{GpuRenderStats, GpuRenderer}; /// Default cache size in bytes (1 GiB) when constructed with /// [`CachingRenderContext::new`]. @@ -125,6 +127,14 @@ pub struct CacheMetrics { /// Misses where the freshly-produced image was not admitted /// because a single image exceeded the configured cap. pub oversize_skips: u64, + /// Current GPU policy for this context. + pub gpu_preference: GpuPreference, + /// Whether the context has tried to create a GPU backend. + pub gpu_init_attempted: bool, + /// Whether a GPU backend is currently active. + pub gpu_available: bool, + /// GPU operation counters accumulated by the active backend. 
+ pub gpu: GpuRenderStats, /// Breakdown by the concrete `RasterComponent` type that was queried, /// keyed by display name (`std::any::type_name`). pub per_type: HashMap<&'static str, TypeStats>, @@ -160,6 +170,18 @@ impl fmt::Display for CacheMetrics { self.pressure_skips, self.oversize_skips, )?; + writeln!( + f, + "GPU preference={:?}, attempted={}, available={}, ops={} (composite {}, shadow {}, outline {}, readback {})", + self.gpu_preference, + self.gpu_init_attempted, + self.gpu_available, + self.gpu.total_ops(), + self.gpu.composites, + self.gpu.drop_shadows, + self.gpu.outlines, + self.gpu.readbacks, + )?; if !self.per_type.is_empty() { writeln!(f, "Cache by type (sorted by self_time, descending):")?; // Sort by self_time so the type that's actually burning @@ -240,6 +262,8 @@ pub struct CachingRenderContext { oversize_skips: u64, per_type: HashMap, gpu_preference: GpuPreference, + gpu: Option, + gpu_init_attempted: bool, // Running total of every `ctx.render` call's inclusive duration. // A `render` invocation snapshots this on entry and re-reads it on // exit to derive how much time was spent inside nested child @@ -267,7 +291,9 @@ impl CachingRenderContext { pressure_skips: 0, oversize_skips: 0, per_type: HashMap::new(), - gpu_preference: GpuPreference::Disabled, + gpu_preference: GpuPreference::Auto, + gpu: None, + gpu_init_attempted: false, total_render_time: Duration::ZERO, } } @@ -281,6 +307,18 @@ impl CachingRenderContext { self.gpu_preference = gpu_preference; } + fn gpu_backend_mut(&mut self) -> Option<&mut GpuRenderer> { + if self.gpu.is_some() { + return self.gpu.as_mut(); + } + if !self.gpu_preference.prefers_gpu() || self.gpu_init_attempted { + return None; + } + self.gpu_init_attempted = true; + self.gpu = GpuRenderer::new().ok(); + self.gpu.as_mut() + } + /// Current memory footprint of cached images, in bytes. 
pub fn current_bytes(&self) -> usize { self.cur_bytes @@ -308,6 +346,14 @@ impl CachingRenderContext { bytes_evicted: self.bytes_evicted, pressure_skips: self.pressure_skips, oversize_skips: self.oversize_skips, + gpu_preference: self.gpu_preference, + gpu_init_attempted: self.gpu_init_attempted, + gpu_available: self.gpu.is_some(), + gpu: self + .gpu + .as_ref() + .map(GpuRenderer::stats) + .unwrap_or_default(), per_type, } } @@ -397,6 +443,11 @@ impl RenderContext for CachingRenderContext { self.gpu_preference } + fn gpu_backend(&mut self) -> Option<&mut dyn GpuRasterBackend> { + self.gpu_backend_mut() + .map(|gpu| gpu as &mut dyn GpuRasterBackend) + } + fn render( &mut self, component: &dyn RasterComponent, diff --git a/tellur-renderer/src/shadow.rs b/tellur-renderer/src/shadow.rs index 4177ce0..4cf0a73 100644 --- a/tellur-renderer/src/shadow.rs +++ b/tellur-renderer/src/shadow.rs @@ -13,7 +13,7 @@ use tellur_core::composite::composite_at; use tellur_core::dyn_compare::hash_f32; use tellur_core::geometry::{Constraints, Rect, Vec2}; use tellur_core::raster::{CpuRasterImage, PixelFormat, RasterComponent, RasterImage, Resolution}; -use tellur_core::render_context::RenderContext; +use tellur_core::render_context::{DropShadowInput, RenderContext}; pub struct DropShadow { /// Offset of the shadow relative to the child, in logical units. 
@@ -85,6 +85,7 @@ impl RasterComponent for DropShadow { } let sx = target.width as f32 / paint.size.0; let sy = target.height as f32 / paint.size.1; + let gpu_available = ctx.prefers_gpu() && ctx.gpu_backend().is_some(); // Render the child through the context so its output is memoized // independently of the shadow — that's the key win for static @@ -96,36 +97,52 @@ impl RasterComponent for DropShadow { size, Resolution::new(child_px_w, child_px_h), ); + + let blur_px = (self.blur * sx.max(sy)).round().max(0.0) as u32; + let pad_px = blur_px as f32 * BLUR_EXTENT_MULTIPLIER; + let pad_lu_x = pad_px / sx; + let pad_lu_y = pad_px / sy; + let shadow_local_x = (child_paint.origin.0 + self.offset.0 - pad_lu_x) - paint.origin.0; + let shadow_local_y = (child_paint.origin.1 + self.offset.1 - pad_lu_y) - paint.origin.1; + let shadow_px_x = (shadow_local_x * sx).round() as i32; + let shadow_px_y = (shadow_local_y * sy).round() as i32; + let child_local_x = child_paint.origin.0 - paint.origin.0; + let child_local_y = child_paint.origin.1 - paint.origin.1; + let child_px_x = (child_local_x * sx).round() as i32; + let child_px_y = (child_local_y * sy).round() as i32; + + if gpu_available { + let input = DropShadowInput { + child: &child_image, + target, + child_offset_x: child_px_x, + child_offset_y: child_px_y, + shadow_offset_x: shadow_px_x, + shadow_offset_y: shadow_px_y, + blur_radius: blur_px, + color: self.color, + }; + if let Some(gpu) = ctx.gpu_backend() { + if let Some(image) = gpu.drop_shadow(input) { + return image; + } + } + } + let child_image = ctx.readback(child_image); // Build a padded shadow image whose alpha is a blurred copy of // the child's alpha, tinted with `color`. Padding equals the // 3-pass box-blur extent (3 * radius) so the shadow can spread // beyond the child's own bounds. 
- let blur_px = (self.blur * sx.max(sy)).round().max(0.0) as u32; let shadow_image = make_shadow(&child_image, blur_px, self.color); // Composite shadow then child into a buffer covering `paint`. let mut accum = vec![0u8; (target.width as usize) * (target.height as usize) * 4]; - // Position the shadow's top-left in the output buffer. The - // shadow image's local origin corresponds to - // `(child_paint.origin + offset - pad)` in our paint-bounds - // coordinate space, where `pad` is the 3-pass extent in pixels. - let pad_px = blur_px as f32 * BLUR_EXTENT_MULTIPLIER; - let pad_lu_x = pad_px / sx; - let pad_lu_y = pad_px / sy; - let shadow_local_x = (child_paint.origin.0 + self.offset.0 - pad_lu_x) - paint.origin.0; - let shadow_local_y = (child_paint.origin.1 + self.offset.1 - pad_lu_y) - paint.origin.1; - let shadow_px_x = (shadow_local_x * sx).round() as i32; - let shadow_px_y = (shadow_local_y * sy).round() as i32; composite_at(&mut accum, target, &shadow_image, shadow_px_x, shadow_px_y); // Position the child relative to the paint-bounds origin. 
- let child_local_x = child_paint.origin.0 - paint.origin.0; - let child_local_y = child_paint.origin.1 - paint.origin.1; - let child_px_x = (child_local_x * sx).round() as i32; - let child_px_y = (child_local_y * sy).round() as i32; composite_at(&mut accum, target, &child_image, child_px_x, child_px_y); RasterImage::cpu(target.width, target.height, PixelFormat::Rgba8, accum) From 25b9e6f639aee7f80e4adba2d90dad7086339908 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E3=81=82=E3=81=99=E3=81=B1=E3=82=8B?= Date: Thu, 28 May 2026 03:15:15 +0900 Subject: [PATCH 3/4] feat: add GPU vector rasterization --- Cargo.lock | 265 ++++++++++------ tellur-core/src/render_context.rs | 3 + tellur-renderer/Cargo.toml | 3 +- tellur-renderer/src/gpu.rs | 431 +++++++++++++++++++++++++- tellur-renderer/src/rasterize.rs | 9 +- tellur-renderer/src/render_context.rs | 3 +- 6 files changed, 621 insertions(+), 93 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 01f2e8d..cdecafa 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8,18 +8,6 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" -[[package]] -name = "ahash" -version = "0.8.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" -dependencies = [ - "cfg-if", - "once_cell", - "version_check", - "zerocopy", -] - [[package]] name = "allocator-api2" version = "0.2.21" @@ -106,22 +94,26 @@ name = "bytemuck" version = "1.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" +dependencies = [ + "bytemuck_derive", +] [[package]] -name = "bytes" -version = "1.11.1" +name = "bytemuck_derive" +version = "1.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" 
+checksum = "f9abbd1bc6865053c427f7198e6af43bfdedc55ab791faed4fbd361d789575ff" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] [[package]] -name = "cc" -version = "1.2.62" +name = "bytes" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1dce859f0832a7d088c4f1119888ab94ef4b5d6795d1ce05afb7fe159d79f98" -dependencies = [ - "find-msvc-tools", - "shlex", -] +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" [[package]] name = "cfg-if" @@ -235,9 +227,9 @@ dependencies = [ [[package]] name = "d3d12" -version = "0.19.0" +version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e3d747f100290a1ca24b752186f61f6637e1deffe3bf6320de6fcb29510a307" +checksum = "b28bfe653d79bd16c77f659305b195b82bb5ce0c0eb2a4846b82ddbd77586813" dependencies = [ "bitflags 2.11.1", "libloading 0.8.9", @@ -253,6 +245,15 @@ dependencies = [ "libloading 0.8.9", ] +[[package]] +name = "document-features" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4b8a88685455ed29a21542a33abd9cb6510b6b129abadabdcef0f4c55bc8f61" +dependencies = [ + "litrs", +] + [[package]] name = "encode_unicode" version = "1.0.0" @@ -265,6 +266,15 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" +[[package]] +name = "euclid" +version = "0.22.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1a05365e3b1c6d1650318537c7460c6923f1abdd272ad6842baa2b509957a06" +dependencies = [ + "num-traits", +] + [[package]] name = "fdeflate" version = "0.3.7" @@ -274,12 +284,6 @@ dependencies = [ "simd-adler32", ] -[[package]] -name = "find-msvc-tools" -version = "0.1.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" - 
[[package]] name = "flate2" version = "1.1.9" @@ -296,6 +300,15 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" +[[package]] +name = "font-types" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34fd7136aca682873d859ef34494ab1a7d3f57ecd485ed40eb6437ee8c85aa29" +dependencies = [ + "bytemuck", +] + [[package]] name = "fontconfig" version = "0.10.2" @@ -361,6 +374,17 @@ version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" +[[package]] +name = "futures-intrusive" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d930c203dd0b6ff06e0201a4a2fe9149b43c684fd4420555b26d21b1a02956f" +dependencies = [ + "futures-core", + "lock_api", + "parking_lot", +] + [[package]] name = "futures-task" version = "0.3.32" @@ -445,32 +469,32 @@ dependencies = [ [[package]] name = "gpu-descriptor" -version = "0.2.4" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc11df1ace8e7e564511f53af41f3e42ddc95b56fd07b3f4445d2a6048bc682c" +checksum = "b89c83349105e3732062a895becfc71a8f921bb71ecbbdd8ff99263e3b53a0ca" dependencies = [ "bitflags 2.11.1", "gpu-descriptor-types", - "hashbrown 0.14.5", + "hashbrown 0.15.5", ] [[package]] name = "gpu-descriptor-types" -version = "0.1.2" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bf0b36e6f090b7e1d8a4b49c0cb81c1f8376f72198c65dd3ad9ff3556b8b78c" +checksum = "fdf242682df893b86f33a73828fb09ca4b2d3bb6cc95249707fc684d27484b91" dependencies = [ "bitflags 2.11.1", ] [[package]] -name = "hashbrown" -version = "0.14.5" +name = "guillotiere" +version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +checksum = "b62d5865c036cb1393e23c50693df631d3f5d7bcca4c04fe4cc0fd592e74a782" dependencies = [ - "ahash", - "allocator-api2", + "euclid", + "svg_fmt", ] [[package]] @@ -591,6 +615,17 @@ version = "3.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e2db585e1d738fc771bf08a151420d3ed193d9d895a36df7f6f8a9456b911ddc" +[[package]] +name = "kurbo" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c62026ae44756f8a599ba21140f350303d4f08dcdcc71b5ad9c9bb8128c13c62" +dependencies = [ + "arrayvec", + "euclid", + "smallvec", +] + [[package]] name = "libc" version = "0.2.186" @@ -623,6 +658,12 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" +[[package]] +name = "litrs" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11d3d7f243d5c5a8b9bb5d6dd2b1602c0cb0b9db1621bafc7ed66e35ff9fe092" + [[package]] name = "lock_api" version = "0.4.14" @@ -673,9 +714,9 @@ dependencies = [ [[package]] name = "metal" -version = "0.27.0" +version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c43f73953f8cbe511f021b58f18c3ce1c3d1ae13fe953293e13345bf83217f25" +checksum = "5637e166ea14be6063a3f8ba5ccb9a4159df7d8f6d61c02fc3d480b1f90dcfcb" dependencies = [ "bitflags 2.11.1", "block", @@ -698,10 +739,11 @@ dependencies = [ [[package]] name = "naga" -version = "0.19.2" +version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50e3524642f53d9af419ab5e8dd29d3ba155708267667c2f3f06c88c9e130843" +checksum = "e536ae46fcab0876853bd4a632ede5df4b1c2527a58f6c5a4150fe86be858231" dependencies = [ + "arrayvec", "bit-set", "bitflags 2.11.1", "codespan-reporting", @@ -750,16 +792,6 @@ source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "915b1b472bc21c53464d6c8461c9d3af805ba1ef837e1cac254428f4a77177b1" dependencies = [ "malloc_buf", - "objc_exception", -] - -[[package]] -name = "objc_exception" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad970fb455818ad6cba4c122ad012fae53ae8b4795f86378bce65e4f6bab2ca4" -dependencies = [ - "cc", ] [[package]] @@ -797,6 +829,16 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "peniko" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c28d7294093837856bb80ad191cc46a2fcec8a30b43b7a3b0285325f0a917a9" +dependencies = [ + "kurbo", + "smallvec", +] + [[package]] name = "pin-project-lite" version = "0.2.17" @@ -876,6 +918,16 @@ version = "0.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "20675572f6f24e9e76ef639bc5552774ed45f1c30e2951e1e99c59888861c539" +[[package]] +name = "read-fonts" +version = "0.19.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8b8af39d1f23869711ad4cea5e7835a20daa987f80232f7f2a2374d648ca64d" +dependencies = [ + "bytemuck", + "font-types", +] + [[package]] name = "redox_syscall" version = "0.5.18" @@ -933,18 +985,22 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" -[[package]] -name = "shlex" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" - [[package]] name = "simd-adler32" version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214" +[[package]] +name = 
"skrifa" +version = "0.19.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ab45fb68b53576a43d4fc0e9ec8ea64e29a4d2cc7f44506964cb75f288222e9" +dependencies = [ + "bytemuck", + "read-fonts", +] + [[package]] name = "slab" version = "0.4.12" @@ -987,6 +1043,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6637bab7722d379c8b41ba849228d680cc12d0a45ba1fa2b48f2a30577a06731" +[[package]] +name = "svg_fmt" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0193cc4331cfd2f3d2011ef287590868599a2f33c3e69bc22c1a3d3acf9e02fb" + [[package]] name = "syn" version = "1.0.109" @@ -1066,6 +1128,7 @@ dependencies = [ "tellur-core", "thiserror 2.0.18", "tiny-skia", + "vello", "wgpu", ] @@ -1222,6 +1285,50 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81e544489bf3d8ef66c953931f56617f423cd4b5494be343d9b9d3dda037b9a3" +[[package]] +name = "vello" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "861c12258ed7e72762765e2c88a07bb528040ec4e5f87514d65b19b29a7cccf0" +dependencies = [ + "bytemuck", + "futures-intrusive", + "log", + "peniko", + "raw-window-handle", + "skrifa", + "static_assertions", + "thiserror 1.0.69", + "vello_encoding", + "vello_shaders", + "wgpu", +] + +[[package]] +name = "vello_encoding" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5d73777327877fa824a45c7195f850390dd3f91feb15f47d331db1fc01abf6d" +dependencies = [ + "bytemuck", + "guillotiere", + "peniko", + "skrifa", + "smallvec", +] + +[[package]] +name = "vello_shaders" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13ab6bcb2b079c3cf57e964d1ba0b1f08901284be1c7f5cba34d3e0e08154bce" +dependencies = [ + "bytemuck", + "naga", + "thiserror 1.0.69", + "vello_encoding", +] + [[package]] name = 
"version_check" version = "0.9.5" @@ -1305,13 +1412,14 @@ dependencies = [ [[package]] name = "wgpu" -version = "0.19.4" +version = "0.20.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbd7311dbd2abcfebaabf1841a2824ed7c8be443a0f29166e5d3c6a53a762c01" +checksum = "90e37c7b9921b75dfd26dd973fdcbce36f13dfa6e2dc82aece584e0ed48c355c" dependencies = [ "arrayvec", "cfg-if", "cfg_aliases", + "document-features", "js-sys", "log", "naga", @@ -1330,15 +1438,16 @@ dependencies = [ [[package]] name = "wgpu-core" -version = "0.19.4" +version = "0.21.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28b94525fc99ba9e5c9a9e24764f2bc29bad0911a7446c12f446a8277369bf3a" +checksum = "d50819ab545b867d8a454d1d756b90cd5f15da1f2943334ca314af10583c9d39" dependencies = [ "arrayvec", "bit-vec", "bitflags 2.11.1", "cfg_aliases", "codespan-reporting", + "document-features", "indexmap", "log", "naga", @@ -1356,9 +1465,9 @@ dependencies = [ [[package]] name = "wgpu-hal" -version = "0.19.5" +version = "0.21.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfabcfc55fd86611a855816326b2d54c3b2fd7972c27ce414291562650552703" +checksum = "172e490a87295564f3fcc0f165798d87386f6231b04d4548bca458cbbfd63222" dependencies = [ "android_system_properties", "arrayvec", @@ -1401,9 +1510,9 @@ dependencies = [ [[package]] name = "wgpu-types" -version = "0.19.2" +version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b671ff9fb03f78b46ff176494ee1ebe7d603393f42664be55b64dc8d53969805" +checksum = "1353d9a46bff7f955a680577f34c69122628cc2076e1d6f3a9be6ef00ae793ef" dependencies = [ "bitflags 2.11.1", "js-sys", @@ -1614,23 +1723,3 @@ dependencies = [ "once_cell", "pkg-config", ] - -[[package]] -name = "zerocopy" -version = "0.8.48" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" -dependencies 
= [ - "zerocopy-derive", -] - -[[package]] -name = "zerocopy-derive" -version = "0.8.48" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.117", -] diff --git a/tellur-core/src/render_context.rs b/tellur-core/src/render_context.rs index 75fbdc8..7ab963b 100644 --- a/tellur-core/src/render_context.rs +++ b/tellur-core/src/render_context.rs @@ -17,6 +17,7 @@ use std::any::Any; use crate::color::Color; use crate::geometry::Vec2; use crate::raster::{CpuRasterImage, RasterComponent, RasterImage, Resolution}; +use crate::vector::VectorGraphic; /// How aggressively a render context should try to keep work on the GPU. /// @@ -146,6 +147,8 @@ pub trait GpuRasterBackend { fn outline(&mut self, input: OutlineInput<'_>) -> Option; + fn rasterize(&mut self, graphic: &VectorGraphic, target: Resolution) -> Option; + fn readback(&mut self, image: RasterImage) -> Option; } diff --git a/tellur-renderer/Cargo.toml b/tellur-renderer/Cargo.toml index de0a266..59169c5 100644 --- a/tellur-renderer/Cargo.toml +++ b/tellur-renderer/Cargo.toml @@ -14,4 +14,5 @@ sysinfo = { version = "0.32", default-features = false, features = ["system"] } tellur-core = { path = "../tellur-core" } thiserror = "2.0.18" tiny-skia = "0.12.0" -wgpu = "0.19" +vello = "0.2.1" +wgpu = "0.20.1" diff --git a/tellur-renderer/src/gpu.rs b/tellur-renderer/src/gpu.rs index 6b27145..3b63df4 100644 --- a/tellur-renderer/src/gpu.rs +++ b/tellur-renderer/src/gpu.rs @@ -1,11 +1,15 @@ use std::borrow::Cow; +use std::num::NonZeroUsize; use std::sync::Arc; use tellur_core::color::Color; +use tellur_core::geometry::{Transform, Vec2}; use tellur_core::raster::{CpuRasterImage, GpuSurface, PixelFormat, RasterImage, Resolution}; use tellur_core::render_context::{ CompositeInput, DropShadowInput, GpuRasterBackend, OutlineInput, }; +use tellur_core::vector::{Node, Paint, Path as 
TellurPath, PathCommand, VectorGraphic}; +use vello::kurbo::{Affine, BezPath, Rect as VelloRect, Stroke as VelloStroke}; use wgpu::util::DeviceExt; const BACKEND: &str = "tellur-wgpu-buffer-v1"; @@ -19,6 +23,8 @@ pub struct GpuRenderer { blur_pipeline: wgpu::ComputePipeline, shadow_pipeline: wgpu::ComputePipeline, outline_pipeline: wgpu::ComputePipeline, + texture_to_buffer_pipeline: wgpu::ComputePipeline, + vello_renderer: Option, stats: GpuRenderStats, } @@ -27,12 +33,13 @@ pub struct GpuRenderStats { pub composites: u64, pub drop_shadows: u64, pub outlines: u64, + pub rasterizes: u64, pub readbacks: u64, } impl GpuRenderStats { pub fn total_ops(self) -> u64 { - self.composites + self.drop_shadows + self.outlines + self.composites + self.drop_shadows + self.outlines + self.rasterizes } } @@ -107,6 +114,18 @@ struct ColorCompositeParams { unsafe impl bytemuck::Zeroable for ColorCompositeParams {} unsafe impl bytemuck::Pod for ColorCompositeParams {} +#[repr(C)] +#[derive(Clone, Copy)] +struct TextureToBufferParams { + width: u32, + height: u32, + _pad0: u32, + _pad1: u32, +} + +unsafe impl bytemuck::Zeroable for TextureToBufferParams {} +unsafe impl bytemuck::Pod for TextureToBufferParams {} + impl GpuRenderer { pub fn new() -> Result { pollster::block_on(Self::new_async()) @@ -127,7 +146,7 @@ impl GpuRenderer { &wgpu::DeviceDescriptor { label: Some("tellur-gpu-device"), required_features: wgpu::Features::empty(), - required_limits: wgpu::Limits::downlevel_defaults(), + required_limits: wgpu::Limits::default(), }, None, ) @@ -152,8 +171,14 @@ impl GpuRenderer { "tellur-outline-composite", &format!("{COMMON_WGSL}{OUTLINE_SHADER}"), ), + texture_to_buffer_pipeline: compute_pipeline( + &device, + "tellur-texture-to-buffer", + TEXTURE_TO_BUFFER_SHADER, + ), device, queue, + vello_renderer: None, stats: GpuRenderStats::default(), }) } @@ -415,6 +440,109 @@ impl GpuRenderer { alpha.height, ); } + + fn render_vello_graphic( + &mut self, + graphic: &VectorGraphic, + 
target: Resolution, + ) -> Option> { + let scene = build_vello_scene(graphic, target)?; + let texture = self.device.create_texture(&wgpu::TextureDescriptor { + label: Some("tellur-vello-target"), + size: wgpu::Extent3d { + width: target.width, + height: target.height, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format: wgpu::TextureFormat::Rgba8Unorm, + usage: wgpu::TextureUsages::STORAGE_BINDING | wgpu::TextureUsages::TEXTURE_BINDING, + view_formats: &[], + }); + let view = texture.create_view(&wgpu::TextureViewDescriptor::default()); + if self.vello_renderer.is_none() { + self.vello_renderer = Some(create_vello_renderer(&self.device)?); + } + self.vello_renderer + .as_mut()? + .render_to_texture( + &self.device, + &self.queue, + &scene, + &view, + &vello::RenderParams { + base_color: vello::peniko::Color::TRANSPARENT, + width: target.width, + height: target.height, + antialiasing_method: vello::AaConfig::Area, + }, + ) + .ok()?; + + let target_image = self.empty_image(target); + let mut encoder = self + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor { + label: Some("tellur-vello-copy-to-buffer"), + }); + self.texture_to_buffer(&mut encoder, &view, &target_image); + self.queue.submit(Some(encoder.finish())); + Some(target_image) + } + + fn texture_to_buffer( + &self, + encoder: &mut wgpu::CommandEncoder, + texture: &wgpu::TextureView, + dst: &GpuBufferImage, + ) { + let params = TextureToBufferParams { + width: dst.width, + height: dst.height, + _pad0: 0, + _pad1: 0, + }; + let params_buffer = self + .device + .create_buffer_init(&wgpu::util::BufferInitDescriptor { + label: Some("tellur-texture-to-buffer-params"), + contents: bytemuck::bytes_of(&params), + usage: wgpu::BufferUsages::STORAGE, + }); + let layout = self.texture_to_buffer_pipeline.get_bind_group_layout(0); + let bind_group = self.device.create_bind_group(&wgpu::BindGroupDescriptor { + label:
Some("tellur-texture-to-buffer-bind-group"), + layout: &layout, + entries: &[ + wgpu::BindGroupEntry { + binding: 0, + resource: wgpu::BindingResource::TextureView(texture), + }, + wgpu::BindGroupEntry { + binding: 1, + resource: dst.buffer.as_entire_binding(), + }, + wgpu::BindGroupEntry { + binding: 2, + resource: params_buffer.as_entire_binding(), + }, + ], + }); + + let mut pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor { + label: Some("tellur-texture-to-buffer-pass"), + timestamp_writes: None, + }); + pass.set_pipeline(&self.texture_to_buffer_pipeline); + pass.set_bind_group(0, &bind_group, &[]); + pass.dispatch_workgroups( + div_ceil(dst.width, WORKGROUP), + div_ceil(dst.height, WORKGROUP), + 1, + ); + } } impl GpuRasterBackend for GpuRenderer { @@ -528,6 +656,12 @@ impl GpuRasterBackend for GpuRenderer { Some(self.raster_image(target)) } + fn rasterize(&mut self, graphic: &VectorGraphic, target: Resolution) -> Option { + let target_image = self.render_vello_graphic(graphic, target)?; + self.stats.rasterizes = self.stats.rasterizes.saturating_add(1); + Some(self.raster_image(target_image)) + } + fn readback(&mut self, image: RasterImage) -> Option { match image { RasterImage::Cpu(image) => Some(image), @@ -588,9 +722,23 @@ fn compute_pipeline( layout: None, module: &shader, entry_point: "main", + compilation_options: Default::default(), }) } +fn create_vello_renderer(device: &wgpu::Device) -> Option { + vello::Renderer::new( + device, + vello::RendererOptions { + surface_format: None, + use_cpu: false, + antialiasing_support: vello::AaSupport::all(), + num_init_threads: NonZeroUsize::new(1), + }, + ) + .ok() +} + fn dispatch_three_buffer( device: &wgpu::Device, encoder: &mut wgpu::CommandEncoder, @@ -652,6 +800,179 @@ fn color_u8(color: Color) -> [u32; 4] { ] } +fn build_vello_scene(graphic: &VectorGraphic, target: Resolution) -> Option { + if target.width == 0 + || target.height == 0 + || graphic.view_box.size.0 <= 0.0 + || 
graphic.view_box.size.1 <= 0.0 + { + return None; + } + + let sx = target.width as f32 / graphic.view_box.size.0; + let sy = target.height as f32 / graphic.view_box.size.1; + let view = Transform { + a: sx, + b: 0.0, + c: 0.0, + d: sy, + tx: -graphic.view_box.origin.0 * sx, + ty: -graphic.view_box.origin.1 * sy, + }; + let clip = VelloRect::new(0.0, 0.0, target.width as f64, target.height as f64); + let mut scene = vello::Scene::new(); + encode_vello_node(&mut scene, &graphic.root, view, &clip)?; + Some(scene) +} + +fn encode_vello_node( + scene: &mut vello::Scene, + node: &Node, + transform: Transform, + clip: &VelloRect, +) -> Option<()> { + match node { + Node::Group(group) => { + let opacity = group.opacity.clamp(0.0, 1.0); + if opacity <= 0.0 { + return Some(()); + } + let transform = concat_transform(transform, group.transform); + if opacity < 1.0 { + scene.push_layer( + vello::peniko::BlendMode::default(), + opacity, + Affine::IDENTITY, + clip, + ); + } + for child in &group.children { + encode_vello_node(scene, child, transform, clip)?; + } + if opacity < 1.0 { + scene.pop_layer(); + } + } + Node::Path(path) => encode_vello_path(scene, path, transform)?, + } + Some(()) +} + +fn encode_vello_path( + scene: &mut vello::Scene, + path: &TellurPath, + transform: Transform, +) -> Option<()> { + if path.fill.is_none() && path.stroke.is_none() { + return Some(()); + } + let transform = concat_transform(transform, path.transform); + let Some(vello_path) = build_vello_path(&path.commands) else { + return Some(()); + }; + let transform = to_vello_affine(transform); + + if let Some(fill) = &path.fill { + if let Some(paint) = to_vello_color(&fill.paint) { + scene.fill( + vello::peniko::Fill::NonZero, + transform, + paint, + None, + &vello_path, + ); + } + } + + if let Some(stroke) = &path.stroke { + if stroke.width > 0.0 { + if let Some(paint) = to_vello_color(&stroke.paint) { + scene.stroke( + &VelloStroke::new(stroke.width as f64), + transform, + paint, + None, + 
&vello_path, + ); + } + } + } + + Some(()) +} + +fn build_vello_path(commands: &[PathCommand]) -> Option { + let mut path = BezPath::new(); + let mut has_open_subpath = false; + for command in commands { + match *command { + PathCommand::MoveTo(p) => { + path.move_to(to_vello_point(p)); + has_open_subpath = true; + } + PathCommand::LineTo(p) => { + if has_open_subpath { + path.line_to(to_vello_point(p)); + } + } + PathCommand::QuadTo { control, to } => { + if has_open_subpath { + path.quad_to(to_vello_point(control), to_vello_point(to)); + } + } + PathCommand::CubicTo { c1, c2, to } => { + if has_open_subpath { + path.curve_to(to_vello_point(c1), to_vello_point(c2), to_vello_point(to)); + } + } + PathCommand::Close => { + if has_open_subpath { + path.close_path(); + has_open_subpath = false; + } + } + } + } + (!path.elements().is_empty()).then_some(path) +} + +fn to_vello_point(p: Vec2) -> (f64, f64) { + (p.0 as f64, p.1 as f64) +} + +fn to_vello_color(paint: &Paint) -> Option { + let Paint::Solid(color) = paint; + if color.a <= 0.0 { + return None; + } + let [r, g, b, a] = color_u8(*color); + Some(vello::peniko::Color::rgba8( + r as u8, g as u8, b as u8, a as u8, + )) +} + +fn concat_transform(a: Transform, b: Transform) -> Transform { + Transform { + a: a.a * b.a + a.c * b.b, + b: a.b * b.a + a.d * b.b, + c: a.a * b.c + a.c * b.d, + d: a.b * b.c + a.d * b.d, + tx: a.a * b.tx + a.c * b.ty + a.tx, + ty: a.b * b.tx + a.d * b.ty + a.ty, + } +} + +fn to_vello_affine(t: Transform) -> Affine { + Affine::new([ + t.a as f64, + t.b as f64, + t.c as f64, + t.d as f64, + t.tx as f64, + t.ty as f64, + ]) +} + const COMMON_WGSL: &str = r#" fn unpack_rgba(px: u32) -> vec4 { return vec4( @@ -927,13 +1248,43 @@ fn main(@builtin(global_invocation_id) id: vec3) { } "#; +const TEXTURE_TO_BUFFER_SHADER: &str = r#" +struct Params { + width: u32, + height: u32, + pad0: u32, + pad1: u32, +} + +@group(0) @binding(0) var src: texture_2d; +@group(0) @binding(1) var dst: array; +@group(0) 
@binding(2) var params: Params; + +@compute @workgroup_size(16, 16) +fn main(@builtin(global_invocation_id) id: vec3) { + let x = id.x; + let y = id.y; + if (x >= params.width || y >= params.height) { + return; + } + let c = textureLoad(src, vec2(i32(x), i32(y)), 0); + let r = u32(round(clamp(c.r, 0.0, 1.0) * 255.0)); + let g = u32(round(clamp(c.g, 0.0, 1.0) * 255.0)); + let b = u32(round(clamp(c.b, 0.0, 1.0) * 255.0)); + let a = u32(round(clamp(c.a, 0.0, 1.0) * 255.0)); + dst[y * params.width + x] = r | (g << 8u) | (b << 16u) | (a << 24u); +} +"#; + #[cfg(test)] mod tests { use super::*; use tellur_core::composite::composite_at; + use tellur_core::geometry::Rect; use tellur_core::render_context::{ CompositeInput, DropShadowInput, GpuRasterBackend, OutlineInput, }; + use tellur_core::vector::{Fill, Path, PathCommand}; fn gpu_or_skip() -> Option { match GpuRenderer::new() { @@ -1048,4 +1399,80 @@ mod tests { ] ); } + + #[test] + #[ignore = "requires a GPU adapter"] + fn rasterize_fills_simple_rectangle() { + let Some(mut gpu) = gpu_or_skip() else { + return; + }; + let graphic = VectorGraphic { + view_box: Rect { + origin: Vec2::ZERO, + size: Vec2(4.0, 4.0), + }, + root: Node::Path(Path { + commands: vec![ + PathCommand::MoveTo(Vec2(1.0, 1.0)), + PathCommand::LineTo(Vec2(3.0, 1.0)), + PathCommand::LineTo(Vec2(3.0, 3.0)), + PathCommand::LineTo(Vec2(1.0, 3.0)), + PathCommand::Close, + ], + fill: Some(Fill { + paint: Paint::Solid(Color::rgba_u8(8, 9, 10, 255)), + }), + stroke: None, + transform: Transform::IDENTITY, + }), + }; + + let rendered = + GpuRasterBackend::rasterize(&mut gpu, &graphic, Resolution::new(4, 4)).unwrap(); + let rendered = readback(&mut gpu, rendered); + + let mut filled = 0; + for pixel in rendered.pixels.chunks_exact(4) { + if pixel[3] != 0 { + assert_eq!(pixel, &[8, 9, 10, 255]); + filled += 1; + } + } + assert_eq!(filled, 4); + } + + #[test] + #[ignore = "requires a GPU adapter"] + fn rasterize_preserves_straight_alpha() { + let Some(mut gpu) = 
gpu_or_skip() else { + return; + }; + let graphic = VectorGraphic { + view_box: Rect { + origin: Vec2::ZERO, + size: Vec2(4.0, 4.0), + }, + root: Node::Path(Path { + commands: vec![ + PathCommand::MoveTo(Vec2(1.0, 1.0)), + PathCommand::LineTo(Vec2(3.0, 1.0)), + PathCommand::LineTo(Vec2(3.0, 3.0)), + PathCommand::LineTo(Vec2(1.0, 3.0)), + PathCommand::Close, + ], + fill: Some(Fill { + paint: Paint::Solid(Color::rgba_u8(80, 40, 20, 128)), + }), + stroke: None, + transform: Transform::IDENTITY, + }), + }; + + let rendered = + GpuRasterBackend::rasterize(&mut gpu, &graphic, Resolution::new(4, 4)).unwrap(); + let rendered = readback(&mut gpu, rendered); + let center = &rendered.pixels[((1 * 4 + 1) * 4)..((1 * 4 + 2) * 4)]; + + assert_eq!(center, &[80, 40, 20, 128]); + } } diff --git a/tellur-renderer/src/rasterize.rs b/tellur-renderer/src/rasterize.rs index 98883ac..6479ab5 100644 --- a/tellur-renderer/src/rasterize.rs +++ b/tellur-renderer/src/rasterize.rs @@ -24,8 +24,15 @@ impl RasterComponent for Raster self.vector.paint_bounds(size) } - fn render(&self, size: Vec2, target: Resolution, _ctx: &mut dyn RenderContext) -> RasterImage { + fn render(&self, size: Vec2, target: Resolution, ctx: &mut dyn RenderContext) -> RasterImage { let graphic = self.vector.render(size); + if ctx.prefers_gpu() { + if let Some(gpu) = ctx.gpu_backend() { + if let Some(image) = gpu.rasterize(&graphic, target) { + return image; + } + } + } rasterize(&graphic, target.width, target.height) } } diff --git a/tellur-renderer/src/render_context.rs b/tellur-renderer/src/render_context.rs index c54b2b9..d37692b 100644 --- a/tellur-renderer/src/render_context.rs +++ b/tellur-renderer/src/render_context.rs @@ -172,7 +172,7 @@ impl fmt::Display for CacheMetrics { )?; writeln!( f, - "GPU preference={:?}, attempted={}, available={}, ops={} (composite {}, shadow {}, outline {}, readback {})", + "GPU preference={:?}, attempted={}, available={}, ops={} (composite {}, shadow {}, outline {}, rasterize {}, 
readback {})", self.gpu_preference, self.gpu_init_attempted, self.gpu_available, @@ -180,6 +180,7 @@ impl fmt::Display for CacheMetrics { self.gpu.composites, self.gpu.drop_shadows, self.gpu.outlines, + self.gpu.rasterizes, self.gpu.readbacks, )?; if !self.per_type.is_empty() { From ff78d1c5fd9c93daea6efb394e8878838c216905 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E3=81=82=E3=81=99=E3=81=B1=E3=82=8B?= Date: Thu, 28 May 2026 04:04:56 +0900 Subject: [PATCH 4/4] fix: address clippy warnings --- tellur-live/examples/demo_scene/mod.rs | 7 ++- tellur-renderer/src/gpu.rs | 81 +++++++++++++------------- 2 files changed, 45 insertions(+), 43 deletions(-) diff --git a/tellur-live/examples/demo_scene/mod.rs b/tellur-live/examples/demo_scene/mod.rs index 096f895..56e4a0d 100644 --- a/tellur-live/examples/demo_scene/mod.rs +++ b/tellur-live/examples/demo_scene/mod.rs @@ -355,6 +355,7 @@ fn add_fx_rect( ); } +#[allow(clippy::too_many_arguments)] fn add_fx_outline_rect( scene: &mut VectorLayer, center: Vec2, @@ -1241,7 +1242,7 @@ fn draw_scan(scene: &mut VectorLayer, time: T, p: Palette) { const ANGLE_LABELS: [&str; 12] = [ "000", "030", "060", "090", "120", "150", "180", "210", "240", "270", "300", "330", ]; - for i in 0..12 { + for (i, label) in ANGLE_LABELS.iter().enumerate() { let a = i as f32 / 12.0 * TAU - PI * 0.5; let stagger = i as f32 * 0.025; let tk = ease_out_cubic(time.phase(3.85 + stagger, 4.3 + stagger)); @@ -1280,7 +1281,7 @@ fn draw_scan(scene: &mut VectorLayer, time: T, p: Palette) { scene, Vec2(CX + a.cos() * label_r, CY + a.sin() * label_r), Anchor::CENTER, - ANGLE_LABELS[i], + label, 11.0, alpha(p.paper, label_alpha * 0.7), Weight::NORMAL, @@ -1601,7 +1602,7 @@ fn draw_resolve(scene: &mut VectorLayer, time: T, p: Palette) { // a dim outer field ring with hash marks. All keyed to a slow // post-settle rotation shared with the satellites. 
- let comp_in = ease_out_cubic(time.phase(6.28, 6.7)); + let comp_in = ease_out_cubic(time.phase(6.27, 6.7)); if comp_in > 0.0 { let breath = 1.0 + wave(time, 1.4, 0.0) * 0.06; diff --git a/tellur-renderer/src/gpu.rs b/tellur-renderer/src/gpu.rs index 3b63df4..2bd142c 100644 --- a/tellur-renderer/src/gpu.rs +++ b/tellur-renderer/src/gpu.rs @@ -287,11 +287,9 @@ impl GpuRenderer { &self.device, encoder, &self.composite_pipeline, - &dst.buffer, - &src.buffer, + [&dst.buffer, &src.buffer], &params, - src.width, - src.height, + DispatchSize::new(src.width, src.height), ); } @@ -317,11 +315,9 @@ impl GpuRenderer { &self.device, encoder, &self.copy_alpha_pipeline, - &src.buffer, - &alpha.buffer, + [&src.buffer, &alpha.buffer], &params, - alpha.width, - alpha.height, + DispatchSize::new(alpha.width, alpha.height), ); } @@ -359,11 +355,9 @@ impl GpuRenderer { &self.device, encoder, &self.blur_pipeline, - &src.buffer, - &dst.buffer, + [&src.buffer, &dst.buffer], &params, - src.width, - src.height, + DispatchSize::new(src.width, src.height), ); } @@ -395,11 +389,9 @@ impl GpuRenderer { &self.device, encoder, &self.shadow_pipeline, - &dst.buffer, - &alpha.buffer, + [&dst.buffer, &alpha.buffer], &params, - alpha.width, - alpha.height, + DispatchSize::new(alpha.width, alpha.height), ); } @@ -408,10 +400,8 @@ impl GpuRenderer { encoder: &mut wgpu::CommandEncoder, dst: &GpuBufferImage, alpha: &GpuBufferImage, - offset_x: i32, - offset_y: i32, - radius_x: u32, - radius_y: u32, + offset: (i32, i32), + radius: (u32, u32), color: Color, ) { let [r, g, b, a] = color_u8(color); @@ -420,24 +410,22 @@ impl GpuRenderer { dst_h: dst.height, src_w: alpha.width, src_h: alpha.height, - offset_x, - offset_y, + offset_x: offset.0, + offset_y: offset.1, r, g, b, a, - radius_x, - radius_y, + radius_x: radius.0, + radius_y: radius.1, }; dispatch_three_buffer( &self.device, encoder, &self.outline_pipeline, - &dst.buffer, - &alpha.buffer, + [&dst.buffer, &alpha.buffer], &params, - alpha.width, - alpha.height, +
DispatchSize::new(alpha.width, alpha.height), ); } @@ -637,10 +625,8 @@ impl GpuRasterBackend for GpuRenderer { &mut encoder, &target, &alpha, - input.outline_offset_x, - input.outline_offset_y, - input.radius_x, - input.radius_y, + (input.outline_offset_x, input.outline_offset_y), + (input.radius_x, input.radius_y), input.color, ); self.composite_one( @@ -739,15 +725,25 @@ fn create_vello_renderer(device: &wgpu::Device) -> Option { .ok() } +#[derive(Clone, Copy)] +struct DispatchSize { + width: u32, + height: u32, +} + +impl DispatchSize { + fn new(width: u32, height: u32) -> Self { + Self { width, height } + } +} + fn dispatch_three_buffer( device: &wgpu::Device, encoder: &mut wgpu::CommandEncoder, pipeline: &wgpu::ComputePipeline, - a: &wgpu::Buffer, - b: &wgpu::Buffer, + buffers: [&wgpu::Buffer; 2], params: &P, - width: u32, - height: u32, + size: DispatchSize, ) { let params = device.create_buffer_init(&wgpu::util::BufferInitDescriptor { label: Some("tellur-gpu-params"), @@ -761,11 +757,11 @@ fn dispatch_three_buffer( entries: &[ wgpu::BindGroupEntry { binding: 0, - resource: a.as_entire_binding(), + resource: buffers[0].as_entire_binding(), }, wgpu::BindGroupEntry { binding: 1, - resource: b.as_entire_binding(), + resource: buffers[1].as_entire_binding(), }, wgpu::BindGroupEntry { binding: 2, @@ -780,7 +776,11 @@ fn dispatch_three_buffer( }); pass.set_pipeline(pipeline); pass.set_bind_group(0, &bind_group, &[]); - pass.dispatch_workgroups(div_ceil(width, WORKGROUP), div_ceil(height, WORKGROUP), 1); + pass.dispatch_workgroups( + div_ceil(size.width, WORKGROUP), + div_ceil(size.height, WORKGROUP), + 1, + ); } fn div_ceil(n: u32, d: u32) -> u32 { @@ -1471,7 +1471,8 @@ mod tests { let rendered = GpuRasterBackend::rasterize(&mut gpu, &graphic, Resolution::new(4, 4)).unwrap(); let rendered = readback(&mut gpu, rendered); - let center = &rendered.pixels[((1 * 4 + 1) * 4)..((1 * 4 + 2) * 4)]; + let center_idx = (rendered.width as usize + 1) * 4; + let center = 
&rendered.pixels[center_idx..center_idx + 4]; assert_eq!(center, &[80, 40, 20, 128]); }