Skip to content
Open
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
16 commits
Select commit Hold shift + click to select a range
684aa45
feat(avatar): add procedural Clawd character WGSL shader
namearth5005 Mar 20, 2026
9850151
feat(avatar): add AvatarRenderer with procedural Clawd drawing and an…
namearth5005 Mar 20, 2026
ed730c5
feat(settings): add avatar mode toggle and background config
namearth5005 Mar 20, 2026
4f2f8c9
feat(avatar): wire AvatarRenderer into camera layer rendering pipeline
namearth5005 Mar 20, 2026
7b1e8a8
feat(face-tracking): create face tracking crate with Apple Vision bac…
namearth5005 Mar 20, 2026
581792b
feat(avatar): integrate face tracking into rendering pipeline
namearth5005 Mar 20, 2026
1943615
feat(avatar): add spring-smoothed face pose for organic avatar motion
namearth5005 Mar 20, 2026
0e0b901
chore: update Cargo.lock for face-tracking crate
namearth5005 Mar 20, 2026
69497a0
fix(avatar): fix uniform buffer size mismatch causing wgpu crash
namearth5005 Mar 20, 2026
399951c
fix(avatar): improve Clawd shader proportions with white outline
namearth5005 Mar 20, 2026
ca603d6
feat(avatar): wire Rive integration with Canvas2D Clawd fallback and …
namearth5005 Mar 20, 2026
e91ff59
feat(avatar): add Rive runtime support with Canvas2D fallback for cam…
namearth5005 Mar 20, 2026
4368148
Merge branch 'main' into pr/1672
richiemcilroy Mar 23, 2026
08339c1
fix(face-tracking): pass frame stride to fix RGBA data size mismatch
namearth5005 Mar 20, 2026
1fcef81
feat(avatar): working Rive runtime with face tracking expression mapping
namearth5005 Mar 24, 2026
6a46b70
feat(avatar): swap to Live2D (pixi-live2d-display) for proper face pu…
namearth5005 Mar 24, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ cidre = { git = "https://github.com/CapSoftware/cidre", rev = "bf84b67079a8", fe
"io_surface",
"mtl",
"vt",
"vn",
], default-features = false }

windows = "0.60.0"
Expand Down
6 changes: 6 additions & 0 deletions apps/desktop/src-tauri/src/general_settings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,10 @@ pub struct GeneralSettingsStore {
#[serde(default)]
pub auto_zoom_on_clicks: bool,
#[serde(default)]
pub avatar_mode: bool,
#[serde(default)]
pub avatar_background: cap_project::AvatarBackground,
#[serde(default)]
pub post_deletion_behaviour: PostDeletionBehaviour,
#[serde(default = "default_excluded_windows")]
pub excluded_windows: Vec<WindowExclusion>,
Expand Down Expand Up @@ -203,6 +207,8 @@ impl Default for GeneralSettingsStore {
recording_countdown: Some(3),
enable_native_camera_preview: default_enable_native_camera_preview(),
auto_zoom_on_clicks: false,
avatar_mode: false,
avatar_background: cap_project::AvatarBackground::Dark,
post_deletion_behaviour: PostDeletionBehaviour::DoNothing,
excluded_windows: default_excluded_windows(),
delete_instant_recordings_after_upload: false,
Expand Down
10 changes: 10 additions & 0 deletions apps/desktop/src/routes/(window-chrome)/settings/experimental.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ function Inner(props: { initialStore: GeneralSettingsStore | null }) {
enableNotifications: true,
enableNativeCameraPreview: false,
autoZoomOnClicks: false,
avatarMode: false,
custom_cursor_capture2: true,
},
);
Expand Down Expand Up @@ -96,6 +97,15 @@ function Inner(props: { initialStore: GeneralSettingsStore | null }) {
/>
</div>
</div>
<h3 class="text-sm text-gray-12 w-fit">Camera Features</h3>
<div class="px-3 rounded-xl border divide-y divide-gray-3 border-gray-3 bg-gray-2">
<ToggleSettingItem
label="Avatar mode (Clawd)"
description="Replace your camera feed with an animated Clawd character that mimics your facial expressions. Perfect for creating engaging tutorials."
value={!!settings.avatarMode}
onChange={(value) => handleChange("avatarMode", value)}
/>
</div>
</div>
</div>
);
Expand Down
13 changes: 13 additions & 0 deletions crates/face-tracking/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[package]
name = "cap-face-tracking"
version = "0.1.0"
edition = "2024"

[lints]
workspace = true

[dependencies]
tracing.workspace = true

[target.'cfg(target_os = "macos")'.dependencies]
cidre = { workspace = true }
30 changes: 30 additions & 0 deletions crates/face-tracking/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/// One frame of face-tracking output.
///
/// Angles come straight from the tracking backend (Apple Vision on macOS —
/// presumably radians, TODO confirm for any future backend); the openness
/// values are normalized to [0, 1] by the backend before being stored here.
#[derive(Clone, Debug, Default)]
pub struct FacePose {
    /// Head nod (up/down rotation); 0.0 when the backend reports none.
    pub head_pitch: f32,
    /// Head turn (left/right rotation); 0.0 when the backend reports none.
    pub head_yaw: f32,
    /// Head tilt (ear-to-shoulder rotation); 0.0 when the backend reports none.
    pub head_roll: f32,
    /// Mouth openness in [0, 1]; 0.0 = closed.
    pub mouth_open: f32,
    /// Left-eye openness in [0, 1]; defaults to open (1.0) on tracking dropout.
    pub left_eye_open: f32,
    /// Right-eye openness in [0, 1]; defaults to open (1.0) on tracking dropout.
    pub right_eye_open: f32,
    /// Detection confidence reported by the backend; 0.0 when no face is found.
    pub confidence: f32,
}

#[cfg(target_os = "macos")]
mod macos;

#[cfg(target_os = "macos")]
pub use macos::FaceTracker;

/// No-op tracker for non-macOS targets: the real implementation lives in
/// the `macos` module; everywhere else a neutral pose is reported so the
/// rendering pipeline can call `track` unconditionally.
#[cfg(not(target_os = "macos"))]
pub struct FaceTracker;

#[cfg(not(target_os = "macos"))]
impl FaceTracker {
    /// Creates the stub tracker; never fails and allocates nothing.
    pub fn new() -> Self {
        Self
    }

    /// Ignores the frame entirely and returns `FacePose::default()`,
    /// mirroring the macOS backend's "no face detected" result.
    pub fn track(&mut self, _rgba_data: &[u8], _width: u32, _height: u32) -> FacePose {
        FacePose::default()
    }
}
243 changes: 243 additions & 0 deletions crates/face-tracking/src/macos.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
use std::ffi::c_void;

use cidre::{arc, cv, ns, objc, vn};

use crate::FacePose;

/// macOS face tracker backed by Apple's Vision framework.
///
/// Holds one reusable landmarks request; each [`Self::track`] call copies
/// the frame into a `CVPixelBuffer` and runs Vision on it synchronously.
pub struct FaceTracker {
    // Built once by `create_landmarks_request`: either a
    // VNDetectFaceLandmarksRequest, or (if that class is unavailable at
    // runtime) a DetectFaceRectanglesRequest transmuted to the base type.
    landmarks_request: arc::R<vn::Request>,
}

impl FaceTracker {
    /// Creates a tracker with a reusable Vision landmarks request.
    pub fn new() -> Self {
        let landmarks_request = create_landmarks_request();
        Self { landmarks_request }
    }

    /// Runs face tracking on one tightly packed RGBA frame. Any failure
    /// (bad buffer, Vision error, no face) yields a neutral default pose
    /// rather than an error, so callers never have to branch.
    pub fn track(&mut self, rgba_data: &[u8], width: u32, height: u32) -> FacePose {
        match self.track_inner(rgba_data, width, height) {
            Some(pose) => pose,
            None => FacePose::default(),
        }
    }

    /// Full pipeline: validate the input, convert RGBA -> BGRA into a
    /// `CVPixelBuffer`, run Vision, then map the best face observation to a
    /// `FacePose`. Returns `None` at the first failure.
    fn track_inner(&mut self, rgba_data: &[u8], width: u32, height: u32) -> Option<FacePose> {
        let w = width as usize;
        let h = height as usize;
        // The source is assumed tightly packed at 4 bytes/pixel.
        let src_row_bytes = w * 4;
        let expected_len = src_row_bytes * h;
        if rgba_data.len() < expected_len {
            tracing::warn!(
                "RGBA data too small: {} < {}",
                rgba_data.len(),
                expected_len
            );
            return None;
        }

        // Vision consumes CVPixelBuffers; 32BGRA is its preferred format,
        // hence the channel swap below.
        let mut pixel_buf = cv::PixelBuf::new(w, h, cv::PixelFormat::_32_BGRA, None).ok()?;

        // The base address is only valid while the buffer is locked.
        unsafe {
            pixel_buf
                .lock_base_addr(cv::pixel_buffer::LockFlags::DEFAULT)
                .result()
                .ok()?;
        }

        let dst_base = unsafe { CVPixelBufferGetBaseAddress(&pixel_buf) };
        let dst_row_bytes = unsafe { CVPixelBufferGetBytesPerRow(&pixel_buf) };

        if dst_base.is_null() {
            // Unlock before bailing so the buffer is never left locked.
            unsafe {
                pixel_buf.unlock_lock_base_addr(cv::pixel_buffer::LockFlags::DEFAULT);
            }
            return None;
        }

        // SAFETY: the buffer is locked, dst_base is non-null, and
        // dst_row_bytes is the stride CoreVideo reports for it.
        unsafe {
            rgba_to_bgra_copy(
                rgba_data,
                dst_base as *mut u8,
                w,
                h,
                src_row_bytes,
                dst_row_bytes,
            );
        }

        unsafe {
            pixel_buf.unlock_lock_base_addr(cv::pixel_buffer::LockFlags::DEFAULT);
        }

        let handler = vn::ImageRequestHandler::with_cv_pixel_buf(&pixel_buf, None)?;

        // A rectangles request runs alongside landmarks as a fallback source
        // of head pose when landmarks produce no observation.
        let mut rect_request = vn::DetectFaceRectanglesRequest::new();
        rect_request.set_revision(vn::DetectFaceRectanglesRequest::REVISION_3);

        let requests =
            ns::Array::<vn::Request>::from_slice(&[&rect_request, &self.landmarks_request]);
        if handler.perform(&requests).is_err() {
            return None;
        }

        // NOTE(review): this reinterprets `landmarks_request` (normally a
        // VNDetectFaceLandmarksRequest) as &DetectFaceRectanglesRequest just
        // to reach a typed `results()`. Objective-C dispatch handles it, but
        // the pointer cast is unsound by Rust's rules — prefer a typed
        // landmarks-request binding if/when cidre exposes one.
        let landmarks_results: Option<arc::R<ns::Array<vn::FaceObservation>>> = unsafe {
            let raw: *const vn::Request = &*self.landmarks_request;
            let face_req: &vn::DetectFaceRectanglesRequest =
                &*(raw as *const vn::DetectFaceRectanglesRequest);
            face_req.results()
        };

        // Prefer the landmarks observation (richer data); fall back to the
        // first plain rectangle observation when it is absent.
        let face_obs = landmarks_results
            .as_ref()
            .filter(|r| !r.is_empty())
            .and_then(|r| r.get(0).ok());

        let face_obs = match face_obs {
            Some(obs) => obs,
            None => {
                let results = rect_request.results()?;
                if results.is_empty() {
                    return None;
                }
                results.get(0).ok()?
            }
        };

        // Angles may be absent depending on request revision; missing -> 0.
        let head_roll = face_obs.roll().map(|n| n.as_f32()).unwrap_or(0.0);
        let head_yaw = face_obs.yaw().map(|n| n.as_f32()).unwrap_or(0.0);
        let head_pitch = face_obs.pitch().map(|n| n.as_f32()).unwrap_or(0.0);
        let confidence = face_obs.confidence();

        let (mouth_open, left_eye_open, right_eye_open) = extract_landmark_features(&face_obs);

        Some(FacePose {
            head_pitch,
            head_yaw,
            head_roll,
            mouth_open,
            left_eye_open,
            right_eye_open,
            confidence,
        })
    }
}

/// Builds the Vision request used for landmark detection.
///
/// The VNDetectFaceLandmarksRequest class is apparently not exposed as a
/// typed cidre binding (TODO confirm), so it is looked up by name through
/// the Objective-C runtime and instantiated generically. If the class is
/// missing — unexpected on supported macOS versions — a plain rectangles
/// request is substituted so tracking still yields head pose, just without
/// mouth/eye features.
fn create_landmarks_request() -> arc::R<vn::Request> {
    unsafe {
        let cls = objc::objc_getClass(b"VNDetectFaceLandmarksRequest\0".as_ptr());
        match cls {
            Some(cls) => {
                // Reinterpret the untyped class as a factory for the base
                // `vn::Request`; the produced object is-a VNRequest.
                let cls: &objc::Class<vn::Request> = std::mem::transmute(cls);
                cls.new()
            }
            None => {
                tracing::warn!(
                    "VNDetectFaceLandmarksRequest not found, falling back to rectangles"
                );
                // Upcast: every DetectFaceRectanglesRequest is a VNRequest.
                let req = vn::DetectFaceRectanglesRequest::new();
                std::mem::transmute(req)
            }
        }
    }
}

/// Derives (mouth_open, left_eye_open, right_eye_open) from a face
/// observation's landmark regions. With no landmark data the neutral
/// defaults are "mouth closed, both eyes open".
fn extract_landmark_features(face: &vn::FaceObservation) -> (f32, f32, f32) {
    let Some(landmarks) = face.landmarks() else {
        return (0.0, 1.0, 1.0);
    };

    (
        compute_mouth_openness(&landmarks),
        compute_eye_openness(landmarks.left_eye()),
        compute_eye_openness(landmarks.right_eye()),
    )
}

/// Estimates mouth openness in [0, 1] from the inner-lip landmark region.
///
/// Uses the height-to-width ratio of the inner lips so the measure is
/// invariant to face size, then scales by 3 and clamps to [0, 1].
/// Returns 0.0 (closed) whenever the region is missing or degenerate.
fn compute_mouth_openness(landmarks: &vn::FaceLandmarks2d) -> f32 {
    let Some(inner_lips) = landmarks.inner_lips() else {
        return 0.0;
    };

    let pts = inner_lips.normalized_points();
    let n = pts.len();
    if n < 6 {
        // Too few points to sample corners and mid-lip positions.
        return 0.0;
    }

    // Sample points by position in the region's point order — presumably
    // corners at 0 and n/2, upper/lower lips near 2 and n-2 (TODO confirm
    // against Vision's documented point ordering).
    let upper = pts[2];
    let lower = pts[n - 2];
    let corner_a = pts[0];
    let corner_b = pts[n / 2];

    let gap = (upper.y - lower.y).abs() as f32;
    let span = (corner_b.x - corner_a.x).abs() as f32;

    if span < 1e-6 {
        0.0
    } else {
        (gap / span * 3.0).clamp(0.0, 1.0)
    }
}

/// Estimates eye openness in [0, 1] from an eye landmark region.
///
/// Defaults to 1.0 (open) whenever the region is missing or unusable, so
/// a tracking dropout never reads as a blink. Like the mouth heuristic,
/// this is a lid-gap-to-eye-width ratio, scaled by 4 and clamped.
fn compute_eye_openness(eye_region: Option<arc::R<vn::FaceLandmarkRegion2d>>) -> f32 {
    let Some(region) = eye_region else {
        return 1.0;
    };

    let pts = region.normalized_points();
    let n = pts.len();
    if n < 6 {
        return 1.0;
    }

    // Assumes the points trace the eye contour with corners at indices 0
    // and n/2, and each half's midpoint on a lid — mirrors the sampling in
    // compute_mouth_openness (TODO confirm Vision's point ordering).
    let half = n / 2;
    let lid_gap = (pts[half / 2].y - pts[half + half / 2].y).abs() as f32;
    let eye_width = (pts[half].x - pts[0].x).abs() as f32;

    if eye_width < 1e-6 {
        return 1.0;
    }

    (lid_gap / eye_width * 4.0).clamp(0.0, 1.0)
}

/// Copies an RGBA pixel buffer into a BGRA destination, swapping the red
/// and blue channels and honouring independent row strides (the
/// destination — a CVPixelBuffer — may have row padding).
///
/// # Safety
///
/// The caller must guarantee:
/// - `dst` is valid for writes of `height * dst_row_bytes` bytes,
/// - `src.len() >= height * src_row_bytes` and `src_row_bytes >= width * 4`,
/// - `dst_row_bytes >= width * 4`,
/// - the `src` and `dst` regions do not overlap.
unsafe fn rgba_to_bgra_copy(
    src: &[u8],
    dst: *mut u8,
    width: usize,
    height: usize,
    src_row_bytes: usize,
    dst_row_bytes: usize,
) {
    let row_px_bytes = width * 4;
    for y in 0..height {
        // Exact-length row slices: the bounds are established once per row,
        // so the per-pixel indexing below needs no checks and can vectorize.
        let src_row = &src[y * src_row_bytes..y * src_row_bytes + row_px_bytes];
        // SAFETY: caller guarantees dst is valid for height * dst_row_bytes
        // bytes and does not alias src, so this row is exclusively ours.
        let dst_row = unsafe {
            std::slice::from_raw_parts_mut(dst.add(y * dst_row_bytes), row_px_bytes)
        };
        for (d, s) in dst_row.chunks_exact_mut(4).zip(src_row.chunks_exact(4)) {
            // RGBA -> BGRA: swap R and B, keep G and A.
            d[0] = s[2];
            d[1] = s[1];
            d[2] = s[0];
            d[3] = s[3];
        }
    }
}

unsafe extern "C-unwind" {
Comment on lines +238 to +243
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Unsound transmute to retrieve results from landmarks_request

self.landmarks_request is typed as arc::R<vn::Request> but was constructed as a VNDetectFaceLandmarksRequest (or a DetectFaceRectanglesRequest in the fallback case). The code reinterprets its raw pointer as &vn::DetectFaceRectanglesRequest to call .results():

let face_req: &vn::DetectFaceRectanglesRequest =
    &*(raw as *const vn::DetectFaceRectanglesRequest);
face_req.results()

In the fallback path (create_landmarks_request returns a DetectFaceRectanglesRequest transmuted to vn::Request), this double-cast works. But in the normal path where self.landmarks_request is genuinely a VNDetectFaceLandmarksRequest, calling methods through a DetectFaceRectanglesRequest pointer is technically UB in Rust, even if the Objective-C runtime happens to handle it correctly due to the class hierarchy.

Consider casting to VNDetectFaceLandmarksRequest directly (which is the correct type), or using the Objective-C runtime's objc_msgSend / cidre's typed APIs for results() on the actual class.

Prompt To Fix With AI
This is a comment left during a code review.
Path: crates/face-tracking/src/macos.rs
Line: 235-240

Comment:
**Unsound transmute to retrieve results from `landmarks_request`**

`self.landmarks_request` is typed as `arc::R<vn::Request>` but was constructed as a `VNDetectFaceLandmarksRequest` (or a `DetectFaceRectanglesRequest` in the fallback case). The code reinterprets its raw pointer as `&vn::DetectFaceRectanglesRequest` to call `.results()`:

```rust
let face_req: &vn::DetectFaceRectanglesRequest =
    &*(raw as *const vn::DetectFaceRectanglesRequest);
face_req.results()
```

In the fallback path (`create_landmarks_request` returns a `DetectFaceRectanglesRequest` transmuted to `vn::Request`), this double-cast works. But in the normal path where `self.landmarks_request` is genuinely a `VNDetectFaceLandmarksRequest`, calling methods through a `DetectFaceRectanglesRequest` pointer is technically UB in Rust, even if the Objective-C runtime happens to handle it correctly due to the class hierarchy.

Consider casting to `VNDetectFaceLandmarksRequest` directly (which is the correct type), or using the Objective-C runtime's `objc_msgSend` / cidre's typed APIs for `results()` on the actual class.

How can I resolve this? If you propose a fix, please make it concise.

fn CVPixelBufferGetBaseAddress(pixel_buffer: &cv::PixelBuf) -> *mut c_void;
fn CVPixelBufferGetBytesPerRow(pixel_buffer: &cv::PixelBuf) -> usize;
}
9 changes: 9 additions & 0 deletions crates/project/src/configuration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1182,6 +1182,15 @@ impl ProjectConfiguration {
}
}

/// Backdrop rendered behind the avatar when avatar mode replaces the
/// camera feed. Serialized in camelCase ("dark", "light", "gradient");
/// missing/older configs deserialize to the `Dark` default.
#[derive(Type, Serialize, Deserialize, Clone, Copy, Debug, Default)]
#[serde(rename_all = "camelCase")]
pub enum AvatarBackground {
    #[default]
    Dark,
    Light,
    Gradient,
}

pub const SLOW_SMOOTHING_SAMPLES: usize = 24;
pub const REGULAR_SMOOTHING_SAMPLES: usize = 16;
pub const FAST_SMOOTHING_SAMPLES: usize = 10;
Expand Down
1 change: 1 addition & 0 deletions crates/rendering/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ workspace = true
[dependencies]
anyhow.workspace = true
bytemuck = { version = "1.7", features = ["derive"] }
cap-face-tracking = { path = "../face-tracking" }
cap-flags = { path = "../flags" }
cap-project = { path = "../project" }
cap-video-decode = { path = "../video-decode" }
Expand Down
Loading