diff --git a/src/main.zig b/src/main.zig
index 2655b6f..4ce4c81 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -62,6 +62,10 @@ fn fbViewport(win_w: u32, win_h: u32) Viewport {
 
 const State = struct {
     pipeline: sg.Pipeline = .{},
+    // Second pipeline whose fragment shader applies the HSV-smooth filter on the
+    // GPU; selected at draw time when ui.display_hsv is on. Keeping the filter in
+    // a shader means the NP2kai core needs no video-filter changes.
+    pipeline_hsv: sg.Pipeline = .{},
     bindings: sg.Bindings = .{},
     pass_action: sg.PassAction = .{},
     image: sg.Image = .{},
@@ -82,7 +86,11 @@ var last_emu_ns: i128 = 0;
 var skip_counter: u32 = 0;
 const nowait_frames_per_tick: u32 = 16;
 var draw_fps: f32 = 0.0;
-var last_draw_ns: i128 = 0;
+// Windowed FPS: average presented-frame count over a ~0.5s wall-clock window.
+// Replaces an instantaneous 1/Δt readout, which swung wildly (into the hundreds)
+// whenever the host frame() rate beat against the ~60Hz draw gate.
+var fps_draw_count: u32 = 0;
+var fps_window_start_ns: i128 = 0;
 
 // 音声バッファ(Zig側で変換用に使用)
 var audio_buffer: [4096 * 2]f32 = undefined;
@@ -135,13 +143,11 @@ export fn init() void {
         // because the reset's diskdrv_hddbind() binds drives from the config.
         configureHdds(expanded_disks);
     }
-    // Always load the HSV-smooth profile into np2cfg before pccore_init() reads
-    // it into the filter manager, so the Screen menu can toggle it live. The
-    // --video-filter flag only decides whether it starts on. UsaProject never
-    // reads np2kai's .cfg, so this is the only path that configures it.
-    const vf_on = if (parsed_opts) |o| o.video_filter else false;
-    cz.usa_setup_video_filter(if (vf_on) 1 else 0);
-    ui.display_hsv = vf_on;
+    // The HSV-smooth filter now lives in the app layer as a GPU fragment-shader
+    // pass (state.pipeline_hsv), so the NP2kai core filter stays disabled and
+    // the core source is left unmodified. The --video-filter flag only decides
+    // whether the shader filter starts on; the Screen menu toggles it live.
+    ui.display_hsv = if (parsed_opts) |o| o.video_filter else false;
     cz.pccore_init();
     cz.pccore_reset();
     if (parsed_opts) |opts| {
@@ -195,16 +201,21 @@ export fn init() void {
     state.bindings.views[0] = state.view;
     state.bindings.samplers[0] = state.sampler;
 
+    const blit_attrs = init: {
+        var attrs: [16]sg.VertexAttrState = @splat(.{});
+        attrs[0].format = .FLOAT3;
+        attrs[1].format = .FLOAT2;
+        break :init attrs;
+    };
     state.pipeline = sg.makePipeline(.{
-        .shader = makeBlitShader(),
-        .layout = .{
-            .attrs = init: {
-                var attrs: [16]sg.VertexAttrState = @splat(.{});
-                attrs[0].format = .FLOAT3;
-                attrs[1].format = .FLOAT2;
-                break :init attrs;
-            },
-        },
+        .shader = makeBlitShader(platform.os.shader_fs_source),
+        .layout = .{ .attrs = blit_attrs },
+        .index_type = .UINT16,
+    });
+    // HSV-smooth variant: same geometry/layout, filtering fragment shader.
+    state.pipeline_hsv = sg.makePipeline(.{
+        .shader = makeBlitShader(platform.os.shader_fs_hsv_source),
+        .layout = .{ .attrs = blit_attrs },
         .index_type = .UINT16,
     });
 
@@ -321,14 +332,14 @@ fn setupDataDir() void {
     config.setDataDir(dir);
 }
 
-fn makeBlitShader() sg.Shader {
+fn makeBlitShader(fs_source: [*:0]const u8) sg.Shader {
     return sg.makeShader(.{
         .vertex_func = .{
             .source = platform.os.shader_vs_source,
         },
         .fragment_func = .{
             .entry = platform.os.shader_entry,
-            .source = platform.os.shader_fs_source,
+            .source = fs_source,
         },
         .attrs = init: {
             var a: [16]sg.ShaderVertexAttr = @splat(.{});
@@ -390,28 +401,44 @@ export fn frame() void {
         const draw_skip = cz.usa_get_draw_skip();
         var i: u32 = 0;
         while (i < frames) : (i += 1) {
-            const should_draw = blk: {
-                if (draw_skip <= 1) break :blk true;
+            // Only the final emulated frame of this host frame is ever uploaded
+            // to the GPU (a single updateImage() after the loop), so only it
+            // needs the expensive scrndraw_draw() + HSV filter pass. Intermediate
+            // frames just advance the CPU with draw=false; rendering them — and
+            // especially running the per-pixel HSV-smooth filter on them — is
+            // pure waste (16x under No-Wait, up to 4x under catch-up) that
+            // collapses the frame rate to single digits when the filter is on.
+            const is_last = (i + 1 == frames);
+            var should_draw = is_last;
+            // draw_skip is the user's frame-skip setting; apply it to the one
+            // presented frame per host tick so heavy load can drop whole frames.
+            if (is_last and draw_skip > 1) {
                 skip_counter += 1;
                 if (skip_counter >= draw_skip) {
                     skip_counter = 0;
-                    break :blk true;
+                } else {
+                    should_draw = false;
                 }
-                break :blk false;
-            };
+            }
             cz.pccore_exec(should_draw);
             cz.sound_sync();
             if (should_draw) {
                 cz.scrndraw_redraw();
-                const draw_dt_ns = now - last_draw_ns;
-                if (draw_dt_ns > 0) {
-                    draw_fps = @floatCast(1_000_000_000.0 / @as(f64, @floatFromInt(draw_dt_ns)));
-                }
-                last_draw_ns = now;
+                fps_draw_count += 1;
             }
         }
     }
 
+    // Publish smoothed FPS once per ~0.5s window.
+    if (fps_window_start_ns == 0) fps_window_start_ns = now;
+    const win_ns = now - fps_window_start_ns;
+    if (win_ns >= 500_000_000) {
+        const win_s = @as(f64, @floatFromInt(win_ns)) / 1_000_000_000.0;
+        draw_fps = @floatCast(@as(f64, @floatFromInt(fps_draw_count)) / win_s);
+        fps_draw_count = 0;
+        fps_window_start_ns = now;
+    }
+
     pixel.rgb565BufferToRgba8(&fb_rgba, cz.pc98_framebuffer[0 .. FB_WIDTH * FB_HEIGHT]);
 
     var img_data = sg.ImageData{};
@@ -446,7 +473,7 @@ export fn frame() void {
     });
 
     sg.beginPass(.{ .action = state.pass_action, .swapchain = sglue.swapchain() });
-    sg.applyPipeline(state.pipeline);
+    sg.applyPipeline(if (ui.display_hsv) state.pipeline_hsv else state.pipeline);
     // Live scaling-filter choice from the Screen menu (nearest vs linear).
     state.bindings.samplers[0] = if (ui.display_scale_linear) state.sampler_linear else state.sampler;
     sg.applyBindings(state.bindings);
diff --git a/src/platform/linux.zig b/src/platform/linux.zig
index 06ac7c7..890e8af 100644
--- a/src/platform/linux.zig
+++ b/src/platform/linux.zig
@@ -3,6 +3,7 @@ const sapp = @import("sokol").app;
 
 pub const shader_vs_source = @embedFile("shaders/blit.vs.glsl");
 pub const shader_fs_source = @embedFile("shaders/blit.fs.glsl");
+pub const shader_fs_hsv_source = @embedFile("shaders/blit_hsv.fs.glsl");
 pub const shader_entry = "main";
 
 pub const data_dir_template = "{s}/.local/share/{s}";
diff --git a/src/platform/macos.zig b/src/platform/macos.zig
index 7c70b5f..4075a9b 100644
--- a/src/platform/macos.zig
+++ b/src/platform/macos.zig
@@ -3,6 +3,7 @@ const sapp = @import("sokol").app;
 
 pub const shader_vs_source = @embedFile("shaders/blit.vs.metal");
 pub const shader_fs_source = @embedFile("shaders/blit.fs.metal");
+pub const shader_fs_hsv_source = @embedFile("shaders/blit_hsv.fs.metal");
 pub const shader_entry = "_main";
 
 pub const data_dir_template = "{s}/Library/Application Support/{s}";
diff --git a/src/platform/shaders/blit_hsv.fs.glsl b/src/platform/shaders/blit_hsv.fs.glsl
new file mode 100644
index 0000000..e8e7c95
--- /dev/null
+++ b/src/platform/shaders/blit_hsv.fs.glsl
@@ -0,0 +1,76 @@
+#version 330
+// App-layer HSV-smooth filter (replaces NP2kai's core videofilter). For each
+// output texel it averages the 3x3 neighbourhood in HSV space, keeping only
+// neighbours within a hue/saturation/value tolerance of the centre — a faithful
+// GPU port of VideoFilter_HSVSmooth (preset radius=15 -> 3x3, dH=30, dS=30,
+// dV=90, weight type 0). Running it on the GPU keeps the emulator core
+// unmodified and is effectively free versus the CPU implementation.
+uniform sampler2D tex_smp;
+in vec2 uv;
+out vec4 frag_color;
+
+const float dHtol = 30.0;
+const float dStol = 30.0 / 255.0;
+const float dVtol = 90.0 / 255.0;
+
+// H in [0,360), S and V in [0,1].
+vec3 rgb2hsv(vec3 c) {
+    float mx = max(c.r, max(c.g, c.b));
+    float mn = min(c.r, min(c.g, c.b));
+    float d = mx - mn;
+    float h = 0.0;
+    if (d > 0.0) {
+        if (mx == c.r) h = mod((c.g - c.b) / d, 6.0);
+        else if (mx == c.g) h = (c.b - c.r) / d + 2.0;
+        else h = (c.r - c.g) / d + 4.0;
+        h *= 60.0;
+        if (h < 0.0) h += 360.0;
+    }
+    float s = (mx <= 0.0) ? 0.0 : d / mx;
+    return vec3(h, s, mx);
+}
+
+vec3 hsv2rgb(vec3 c) {
+    float h = c.x, s = c.y, v = c.z;
+    float cc = v * s;
+    float x = cc * (1.0 - abs(mod(h / 60.0, 2.0) - 1.0));
+    float m = v - cc;
+    vec3 rgb;
+    if (h < 60.0) rgb = vec3(cc, x, 0.0);
+    else if (h < 120.0) rgb = vec3(x, cc, 0.0);
+    else if (h < 180.0) rgb = vec3(0.0, cc, x);
+    else if (h < 240.0) rgb = vec3(0.0, x, cc);
+    else if (h < 300.0) rgb = vec3(x, 0.0, cc);
+    else rgb = vec3(cc, 0.0, x);
+    return rgb + m;
+}
+
+void main() {
+    vec2 texel = 1.0 / vec2(textureSize(tex_smp, 0));
+    vec3 C = rgb2hsv(texture(tex_smp, uv).rgb);
+    float sumH = 0.0, sumS = 0.0, sumV = 0.0, count = 0.0;
+    for (int dy = -1; dy <= 1; dy++) {
+        for (int dx = -1; dx <= 1; dx++) {
+            vec3 D = rgb2hsv(texture(tex_smp, uv + vec2(float(dx), float(dy)) * texel).rgb);
+            float dH = D.x - C.x;
+            if (dH > 180.0) dH -= 360.0;
+            else if (dH < -180.0) dH += 360.0;
+            if (D.y <= 0.0) dH = 0.0;
+            float dS = D.y - C.y;
+            float dV = D.z - C.z;
+            bool ok = true;
+            if (D.z > 0.0 && abs(dH) > dHtol) ok = false;
+            if (D.z > 0.0 && abs(dS) > dStol) ok = false;
+            if (abs(dV) > dVtol) ok = false;
+            if (ok) {
+                sumH += C.x + dH;
+                sumS += C.y + dS;
+                sumV += C.z + dV;
+                count += 1.0;
+            }
+        }
+    }
+    float H = mod(sumH / count, 360.0);
+    if (H < 0.0) H += 360.0;
+    frag_color = vec4(hsv2rgb(vec3(H, sumS / count, sumV / count)), 1.0);
+}
diff --git a/src/platform/shaders/blit_hsv.fs.hlsl b/src/platform/shaders/blit_hsv.fs.hlsl
new file mode 100644
index 0000000..d7d28a8
--- /dev/null
+++ b/src/platform/shaders/blit_hsv.fs.hlsl
@@ -0,0 +1,75 @@
+// App-layer HSV-smooth filter (replaces NP2kai's core videofilter). See
+// blit_hsv.fs.glsl for the algorithm notes; this is the D3D11/HLSL port.
+Texture2D tex : register(t0);
+SamplerState smp : register(s0);
+struct fs_in {
+    float2 uv : TEXCOORD0;
+};
+
+static const float dHtol = 30.0;
+static const float dStol = 30.0 / 255.0;
+static const float dVtol = 90.0 / 255.0;
+
+// H in [0,360), S and V in [0,1].
+float3 rgb2hsv(float3 c) {
+    float mx = max(c.r, max(c.g, c.b));
+    float mn = min(c.r, min(c.g, c.b));
+    float d = mx - mn;
+    float h = 0.0;
+    if (d > 0.0) {
+        if (mx == c.r) h = fmod((c.g - c.b) / d, 6.0);
+        else if (mx == c.g) h = (c.b - c.r) / d + 2.0;
+        else h = (c.r - c.g) / d + 4.0;
+        h *= 60.0;
+        if (h < 0.0) h += 360.0;
+    }
+    float s = (mx <= 0.0) ? 0.0 : d / mx;
+    return float3(h, s, mx);
+}
+
+float3 hsv2rgb(float3 c) {
+    float h = c.x, s = c.y, v = c.z;
+    float cc = v * s;
+    float x = cc * (1.0 - abs(fmod(h / 60.0, 2.0) - 1.0));
+    float m = v - cc;
+    float3 rgb;
+    if (h < 60.0) rgb = float3(cc, x, 0.0);
+    else if (h < 120.0) rgb = float3(x, cc, 0.0);
+    else if (h < 180.0) rgb = float3(0.0, cc, x);
+    else if (h < 240.0) rgb = float3(0.0, x, cc);
+    else if (h < 300.0) rgb = float3(x, 0.0, cc);
+    else rgb = float3(cc, 0.0, x);
+    return rgb + m;
+}
+
+float4 main(fs_in inp) : SV_Target0 {
+    float tw, th;
+    tex.GetDimensions(tw, th);
+    float2 texel = float2(1.0 / tw, 1.0 / th);
+    float3 C = rgb2hsv(tex.Sample(smp, inp.uv).rgb);
+    float sumH = 0.0, sumS = 0.0, sumV = 0.0, count = 0.0;
+    [unroll] for (int dy = -1; dy <= 1; dy++) {
+        [unroll] for (int dx = -1; dx <= 1; dx++) {
+            float3 D = rgb2hsv(tex.Sample(smp, inp.uv + float2(dx, dy) * texel).rgb);
+            float dH = D.x - C.x;
+            if (dH > 180.0) dH -= 360.0;
+            else if (dH < -180.0) dH += 360.0;
+            if (D.y <= 0.0) dH = 0.0;
+            float dS = D.y - C.y;
+            float dV = D.z - C.z;
+            bool ok = true;
+            if (D.z > 0.0 && abs(dH) > dHtol) ok = false;
+            if (D.z > 0.0 && abs(dS) > dStol) ok = false;
+            if (abs(dV) > dVtol) ok = false;
+            if (ok) {
+                sumH += C.x + dH;
+                sumS += C.y + dS;
+                sumV += C.z + dV;
+                count += 1.0;
+            }
+        }
+    }
+    float H = fmod(sumH / count, 360.0);
+    if (H < 0.0) H += 360.0;
+    return float4(hsv2rgb(float3(H, sumS / count, sumV / count)), 1.0);
+}
diff --git a/src/platform/shaders/blit_hsv.fs.metal b/src/platform/shaders/blit_hsv.fs.metal
new file mode 100644
index 0000000..3bf867c
--- /dev/null
+++ b/src/platform/shaders/blit_hsv.fs.metal
@@ -0,0 +1,73 @@
+// App-layer HSV-smooth filter (replaces NP2kai's core videofilter). See
+// blit_hsv.fs.glsl for the algorithm notes; this is the Metal port.
+#include <metal_stdlib>
+using namespace metal;
+struct fs_in {
+    float2 uv;
+};
+
+constant float dHtol = 30.0;
+constant float dStol = 30.0 / 255.0;
+constant float dVtol = 90.0 / 255.0;
+
+// H in [0,360), S and V in [0,1].
+static float3 rgb2hsv(float3 c) {
+    float mx = max(c.r, max(c.g, c.b));
+    float mn = min(c.r, min(c.g, c.b));
+    float d = mx - mn;
+    float h = 0.0;
+    if (d > 0.0) {
+        if (mx == c.r) h = fmod((c.g - c.b) / d, 6.0);
+        else if (mx == c.g) h = (c.b - c.r) / d + 2.0;
+        else h = (c.r - c.g) / d + 4.0;
+        h *= 60.0;
+        if (h < 0.0) h += 360.0;
+    }
+    float s = (mx <= 0.0) ? 0.0 : d / mx;
+    return float3(h, s, mx);
+}
+
+static float3 hsv2rgb(float3 c) {
+    float h = c.x, s = c.y, v = c.z;
+    float cc = v * s;
+    float x = cc * (1.0 - abs(fmod(h / 60.0, 2.0) - 1.0));
+    float m = v - cc;
+    float3 rgb;
+    if (h < 60.0) rgb = float3(cc, x, 0.0);
+    else if (h < 120.0) rgb = float3(x, cc, 0.0);
+    else if (h < 180.0) rgb = float3(0.0, cc, x);
+    else if (h < 240.0) rgb = float3(0.0, x, cc);
+    else if (h < 300.0) rgb = float3(x, 0.0, cc);
+    else rgb = float3(cc, 0.0, x);
+    return rgb + m;
+}
+
+fragment float4 _main(fs_in in [[stage_in]], texture2d<float> tex [[texture(0)]], sampler smp [[sampler(0)]]) {
+    float2 texel = float2(1.0 / tex.get_width(), 1.0 / tex.get_height());
+    float3 C = rgb2hsv(tex.sample(smp, in.uv).rgb);
+    float sumH = 0.0, sumS = 0.0, sumV = 0.0, count = 0.0;
+    for (int dy = -1; dy <= 1; dy++) {
+        for (int dx = -1; dx <= 1; dx++) {
+            float3 D = rgb2hsv(tex.sample(smp, in.uv + float2(dx, dy) * texel).rgb);
+            float dH = D.x - C.x;
+            if (dH > 180.0) dH -= 360.0;
+            else if (dH < -180.0) dH += 360.0;
+            if (D.y <= 0.0) dH = 0.0;
+            float dS = D.y - C.y;
+            float dV = D.z - C.z;
+            bool ok = true;
+            if (D.z > 0.0 && abs(dH) > dHtol) ok = false;
+            if (D.z > 0.0 && abs(dS) > dStol) ok = false;
+            if (abs(dV) > dVtol) ok = false;
+            if (ok) {
+                sumH += C.x + dH;
+                sumS += C.y + dS;
+                sumV += C.z + dV;
+                count += 1.0;
+            }
+        }
+    }
+    float H = fmod(sumH / count, 360.0);
+    if (H < 0.0) H += 360.0;
+    return float4(hsv2rgb(float3(H, sumS / count, sumV / count)), 1.0);
+}
diff --git a/src/platform/windows.zig b/src/platform/windows.zig
index 12e5d2f..5059259 100644
--- a/src/platform/windows.zig
+++ b/src/platform/windows.zig
@@ -3,6 +3,7 @@ const sapp = @import("sokol").app;
 
 pub const shader_vs_source = @embedFile("shaders/blit.vs.hlsl");
 pub const shader_fs_source = @embedFile("shaders/blit.fs.hlsl");
+pub const shader_fs_hsv_source = @embedFile("shaders/blit_hsv.fs.hlsl");
 pub const shader_entry = "main";
 
 // Backslashes (not forward slashes): the path is handed to `explorer` to
diff --git a/src/ui.zig b/src/ui.zig
index 0e11a3d..4ee66fd 100644
--- a/src/ui.zig
+++ b/src/ui.zig
@@ -405,14 +405,11 @@ fn menuScreen(ctx: *c.nk_context) void {
         var linear_v: c_int = if (display_scale_linear) 1 else 0;
         _ = c.nk_checkbox_label(ctx, "Smooth Scaling", &linear_v);
         display_scale_linear = linear_v != 0;
-        // HSV Filter — NP2kai HSV-smooth on the emulated output; toggled live.
+        // HSV Filter — app-layer HSV-smooth, applied as a GPU shader pass in
+        // main.frame() when display_hsv is set. No core-side call needed.
         var hsv_v: c_int = if (display_hsv) 1 else 0;
         _ = c.nk_checkbox_label(ctx, "HSV Filter", &hsv_v);
-        const hsv_new = hsv_v != 0;
-        if (hsv_new != display_hsv) {
-            display_hsv = hsv_new;
-            cz.usa_set_video_filter(if (hsv_new) 1 else 0);
-        }
+        display_hsv = hsv_v != 0;
         c.nk_menu_end(ctx);
     }
 }