Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 57 additions & 30 deletions src/main.zig
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@ fn fbViewport(win_w: u32, win_h: u32) Viewport {

const State = struct {
pipeline: sg.Pipeline = .{},
// Second pipeline whose fragment shader applies the HSV-smooth filter on the
// GPU; selected at draw time when ui.display_hsv is on. Keeping the filter in
// a shader means the NP2kai core needs no video-filter changes.
pipeline_hsv: sg.Pipeline = .{},
bindings: sg.Bindings = .{},
pass_action: sg.PassAction = .{},
image: sg.Image = .{},
Expand All @@ -82,7 +86,11 @@ var last_emu_ns: i128 = 0;
var skip_counter: u32 = 0;
const nowait_frames_per_tick: u32 = 16;
var draw_fps: f32 = 0.0;
var last_draw_ns: i128 = 0;
// Windowed FPS: average presented-frame count over a ~0.5s wall-clock window.
// Replaces an instantaneous 1/Δt readout, which swung wildly (into the hundreds)
// whenever the host frame() rate beat against the ~60Hz draw gate.
var fps_draw_count: u32 = 0;
var fps_window_start_ns: i128 = 0;

// Audio buffer (used on the Zig side for conversion)
var audio_buffer: [4096 * 2]f32 = undefined;
Expand Down Expand Up @@ -135,13 +143,11 @@ export fn init() void {
// because the reset's diskdrv_hddbind() binds drives from the config.
configureHdds(expanded_disks);
}
// Always load the HSV-smooth profile into np2cfg before pccore_init() reads
// it into the filter manager, so the Screen menu can toggle it live. The
// --video-filter flag only decides whether it starts on. UsaProject never
// reads np2kai's .cfg, so this is the only path that configures it.
const vf_on = if (parsed_opts) |o| o.video_filter else false;
cz.usa_setup_video_filter(if (vf_on) 1 else 0);
ui.display_hsv = vf_on;
// The HSV-smooth filter now lives in the app layer as a GPU fragment-shader
// pass (state.pipeline_hsv), so the NP2kai core filter stays disabled and
// the core source is left unmodified. The --video-filter flag only decides
// whether the shader filter starts on; the Screen menu toggles it live.
ui.display_hsv = if (parsed_opts) |o| o.video_filter else false;
cz.pccore_init();
cz.pccore_reset();
if (parsed_opts) |opts| {
Expand Down Expand Up @@ -195,16 +201,21 @@ export fn init() void {
state.bindings.views[0] = state.view;
state.bindings.samplers[0] = state.sampler;

const blit_attrs = init: {
var attrs: [16]sg.VertexAttrState = @splat(.{});
attrs[0].format = .FLOAT3;
attrs[1].format = .FLOAT2;
break :init attrs;
};
state.pipeline = sg.makePipeline(.{
.shader = makeBlitShader(),
.layout = .{
.attrs = init: {
var attrs: [16]sg.VertexAttrState = @splat(.{});
attrs[0].format = .FLOAT3;
attrs[1].format = .FLOAT2;
break :init attrs;
},
},
.shader = makeBlitShader(platform.os.shader_fs_source),
.layout = .{ .attrs = blit_attrs },
.index_type = .UINT16,
});
// HSV-smooth variant: same geometry/layout, filtering fragment shader.
state.pipeline_hsv = sg.makePipeline(.{
.shader = makeBlitShader(platform.os.shader_fs_hsv_source),
.layout = .{ .attrs = blit_attrs },
.index_type = .UINT16,
});

Expand Down Expand Up @@ -321,14 +332,14 @@ fn setupDataDir() void {
config.setDataDir(dir);
}

fn makeBlitShader() sg.Shader {
fn makeBlitShader(fs_source: [*:0]const u8) sg.Shader {
return sg.makeShader(.{
.vertex_func = .{
.source = platform.os.shader_vs_source,
},
.fragment_func = .{
.entry = platform.os.shader_entry,
.source = platform.os.shader_fs_source,
.source = fs_source,
},
.attrs = init: {
var a: [16]sg.ShaderVertexAttr = @splat(.{});
Expand Down Expand Up @@ -390,28 +401,44 @@ export fn frame() void {
const draw_skip = cz.usa_get_draw_skip();
var i: u32 = 0;
while (i < frames) : (i += 1) {
const should_draw = blk: {
if (draw_skip <= 1) break :blk true;
// Only the final emulated frame of this host frame is ever uploaded
// to the GPU (a single updateImage() after the loop), so only it
// needs the expensive scrndraw_draw() + HSV filter pass. Intermediate
// frames just advance the CPU with draw=false; rendering them — and
// especially running the per-pixel HSV-smooth filter on them — is
// pure waste (16x under No-Wait, up to 4x under catch-up) that
// collapses the frame rate to single digits when the filter is on.
const is_last = (i + 1 == frames);
var should_draw = is_last;
// draw_skip is the user's frame-skip setting; apply it to the one
// presented frame per host tick so heavy load can drop whole frames.
if (is_last and draw_skip > 1) {
skip_counter += 1;
if (skip_counter >= draw_skip) {
skip_counter = 0;
break :blk true;
} else {
should_draw = false;
}
break :blk false;
};
}
cz.pccore_exec(should_draw);
cz.sound_sync();
if (should_draw) {
cz.scrndraw_redraw();
const draw_dt_ns = now - last_draw_ns;
if (draw_dt_ns > 0) {
draw_fps = @floatCast(1_000_000_000.0 / @as(f64, @floatFromInt(draw_dt_ns)));
}
last_draw_ns = now;
fps_draw_count += 1;
}
}
}

// Publish smoothed FPS once per ~0.5s window.
if (fps_window_start_ns == 0) fps_window_start_ns = now;
const win_ns = now - fps_window_start_ns;
if (win_ns >= 500_000_000) {
const win_s = @as(f64, @floatFromInt(win_ns)) / 1_000_000_000.0;
draw_fps = @floatCast(@as(f64, @floatFromInt(fps_draw_count)) / win_s);
fps_draw_count = 0;
fps_window_start_ns = now;
}

pixel.rgb565BufferToRgba8(&fb_rgba, cz.pc98_framebuffer[0 .. FB_WIDTH * FB_HEIGHT]);

var img_data = sg.ImageData{};
Expand Down Expand Up @@ -446,7 +473,7 @@ export fn frame() void {
});

sg.beginPass(.{ .action = state.pass_action, .swapchain = sglue.swapchain() });
sg.applyPipeline(state.pipeline);
sg.applyPipeline(if (ui.display_hsv) state.pipeline_hsv else state.pipeline);
// Live scaling-filter choice from the Screen menu (nearest vs linear).
state.bindings.samplers[0] = if (ui.display_scale_linear) state.sampler_linear else state.sampler;
sg.applyBindings(state.bindings);
Expand Down
1 change: 1 addition & 0 deletions src/platform/linux.zig
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ const sapp = @import("sokol").app;

pub const shader_vs_source = @embedFile("shaders/blit.vs.glsl");
pub const shader_fs_source = @embedFile("shaders/blit.fs.glsl");
pub const shader_fs_hsv_source = @embedFile("shaders/blit_hsv.fs.glsl");
pub const shader_entry = "main";

pub const data_dir_template = "{s}/.local/share/{s}";
Expand Down
1 change: 1 addition & 0 deletions src/platform/macos.zig
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ const sapp = @import("sokol").app;

pub const shader_vs_source = @embedFile("shaders/blit.vs.metal");
pub const shader_fs_source = @embedFile("shaders/blit.fs.metal");
pub const shader_fs_hsv_source = @embedFile("shaders/blit_hsv.fs.metal");
pub const shader_entry = "_main";

pub const data_dir_template = "{s}/Library/Application Support/{s}";
Expand Down
76 changes: 76 additions & 0 deletions src/platform/shaders/blit_hsv.fs.glsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
#version 330
// App-layer HSV-smooth filter (replaces NP2kai's core videofilter). For each
// output texel it averages the 3x3 neighbourhood in HSV space, keeping only
// neighbours within a hue/saturation/value tolerance of the centre — a faithful
// GPU port of VideoFilter_HSVSmooth (preset radius=15 -> 3x3, dH=30, dS=30,
// dV=90, weight type 0). Running it on the GPU keeps the emulator core
// unmodified and is effectively free versus the CPU implementation.
uniform sampler2D tex_smp;
in vec2 uv;
out vec4 frag_color;

// Acceptance tolerances main() applies to each neighbour, measured as the
// difference from the centre sample. Hue is compared in degrees; saturation
// and value are compared in [0,1], hence the division by 255.
const float dHtol = 30.0;
const float dStol = 30.0 / 255.0;
const float dVtol = 90.0 / 255.0;

// Converts an RGB colour to HSV, returned as (hue, saturation, value).
// Hue is in degrees in [0,360); saturation and value are in [0,1].
// A colour whose channels are all equal gets hue 0, and a colour whose
// largest channel is <= 0 gets saturation 0.
vec3 rgb2hsv(vec3 c) {
    float hi = max(c.r, max(c.g, c.b));
    float lo = min(c.r, min(c.g, c.b));
    float chroma = hi - lo;
    float sat = (hi <= 0.0) ? 0.0 : chroma / hi;

    // No chroma means the hue is undefined; report it as 0.
    if (!(chroma > 0.0)) {
        return vec3(0.0, sat, hi);
    }

    // Position on the colour wheel in sixths, chosen by the dominant channel.
    float sextant;
    if (hi == c.r) {
        sextant = mod((c.g - c.b) / chroma, 6.0);
    } else if (hi == c.g) {
        sextant = (c.b - c.r) / chroma + 2.0;
    } else {
        sextant = (c.r - c.g) / chroma + 4.0;
    }

    float hue = sextant * 60.0;
    if (hue < 0.0) {
        hue += 360.0;
    }
    return vec3(hue, sat, hi);
}

// Converts an HSV colour (hue in degrees, saturation, value) to RGB.
// The hue selects one of six 60-degree sectors; any hue that is not below
// 300 falls through to the last sector.
vec3 hsv2rgb(vec3 c) {
    float hue = c.x;
    float sat = c.y;
    float val = c.z;

    float chroma = val * sat;
    float mid = chroma * (1.0 - abs(mod(hue / 60.0, 2.0) - 1.0));
    float base = val - chroma;

    // Start from the final sector and override it for lower hues.
    vec3 rgb = vec3(chroma, 0.0, mid);
    if (hue < 60.0) {
        rgb = vec3(chroma, mid, 0.0);
    } else if (hue < 120.0) {
        rgb = vec3(mid, chroma, 0.0);
    } else if (hue < 180.0) {
        rgb = vec3(0.0, chroma, mid);
    } else if (hue < 240.0) {
        rgb = vec3(0.0, mid, chroma);
    } else if (hue < 300.0) {
        rgb = vec3(mid, 0.0, chroma);
    }
    return rgb + base;
}

// Fragment entry point: HSV-space smoothing over the 3x3 neighbourhood of uv.
// Each tap is converted to HSV and compared with the centre sample; taps that
// differ by more than the hue/saturation/value tolerances are dropped, and the
// surviving taps are averaged and converted back to RGB. The (0,0) tap samples
// the same coordinate as the centre, so the divisor is never zero.
void main() {
    vec2 texel = 1.0 / vec2(textureSize(tex_smp, 0));
    vec3 centre = rgb2hsv(texture(tex_smp, uv).rgb);

    vec3 acc = vec3(0.0);
    float taps = 0.0;
    for (int dy = -1; dy <= 1; dy++) {
        for (int dx = -1; dx <= 1; dx++) {
            vec2 offset = vec2(float(dx), float(dy)) * texel;
            vec3 nb = rgb2hsv(texture(tex_smp, uv + offset).rgb);

            // Shortest signed hue difference; hue of an unsaturated tap is ignored.
            float dH = nb.x - centre.x;
            if (dH > 180.0) {
                dH -= 360.0;
            } else if (dH < -180.0) {
                dH += 360.0;
            }
            if (nb.y <= 0.0) {
                dH = 0.0;
            }
            vec3 delta = vec3(dH, nb.y - centre.y, nb.z - centre.z);

            // Hue and saturation are only tested for taps with non-zero value.
            bool lit = nb.z > 0.0;
            bool reject = (lit && abs(delta.x) > dHtol)
                || (lit && abs(delta.y) > dStol)
                || (abs(delta.z) > dVtol);
            if (!reject) {
                acc += centre + delta;
                taps += 1.0;
            }
        }
    }

    vec3 mean = acc / taps;
    float hue = mod(mean.x, 360.0);
    if (hue < 0.0) {
        hue += 360.0;
    }
    frag_color = vec4(hsv2rgb(vec3(hue, mean.y, mean.z)), 1.0);
}
75 changes: 75 additions & 0 deletions src/platform/shaders/blit_hsv.fs.hlsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
// App-layer HSV-smooth filter (replaces NP2kai's core videofilter). See
// blit_hsv.fs.glsl for the algorithm notes; this is the D3D11/HLSL port.
Texture2D<float4> tex : register(t0);
SamplerState smp : register(s0);
// Pixel-shader input: the texture coordinate, bound to the TEXCOORD0 semantic.
struct fs_in {
float2 uv : TEXCOORD0;
};

// Acceptance tolerances main() applies to each neighbour, measured as the
// difference from the centre sample. Hue is compared in degrees; saturation
// and value are compared in [0,1], hence the division by 255.
static const float dHtol = 30.0;
static const float dStol = 30.0 / 255.0;
static const float dVtol = 90.0 / 255.0;

// Converts an RGB colour to HSV, returned as (hue, saturation, value).
// Hue is in degrees in [0,360); saturation and value are in [0,1].
// A colour whose channels are all equal gets hue 0, and a colour whose
// largest channel is <= 0 gets saturation 0.
float3 rgb2hsv(float3 c) {
    float hi = max(c.r, max(c.g, c.b));
    float lo = min(c.r, min(c.g, c.b));
    float chroma = hi - lo;
    float sat = (hi <= 0.0) ? 0.0 : chroma / hi;

    float hue = 0.0;
    if (chroma > 0.0) {
        // Position on the colour wheel in sixths, chosen by the dominant channel.
        float sextant;
        if (hi == c.r) {
            sextant = fmod((c.g - c.b) / chroma, 6.0);
        } else if (hi == c.g) {
            sextant = (c.b - c.r) / chroma + 2.0;
        } else {
            sextant = (c.r - c.g) / chroma + 4.0;
        }
        hue = sextant * 60.0;
        // fmod keeps the sign of its first operand, so wrap negatives here.
        if (hue < 0.0) {
            hue += 360.0;
        }
    }
    return float3(hue, sat, hi);
}

// Converts an HSV colour (hue in degrees, saturation, value) to RGB.
// The hue selects one of six 60-degree sectors; any hue that is not below
// 300 falls through to the last sector.
float3 hsv2rgb(float3 c) {
    float hue = c.x;
    float sat = c.y;
    float val = c.z;

    float chroma = val * sat;
    float mid = chroma * (1.0 - abs(fmod(hue / 60.0, 2.0) - 1.0));
    float base = val - chroma;

    // Start from the final sector and override it for lower hues.
    float3 rgb = float3(chroma, 0.0, mid);
    if (hue < 60.0) {
        rgb = float3(chroma, mid, 0.0);
    } else if (hue < 120.0) {
        rgb = float3(mid, chroma, 0.0);
    } else if (hue < 180.0) {
        rgb = float3(0.0, chroma, mid);
    } else if (hue < 240.0) {
        rgb = float3(0.0, mid, chroma);
    } else if (hue < 300.0) {
        rgb = float3(mid, 0.0, chroma);
    }
    return rgb + base;
}

// Pixel-shader entry point: HSV-space smoothing over the 3x3 neighbourhood of
// inp.uv. Each tap is converted to HSV and compared with the centre sample;
// taps that differ by more than the hue/saturation/value tolerances are
// dropped, and the surviving taps are averaged and converted back to RGB.
// The (0,0) tap samples the same coordinate as the centre, so the divisor is
// never zero.
float4 main(fs_in inp) : SV_Target0 {
    float tw, th;
    tex.GetDimensions(tw, th);
    float2 texel = float2(1.0 / tw, 1.0 / th);
    float3 centre = rgb2hsv(tex.Sample(smp, inp.uv).rgb);

    float3 acc = float3(0.0, 0.0, 0.0);
    float taps = 0.0;
    [unroll] for (int dy = -1; dy <= 1; dy++) {
        [unroll] for (int dx = -1; dx <= 1; dx++) {
            float2 offset = float2(dx, dy) * texel;
            float3 nb = rgb2hsv(tex.Sample(smp, inp.uv + offset).rgb);

            // Shortest signed hue difference; hue of an unsaturated tap is ignored.
            float dH = nb.x - centre.x;
            if (dH > 180.0) {
                dH -= 360.0;
            } else if (dH < -180.0) {
                dH += 360.0;
            }
            if (nb.y <= 0.0) {
                dH = 0.0;
            }
            float3 delta = float3(dH, nb.y - centre.y, nb.z - centre.z);

            // Hue and saturation are only tested for taps with non-zero value.
            bool lit = nb.z > 0.0;
            bool reject = (lit && abs(delta.x) > dHtol)
                || (lit && abs(delta.y) > dStol)
                || (abs(delta.z) > dVtol);
            if (!reject) {
                acc += centre + delta;
                taps += 1.0;
            }
        }
    }

    float3 mean = acc / taps;
    float hue = fmod(mean.x, 360.0);
    // fmod keeps the sign of its first operand, so wrap negatives here.
    if (hue < 0.0) {
        hue += 360.0;
    }
    return float4(hsv2rgb(float3(hue, mean.y, mean.z)), 1.0);
}
73 changes: 73 additions & 0 deletions src/platform/shaders/blit_hsv.fs.metal
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
// App-layer HSV-smooth filter (replaces NP2kai's core videofilter). See
// blit_hsv.fs.glsl for the algorithm notes; this is the Metal port.
#include <metal_stdlib>
using namespace metal;
// Fragment-stage input (received by _main as [[stage_in]]): the texture coordinate.
struct fs_in {
float2 uv;
};

// Acceptance tolerances _main() applies to each neighbour, measured as the
// difference from the centre sample. Hue is compared in degrees; saturation
// and value are compared in [0,1], hence the division by 255.
constant float dHtol = 30.0;
constant float dStol = 30.0 / 255.0;
constant float dVtol = 90.0 / 255.0;

// Converts an RGB colour to HSV, returned as (hue, saturation, value).
// Hue is in degrees in [0,360); saturation and value are in [0,1].
// A colour whose channels are all equal gets hue 0, and a colour whose
// largest channel is <= 0 gets saturation 0.
static float3 rgb2hsv(float3 c) {
    float hi = max(c.r, max(c.g, c.b));
    float lo = min(c.r, min(c.g, c.b));
    float chroma = hi - lo;
    float sat = (hi <= 0.0) ? 0.0 : chroma / hi;

    float hue = 0.0;
    if (chroma > 0.0) {
        // Position on the colour wheel in sixths, chosen by the dominant channel.
        float sextant;
        if (hi == c.r) {
            sextant = fmod((c.g - c.b) / chroma, 6.0);
        } else if (hi == c.g) {
            sextant = (c.b - c.r) / chroma + 2.0;
        } else {
            sextant = (c.r - c.g) / chroma + 4.0;
        }
        hue = sextant * 60.0;
        // fmod keeps the sign of its first operand, so wrap negatives here.
        if (hue < 0.0) {
            hue += 360.0;
        }
    }
    return float3(hue, sat, hi);
}

// Converts an HSV colour (hue in degrees, saturation, value) to RGB.
// The hue selects one of six 60-degree sectors; any hue that is not below
// 300 falls through to the last sector.
static float3 hsv2rgb(float3 c) {
    float hue = c.x;
    float sat = c.y;
    float val = c.z;

    float chroma = val * sat;
    float mid = chroma * (1.0 - abs(fmod(hue / 60.0, 2.0) - 1.0));
    float base = val - chroma;

    // Start from the final sector and override it for lower hues.
    float3 rgb = float3(chroma, 0.0, mid);
    if (hue < 60.0) {
        rgb = float3(chroma, mid, 0.0);
    } else if (hue < 120.0) {
        rgb = float3(mid, chroma, 0.0);
    } else if (hue < 180.0) {
        rgb = float3(0.0, chroma, mid);
    } else if (hue < 240.0) {
        rgb = float3(0.0, mid, chroma);
    } else if (hue < 300.0) {
        rgb = float3(mid, 0.0, chroma);
    }
    return rgb + base;
}

// Fragment entry point: HSV-space smoothing over the 3x3 neighbourhood of
// in.uv. Each tap is converted to HSV and compared with the centre sample;
// taps that differ by more than the hue/saturation/value tolerances are
// dropped, and the surviving taps are averaged and converted back to RGB.
// The (0,0) tap samples the same coordinate as the centre, so the divisor is
// never zero.
fragment float4 _main(fs_in in [[stage_in]], texture2d<float> tex [[texture(0)]], sampler smp [[sampler(0)]]) {
    float2 texel = float2(1.0 / tex.get_width(), 1.0 / tex.get_height());
    float3 centre = rgb2hsv(tex.sample(smp, in.uv).rgb);

    float3 acc = float3(0.0);
    float taps = 0.0;
    for (int dy = -1; dy <= 1; dy++) {
        for (int dx = -1; dx <= 1; dx++) {
            float2 offset = float2(dx, dy) * texel;
            float3 nb = rgb2hsv(tex.sample(smp, in.uv + offset).rgb);

            // Shortest signed hue difference; hue of an unsaturated tap is ignored.
            float dH = nb.x - centre.x;
            if (dH > 180.0) {
                dH -= 360.0;
            } else if (dH < -180.0) {
                dH += 360.0;
            }
            if (nb.y <= 0.0) {
                dH = 0.0;
            }
            float3 delta = float3(dH, nb.y - centre.y, nb.z - centre.z);

            // Hue and saturation are only tested for taps with non-zero value.
            bool lit = nb.z > 0.0;
            bool reject = (lit && abs(delta.x) > dHtol)
                || (lit && abs(delta.y) > dStol)
                || (abs(delta.z) > dVtol);
            if (!reject) {
                acc += centre + delta;
                taps += 1.0;
            }
        }
    }

    float3 mean = acc / taps;
    float hue = fmod(mean.x, 360.0);
    // fmod keeps the sign of its first operand, so wrap negatives here.
    if (hue < 0.0) {
        hue += 360.0;
    }
    return float4(hsv2rgb(float3(hue, mean.y, mean.z)), 1.0);
}
Loading
Loading