From de01d228881797a1460d31fc676ab5034f59982d Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Tue, 9 Jun 2026 12:44:14 -0700 Subject: [PATCH 1/3] Implement FFT linear convolution --- NAM/dsp.cpp | 255 ++++++++++++++++++++++++++++++++++++- NAM/dsp.h | 41 +++++- tools/CMakeLists.txt | 49 +++---- tools/bench_linear.cpp | 201 +++++++++++++++++++++++++++++ tools/run_tests.cpp | 6 + tools/test/test_linear.cpp | 136 ++++++++++++++++++++ 6 files changed, 662 insertions(+), 26 deletions(-) create mode 100644 tools/bench_linear.cpp create mode 100644 tools/test/test_linear.cpp diff --git a/NAM/dsp.cpp b/NAM/dsp.cpp index e975001b..dd193293 100644 --- a/NAM/dsp.cpp +++ b/NAM/dsp.cpp @@ -1,5 +1,7 @@ #include // std::max_element +#include #include // pow, tanh, expf +#include #include #include #include @@ -11,6 +13,8 @@ #include "dsp.h" #include "registry.h" +#include + #define tanh_impl_ std::tanh // #define tanh_impl_ fast_tanh_ @@ -252,23 +256,163 @@ void nam::Buffer::_advance_input_buffer_(const int num_frames) // Linear ===================================================================== +namespace +{ +constexpr int _LINEAR_AUTO_DIRECT_MAX_TAPS = 256; +constexpr int _LINEAR_FFT_SMALL_BLOCK_SIZE = 256; +constexpr int _LINEAR_FFT_MEDIUM_BLOCK_SIZE = 512; +constexpr int _LINEAR_FFT_LARGE_BLOCK_SIZE = 1024; + +int _ceil_div(const int numerator, const int denominator) +{ + return (numerator + denominator - 1) / denominator; +} + +int _choose_linear_fft_block_size(const int receptive_field) +{ + if (receptive_field <= 2048) + return _LINEAR_FFT_SMALL_BLOCK_SIZE; + if (receptive_field <= 8192) + return _LINEAR_FFT_MEDIUM_BLOCK_SIZE; + return _LINEAR_FFT_LARGE_BLOCK_SIZE; +} + +} // namespace + +struct nam::LinearFFTState +{ + using Complex = std::complex; + + struct ChannelState + { + std::vector input_time; + std::vector> input_spectra; + std::vector output_ring; + int input_pos = 0; + int spectrum_write_index = 0; + }; + + Eigen::FFT fft; + int block_size = 0; + int fft_size = 0; + int direct_taps = 0; + int num_partitions = 0; + int output_ring_size = 0; + long long sample_index = 0; + std::vector> kernel_spectra; + std::vector channels; + std::vector accumulator; + std::vector ifft_time; +}; + nam::Linear::Linear(const int in_channels, const int out_channels, const int receptive_field, const bool _bias, - const std::vector& weights, const double expected_sample_rate) + const std::vector& weights, const double expected_sample_rate, + const LinearImplementation implementation) : nam::Buffer(in_channels, out_channels, receptive_field, expected_sample_rate) +, _requested_implementation(implementation) +, _active_implementation(LinearImplementation::Direct) { if ((int)weights.size() != (receptive_field + (_bias ? 1 : 0))) throw std::runtime_error( "Params vector does not match expected size based " "on architecture parameters"); + this->_impulse_response.assign(weights.begin(), weights.begin() + receptive_field); this->_weight.resize(this->_receptive_field); // Pass in in reverse order so that dot products work out of the box. for (int i = 0; i < this->_receptive_field; i++) this->_weight(i) = weights[receptive_field - 1 - i]; this->_bias = _bias ? weights[receptive_field] : (float)0.0; + + this->_configure_implementation(); } +nam::Linear::~Linear() = default; + void nam::Linear::process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames) +{ + if (this->_active_implementation == LinearImplementation::FFT) + this->_process_fft(input, output, num_frames); + else + this->_process_direct(input, output, num_frames); +} + +void nam::Linear::SetMaxBufferSize(const int maxBufferSize) +{ + nam::Buffer::SetMaxBufferSize(maxBufferSize); + this->_configure_implementation(); +} + +void nam::Linear::_configure_implementation() +{ + if (this->_requested_implementation == LinearImplementation::Direct) + this->_active_implementation = LinearImplementation::Direct; + else if (this->_requested_implementation == LinearImplementation::FFT) + this->_active_implementation = LinearImplementation::FFT; + else + this->_active_implementation = + this->_receptive_field <= _LINEAR_AUTO_DIRECT_MAX_TAPS ? LinearImplementation::Direct : LinearImplementation::FFT; + + if (this->_active_implementation == LinearImplementation::FFT) + this->_configure_fft_state(); + else + this->_fft_state.reset(); +} + +void nam::Linear::_configure_fft_state() +{ + this->_fft_state = std::make_unique(); + auto& state = *this->_fft_state; + + state.block_size = _choose_linear_fft_block_size(this->_receptive_field); + state.fft_size = 2 * state.block_size; + state.direct_taps = std::min(this->_receptive_field, state.block_size); + state.num_partitions = this->_receptive_field > state.direct_taps + ? _ceil_div(this->_receptive_field - state.direct_taps, state.block_size) + : 0; + state.output_ring_size = 4 * state.block_size; + state.sample_index = 0; + + this->_fft_direct_weight.resize(state.direct_taps); + for (int i = 0; i < state.direct_taps; i++) + this->_fft_direct_weight(i) = this->_impulse_response[state.direct_taps - 1 - i]; + + state.kernel_spectra.assign(state.num_partitions, std::vector(state.fft_size)); + std::vector kernel_time(state.fft_size, 0.0f); + for (int partition = 0; partition < state.num_partitions; partition++) + { + std::fill(kernel_time.begin(), kernel_time.end(), 0.0f); + const int start = state.direct_taps + partition * state.block_size; + const int partition_size = std::min(state.block_size, this->_receptive_field - start); + for (int i = 0; i < partition_size; i++) + kernel_time[i] = this->_impulse_response[start + i]; + state.fft.fwd(state.kernel_spectra[partition].data(), kernel_time.data(), state.fft_size); + } + + const int channels_to_process = std::min(NumInputChannels(), NumOutputChannels()); + state.channels.resize(channels_to_process); + for (auto& channel : state.channels) + { + channel.input_time.assign(state.fft_size, 0.0f); + channel.input_spectra.assign( + state.num_partitions, std::vector(state.fft_size, LinearFFTState::Complex{})); + channel.output_ring.assign(state.output_ring_size, 0.0f); + channel.input_pos = 0; + channel.spectrum_write_index = 0; + } + state.accumulator.assign(state.fft_size, LinearFFTState::Complex{}); + state.ifft_time.assign(state.fft_size, 0.0f); + + if (state.num_partitions > 0) + { + std::vector warm_spectrum(state.fft_size); + std::vector warm_time(state.fft_size, 0.0f); + state.fft.fwd(warm_spectrum.data(), warm_time.data(), state.fft_size); + state.fft.inv(warm_time.data(), warm_spectrum.data(), state.fft_size); + } +} + +void nam::Linear::_process_direct(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames) { this->nam::Buffer::_update_buffers_(input, num_frames); @@ -301,6 +445,111 @@ void nam::Linear::process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num nam::Buffer::_advance_input_buffer_(num_frames); } +void nam::Linear::_process_fft(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames) +{ + this->nam::Buffer::_update_buffers_(input, num_frames); + + const int in_channels = NumInputChannels(); + const int out_channels = NumOutputChannels(); + const int channels_to_process = std::min(in_channels, out_channels); + auto& state = *this->_fft_state; + const int direct_taps = state.direct_taps; + + for (int i = 0; i < num_frames; i++) + { + const long direct_offset = this->_input_buffer_offset - direct_taps + i + 1; + for (int ch = 0; ch < channels_to_process; ch++) + { + const int ring_index = (int)(state.sample_index % state.output_ring_size); + const float tail = state.channels[ch].output_ring[ring_index]; + state.channels[ch].output_ring[ring_index] = 0.0f; + + auto input_vec = Eigen::Map(&this->_input_buffers[ch][direct_offset], direct_taps); + output[ch][i] = this->_bias + this->_fft_direct_weight.dot(input_vec) + tail; + + if (state.num_partitions > 0) + { + auto& channel = state.channels[ch]; + channel.input_time[channel.input_pos] = (float)input[ch][i]; + channel.input_pos++; + if (channel.input_pos == state.block_size) + this->_run_fft_block(ch); + } + } + + for (int ch = channels_to_process; ch < out_channels; ch++) + output[ch][i] = (NAM_SAMPLE)0.0; + + state.sample_index++; + } + + nam::Buffer::_advance_input_buffer_(num_frames); +} + +void nam::Linear::_run_fft_block(const int channel_index) +{ + auto& state = *this->_fft_state; + auto& channel = state.channels[channel_index]; + + auto& current_spectrum = channel.input_spectra[channel.spectrum_write_index]; + state.fft.fwd(current_spectrum.data(), channel.input_time.data(), state.fft_size); + + std::fill(state.accumulator.begin(), state.accumulator.end(), LinearFFTState::Complex{}); + for (int partition = 0; partition < state.num_partitions; partition++) + { + int input_spectrum_index = channel.spectrum_write_index - partition; + if (input_spectrum_index < 0) + input_spectrum_index += state.num_partitions; + const auto& input_spectrum = channel.input_spectra[input_spectrum_index]; + const auto& kernel_spectrum = state.kernel_spectra[partition]; + for (int bin = 0; bin < state.fft_size; bin++) + state.accumulator[bin] += input_spectrum[bin] * kernel_spectrum[bin]; + } + + state.fft.inv(state.ifft_time.data(), state.accumulator.data(), state.fft_size); + + const long long block_start = state.sample_index - state.block_size + 1; + const long long output_start = block_start + state.direct_taps; + auto& output_ring = channel.output_ring; + for (int i = 0; i < state.fft_size - 1; i++) + { + const int ring_index = (int)((output_start + i) % state.output_ring_size); + output_ring[ring_index] += state.ifft_time[i]; + } + + std::fill(channel.input_time.begin(), channel.input_time.begin() + state.block_size, 0.0f); + channel.input_pos = 0; + channel.spectrum_write_index++; + if (channel.spectrum_write_index == state.num_partitions) + channel.spectrum_write_index = 0; +} + +nam::LinearImplementation nam::linear::parse_implementation(const std::string& implementation) +{ + std::string normalized = implementation; + std::transform( + normalized.begin(), normalized.end(), normalized.begin(), [](unsigned char c) { return (char)std::tolower(c); }); + + if (normalized == "auto") + return LinearImplementation::Auto; + if (normalized == "direct" || normalized == "legacy" || normalized == "old") + return LinearImplementation::Direct; + if (normalized == "fft" || normalized == "partitioned_fft" || normalized == "partitioned-fft") + return LinearImplementation::FFT; + throw std::runtime_error("Unsupported Linear implementation: " + implementation); +} + +std::string nam::linear::implementation_to_string(const LinearImplementation implementation) +{ + switch (implementation) + { + case LinearImplementation::Auto: return "auto"; + case LinearImplementation::Direct: return "direct"; + case LinearImplementation::FFT: return "fft"; + } + throw std::runtime_error("Unsupported Linear implementation enum"); +} + // Config parser nam::linear::LinearConfig nam::linear::parse_config_json(const nlohmann::json& config) { @@ -310,13 +559,15 @@ nam::linear::LinearConfig nam::linear::parse_config_json(const nlohmann::json& c // Default to 1 channel in/out for backward compatibility c.in_channels = config.value("in_channels", 1); c.out_channels = config.value("out_channels", 1); + c.implementation = parse_implementation(config.value("implementation", "auto")); return c; } // LinearConfig::create() std::unique_ptr nam::linear::LinearConfig::create(std::vector weights, double sampleRate) { - return std::make_unique(in_channels, out_channels, receptive_field, bias, weights, sampleRate); + return std::make_unique( + in_channels, out_channels, receptive_field, bias, weights, sampleRate, implementation); } // Config parser for ConfigParserRegistry diff --git a/NAM/dsp.h b/NAM/dsp.h index 1fadcf70..4f28541e 100644 --- a/NAM/dsp.h +++ b/NAM/dsp.h @@ -39,6 +39,15 @@ namespace wavenet class WaveNet; } // namespace wavenet +struct LinearFFTState; + +/// \brief Selects the convolution engine used by Linear models. +enum class LinearImplementation +{ + Auto, ///< Choose direct or FFT convolution from the impulse-response length. + Direct, ///< Legacy per-sample direct convolution. + FFT ///< Zero-latency partitioned FFT convolution. +}; /// \brief Base class for all DSP models /// @@ -253,8 +262,12 @@ class Linear : public Buffer /// \param _bias Whether to use bias /// \param weights Model weights (impulse response coefficients) /// \param expected_sample_rate Expected sample rate in Hz (-1.0 if unknown) + /// \param implementation Convolution implementation to use Linear(const int in_channels, const int out_channels, const int receptive_field, const bool _bias, - const std::vector& weights, const double expected_sample_rate = -1.0); + const std::vector& weights, const double expected_sample_rate = -1.0, + const LinearImplementation implementation = LinearImplementation::Auto); + + ~Linear() override; /// \brief Process audio frames /// \param input Input audio buffers @@ -262,9 +275,28 @@ class Linear : public Buffer /// \param num_frames Number of frames to process void process(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames) override; + LinearImplementation GetRequestedImplementation() const { return _requested_implementation; } + LinearImplementation GetActiveImplementation() const { return _active_implementation; } + +protected: + void SetMaxBufferSize(const int maxBufferSize) override; + protected: Eigen::VectorXf _weight; + Eigen::VectorXf _fft_direct_weight; float _bias; + +private: + std::vector _impulse_response; + LinearImplementation _requested_implementation; + LinearImplementation _active_implementation; + std::unique_ptr _fft_state; + + void _configure_implementation(); + void _configure_fft_state(); + void _process_direct(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames); + void _process_fft(NAM_SAMPLE** input, NAM_SAMPLE** output, const int num_frames); + void _run_fft_block(const int channel); }; namespace linear @@ -277,10 +309,17 @@ struct LinearConfig : public ModelConfig bool bias; int in_channels; int out_channels; + LinearImplementation implementation = LinearImplementation::Auto; std::unique_ptr create(std::vector weights, double sampleRate) override; }; +/// \brief Parse a Linear implementation string. +LinearImplementation parse_implementation(const std::string& implementation); + +/// \brief String name for a Linear implementation. +std::string implementation_to_string(const LinearImplementation implementation); + /// \brief Parse Linear configuration from JSON /// \param config JSON configuration object /// \return LinearConfig diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 492fb676..5e3faf1e 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -3,7 +3,7 @@ file(GLOB NAM_SOURCES_SUB "${CMAKE_CURRENT_SOURCE_DIR}/../NAM/*/*.cpp") set(NAM_SOURCES ${NAM_SOURCES_TOP} ${NAM_SOURCES_SUB}) # TODO: add loadmodel and run_tests to TOOLS? -set(TOOLS benchmodel) +set(TOOLS benchmodel bench_linear) add_custom_target(tools ALL DEPENDS ${TOOLS} render) @@ -18,6 +18,7 @@ include_directories(tools ${AUDIO_DSP_TOOLS_DIR}/dsp) add_executable(loadmodel loadmodel.cpp ${NAM_SOURCES}) add_executable(benchmodel benchmodel.cpp ${NAM_SOURCES}) +add_executable(bench_linear bench_linear.cpp ${NAM_SOURCES}) add_executable(render render.cpp ${NAM_SOURCES} ${AUDIO_DSP_TOOLS_WAV_SOURCES}) target_compile_features(render PUBLIC cxx_std_20) # AudioDSPTools wav.cpp has sign-compare issues; don't fail build @@ -81,31 +82,33 @@ endif() source_group(NAM ${CMAKE_CURRENT_SOURCE_DIR} FILES ${NAM_SOURCES}) -target_compile_features(${TOOLS} PUBLIC cxx_std_20) +foreach(TOOL_TARGET IN LISTS TOOLS) + target_compile_features(${TOOL_TARGET} PUBLIC cxx_std_20) -set_target_properties(${TOOLS} - PROPERTIES - CXX_VISIBILITY_PRESET hidden - INTERPROCEDURAL_OPTIMIZATION TRUE - PREFIX "" -) + set_target_properties(${TOOL_TARGET} + PROPERTIES + CXX_VISIBILITY_PRESET hidden + INTERPROCEDURAL_OPTIMIZATION TRUE + PREFIX "" + ) -if (CMAKE_SYSTEM_NAME STREQUAL "Windows") - target_compile_definitions(${TOOLS} PRIVATE NOMINMAX WIN32_LEAN_AND_MEAN) -endif() + if (CMAKE_SYSTEM_NAME STREQUAL "Windows") + target_compile_definitions(${TOOL_TARGET} PRIVATE NOMINMAX WIN32_LEAN_AND_MEAN) + endif() -if (MSVC) - target_compile_options(${TOOLS} PRIVATE - "$<$:/W4>" - "$<$:/O2>" - ) -else() - target_compile_options(${TOOLS} PRIVATE - -Wall -Wextra -Wpedantic -Wstrict-aliasing -Wunreachable-code -Weffc++ -Wno-unused-parameter - "$<$:-Og;-ggdb;-Werror>" - "$<$:-Ofast>" - ) -endif() + if (MSVC) + target_compile_options(${TOOL_TARGET} PRIVATE + "$<$:/W4>" + "$<$:/O2>" + ) + else() + target_compile_options(${TOOL_TARGET} PRIVATE + -Wall -Wextra -Wpedantic -Wstrict-aliasing -Wunreachable-code -Weffc++ -Wno-unused-parameter + "$<$:-Og;-ggdb;-Werror>" + "$<$:-Ofast>" + ) + endif() +endforeach() # There's an error in eigen's # /Users/steve/src/NeuralAmpModelerCore/Dependencies/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h diff --git a/tools/bench_linear.cpp b/tools/bench_linear.cpp new file mode 100644 index 00000000..dc465464 --- /dev/null +++ b/tools/bench_linear.cpp @@ -0,0 +1,201 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "NAM/dsp.h" + +namespace +{ +using Clock = std::chrono::high_resolution_clock; + +struct Options +{ + int sample_rate = 48000; + double input_seconds = 10.0; + int buffer_size = 64; + int receptive_field = 0; + double receptive_field_seconds = 2.0; + bool sweep = false; +}; + +struct RunResult +{ + double seconds = 0.0; + std::vector output; + nam::LinearImplementation active_implementation = nam::LinearImplementation::Direct; +}; + +int parse_int(const char* value, const std::string& name) +{ + char* end = nullptr; + const long parsed = std::strtol(value, &end, 10); + if (end == value || *end != '\0' || parsed <= 0) + throw std::runtime_error(name + " must be a positive integer"); + return (int)parsed; +} + +double parse_double(const char* value, const std::string& name) +{ + char* end = nullptr; + const double parsed = std::strtod(value, &end); + if (end == value || *end != '\0' || parsed <= 0.0) + throw std::runtime_error(name + " must be a positive number"); + return parsed; +} + +Options parse_options(int argc, char* argv[]) +{ + Options options; + for (int i = 1; i < argc; i++) + { + const std::string arg(argv[i]); + auto require_value = [&](const std::string& name) { + if (i + 1 >= argc) + throw std::runtime_error(name + " requires a value"); + return argv[++i]; + }; + + if (arg == "--sample-rate") + options.sample_rate = parse_int(require_value(arg), arg); + else if (arg == "--input-seconds") + options.input_seconds = parse_double(require_value(arg), arg); + else if (arg == "--rf") + options.receptive_field = parse_int(require_value(arg), arg); + else if (arg == "--rf-seconds") + options.receptive_field_seconds = parse_double(require_value(arg), arg); + else if (arg == "--buffer-size") + options.buffer_size = parse_int(require_value(arg), arg); + else if (arg == "--sweep") + options.sweep = true; + else + throw std::runtime_error("Unknown argument: " + arg); + } + if (options.receptive_field == 0) + options.receptive_field = (int)std::llround(options.receptive_field_seconds * options.sample_rate); + return options; +} + +std::vector make_weights(const int receptive_field) +{ + std::vector weights; + weights.reserve(receptive_field); + for (int i = 0; i < receptive_field; i++) + { + const double envelope = std::exp(-5.0 * (double)i / std::max(1, receptive_field)); + const double modulated = std::sin(0.019 * (i + 1)) + 0.35 * std::cos(0.071 * (i + 1)); + weights.push_back((float)(0.01 * envelope * modulated)); + } + return weights; +} + +std::vector make_input(const int num_samples) +{ + std::vector input(num_samples); + for (int i = 0; i < num_samples; i++) + { + const double sample = 0.2 * std::sin(0.011 * i) + 0.1 * std::sin(0.037 * i) + 0.03 * std::cos(0.101 * i); + input[i] = (NAM_SAMPLE)sample; + } + return input; +} + +RunResult run_model(const std::vector& weights, const std::vector& input, const Options& options, + const nam::LinearImplementation implementation) +{ + nam::Linear model(1, 1, (int)weights.size(), false, weights, (double)options.sample_rate, implementation); + model.Reset((double)options.sample_rate, options.buffer_size); + + RunResult result; + result.output.assign(input.size(), (NAM_SAMPLE)0.0); + result.active_implementation = model.GetActiveImplementation(); + + NAM_SAMPLE* input_ptrs[1]; + NAM_SAMPLE* output_ptrs[1]; + + const auto start = Clock::now(); + for (size_t offset = 0; offset < input.size(); offset += options.buffer_size) + { + const int count = std::min(options.buffer_size, (int)(input.size() - offset)); + input_ptrs[0] = const_cast(&input[offset]); + output_ptrs[0] = &result.output[offset]; + model.process(input_ptrs, output_ptrs, count); + } + const auto end = Clock::now(); + result.seconds = std::chrono::duration(end - start).count(); + + return result; +} + +NAM_SAMPLE max_abs_diff(const std::vector& a, const std::vector& b) +{ + NAM_SAMPLE result = 0.0; + for (size_t i = 0; i < a.size(); i++) + result = std::max(result, std::abs(a[i] - b[i])); + return result; +} + +void print_result(const int receptive_field, const std::string& requested, const RunResult& result, + const double input_seconds, const NAM_SAMPLE max_diff) +{ + const double rtf = result.seconds / input_seconds; + std::cout << receptive_field << "," << requested << "," + << nam::linear::implementation_to_string(result.active_implementation) << "," << result.seconds << "," + << rtf << "," << max_diff << "\n"; +} + +void run_case(const Options& options) +{ + const auto weights = make_weights(options.receptive_field); + const auto input = make_input((int)std::llround(options.input_seconds * options.sample_rate)); + + const auto direct = run_model(weights, input, options, nam::LinearImplementation::Direct); + const auto fft = run_model(weights, input, options, nam::LinearImplementation::FFT); + const auto automatic = run_model(weights, input, options, nam::LinearImplementation::Auto); + + const auto fft_diff = max_abs_diff(direct.output, fft.output); + const auto auto_diff = max_abs_diff(direct.output, automatic.output); + + print_result(options.receptive_field, "direct", direct, options.input_seconds, 0.0); + print_result(options.receptive_field, "fft", fft, options.input_seconds, fft_diff); + print_result(options.receptive_field, "auto", automatic, options.input_seconds, auto_diff); +} + +void run_sweep(Options options) +{ + const std::vector receptive_fields{64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384}; + for (const int receptive_field : receptive_fields) + { + options.receptive_field = receptive_field; + run_case(options); + } +} + +} // namespace + +int main(int argc, char* argv[]) +{ + try + { + const auto options = parse_options(argc, argv); + std::cout << std::setprecision(10); + std::cout << "receptive_field,requested,active,seconds,rtf,max_abs_diff_vs_direct\n"; + if (options.sweep) + run_sweep(options); + else + run_case(options); + } + catch (const std::exception& e) + { + std::cerr << "Error: " << e.what() << "\n"; + std::cerr << "Usage: bench_linear [--sample-rate N] [--input-seconds S] [--rf N] [--rf-seconds S]" + " [--buffer-size N] [--sweep]\n"; + return 1; + } + return 0; +} diff --git a/tools/run_tests.cpp b/tools/run_tests.cpp index 367ba3d2..b9521acd 100644 --- a/tools/run_tests.cpp +++ b/tools/run_tests.cpp @@ -26,6 +26,7 @@ #include "test/test_wavenet_gating_compatibility.cpp" #include "test/test_blending_detailed.cpp" #include "test/test_input_buffer_verification.cpp" +#include "test/test_linear.cpp" #include "test/test_lstm.cpp" #include "test/test_wavenet_configurable_gating.cpp" #include "test/test_noncontiguous_blocks.cpp" @@ -84,6 +85,11 @@ int main() test_dsp::test_set_input_level(); test_dsp::test_set_output_level(); + test_linear::test_direct_known_values(); + test_linear::test_fft_matches_direct_irregular_chunks(); + test_linear::test_auto_selection(); + test_linear::test_parse_implementation(); + test_ring_buffer::test_construct(); test_ring_buffer::test_reset(); test_ring_buffer::test_reset_with_receptive_field(); diff --git a/tools/test/test_linear.cpp b/tools/test/test_linear.cpp new file mode 100644 index 00000000..bffe9b4e --- /dev/null +++ b/tools/test/test_linear.cpp @@ -0,0 +1,136 @@ +// Tests for Linear DSP models + +#include "NAM/dsp.h" + +#include +#include +#include +#include +#include + +namespace test_linear +{ +namespace +{ + +std::vector process_model(nam::Linear& model, const std::vector& input, + const std::vector& chunk_sizes) +{ + std::vector output(input.size(), (NAM_SAMPLE)0.0); + NAM_SAMPLE* input_ptrs[1]; + NAM_SAMPLE* output_ptrs[1]; + + size_t offset = 0; + size_t chunk_index = 0; + while (offset < input.size()) + { + const int requested = chunk_sizes[chunk_index % chunk_sizes.size()]; + const int count = std::min(requested, (int)(input.size() - offset)); + input_ptrs[0] = const_cast(&input[offset]); + output_ptrs[0] = &output[offset]; + model.process(input_ptrs, output_ptrs, count); + offset += count; + chunk_index++; + } + return output; +} + +std::vector make_input(const int num_samples) +{ + std::vector input(num_samples); + for (int i = 0; i < num_samples; i++) + input[i] = (NAM_SAMPLE)(0.2 * std::sin(0.013 * i) + 0.05 * std::cos(0.071 * i)); + return input; +} + +std::vector make_weights(const int receptive_field, const bool bias) +{ + std::vector weights; + weights.reserve(receptive_field + (bias ? 1 : 0)); + for (int i = 0; i < receptive_field; i++) + weights.push_back((float)(std::exp(-0.001 * i) * std::sin(0.037 * (i + 1)) * 0.01)); + if (bias) + weights.push_back(0.03125f); + return weights; +} + +void assert_near(const NAM_SAMPLE actual, const NAM_SAMPLE expected, const NAM_SAMPLE tolerance) +{ + assert(std::abs(actual - expected) <= tolerance); +} + +} // namespace + +void test_direct_known_values() +{ + const std::vector weights{0.5f, -0.25f, 0.125f}; + nam::Linear model(1, 1, 3, false, weights, 48000.0, nam::LinearImplementation::Direct); + + const std::vector input{(NAM_SAMPLE)1.0, (NAM_SAMPLE)2.0, (NAM_SAMPLE)3.0, (NAM_SAMPLE)4.0}; + const auto output = process_model(model, input, {4}); + + assert_near(output[0], 0.5, 1.0e-7); + assert_near(output[1], 0.75, 1.0e-7); + assert_near(output[2], 1.125, 1.0e-7); + assert_near(output[3], 1.5, 1.0e-7); +} + +void test_fft_matches_direct_irregular_chunks() +{ + const int receptive_field = 1536; + const bool bias = true; + const auto weights = make_weights(receptive_field, bias); + const auto input = make_input(4096); + + nam::Linear direct(1, 1, receptive_field, bias, weights, 48000.0, nam::LinearImplementation::Direct); + nam::Linear fft(1, 1, receptive_field, bias, weights, 48000.0, nam::LinearImplementation::FFT); + + const std::vector chunks{1, 17, 64, 255, 3, 512, 31}; + const auto direct_output = process_model(direct, input, chunks); + const auto fft_output = process_model(fft, input, chunks); + + NAM_SAMPLE max_abs_diff = 0.0; + for (size_t i = 0; i < input.size(); i++) + max_abs_diff = std::max(max_abs_diff, std::abs(direct_output[i] - fft_output[i])); + + assert(max_abs_diff < 5.0e-5); +} + +void test_auto_selection() +{ + const auto short_weights = make_weights(128, false); + nam::Linear short_model(1, 1, 128, false, short_weights, 48000.0); + assert(short_model.GetRequestedImplementation() == nam::LinearImplementation::Auto); + assert(short_model.GetActiveImplementation() == nam::LinearImplementation::Direct); + + const auto cutoff_weights = make_weights(256, false); + nam::Linear cutoff_model(1, 1, 256, false, cutoff_weights, 48000.0); + assert(cutoff_model.GetRequestedImplementation() == nam::LinearImplementation::Auto); + assert(cutoff_model.GetActiveImplementation() == nam::LinearImplementation::Direct); + + const auto fft_weights = make_weights(512, false); + nam::Linear fft_model(1, 1, 512, false, fft_weights, 48000.0); + assert(fft_model.GetRequestedImplementation() == nam::LinearImplementation::Auto); + assert(fft_model.GetActiveImplementation() == nam::LinearImplementation::FFT); +} + +void test_parse_implementation() +{ + assert(nam::linear::parse_implementation("auto") == nam::LinearImplementation::Auto); + assert(nam::linear::parse_implementation("legacy") == nam::LinearImplementation::Direct); + assert(nam::linear::parse_implementation("partitioned-fft") == nam::LinearImplementation::FFT); + assert(nam::linear::implementation_to_string(nam::LinearImplementation::Direct) == "direct"); + + bool threw = false; + try + { + nam::linear::parse_implementation("not-a-real-implementation"); + } + catch (const std::runtime_error&) + { + threw = true; + } + assert(threw); +} + +} // namespace test_linear From 84ae701c2474ee4ecade514f227ef13a1e20874d Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Tue, 9 Jun 2026 15:24:49 -0700 Subject: [PATCH 2/3] Add Linear realtime safety tests --- tools/CMakeLists.txt | 3 +- tools/bench_linear.cpp | 201 ------------------------------------- tools/run_tests.cpp | 4 + tools/test/test_linear.cpp | 61 +++++++++++ 4 files changed, 66 insertions(+), 203 deletions(-) delete mode 100644 tools/bench_linear.cpp diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 5e3faf1e..5bf76106 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -3,7 +3,7 @@ file(GLOB NAM_SOURCES_SUB "${CMAKE_CURRENT_SOURCE_DIR}/../NAM/*/*.cpp") set(NAM_SOURCES ${NAM_SOURCES_TOP} ${NAM_SOURCES_SUB}) # TODO: add loadmodel and run_tests to TOOLS? -set(TOOLS benchmodel bench_linear) +set(TOOLS benchmodel) add_custom_target(tools ALL DEPENDS ${TOOLS} render) @@ -18,7 +18,6 @@ include_directories(tools ${AUDIO_DSP_TOOLS_DIR}/dsp) add_executable(loadmodel loadmodel.cpp ${NAM_SOURCES}) add_executable(benchmodel benchmodel.cpp ${NAM_SOURCES}) -add_executable(bench_linear bench_linear.cpp ${NAM_SOURCES}) add_executable(render render.cpp ${NAM_SOURCES} ${AUDIO_DSP_TOOLS_WAV_SOURCES}) target_compile_features(render PUBLIC cxx_std_20) # AudioDSPTools wav.cpp has sign-compare issues; don't fail build diff --git a/tools/bench_linear.cpp b/tools/bench_linear.cpp deleted file mode 100644 index dc465464..00000000 --- a/tools/bench_linear.cpp +++ /dev/null @@ -1,201 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "NAM/dsp.h" - -namespace -{ -using Clock = std::chrono::high_resolution_clock; - -struct Options -{ - int sample_rate = 48000; - double input_seconds = 10.0; - int buffer_size = 64; - int receptive_field = 0; - double receptive_field_seconds = 2.0; - bool sweep = false; -}; - -struct RunResult -{ - double seconds = 0.0; - std::vector output; - nam::LinearImplementation active_implementation = nam::LinearImplementation::Direct; -}; - -int parse_int(const char* value, const std::string& name) -{ - char* end = nullptr; - const long parsed = std::strtol(value, &end, 10); - if (end == value || *end != '\0' || parsed <= 0) - throw std::runtime_error(name + " must be a positive integer"); - return (int)parsed; -} - -double parse_double(const char* value, const std::string& name) -{ - char* end = nullptr; - const double parsed = std::strtod(value, &end); - if (end == value || *end != '\0' || parsed <= 0.0) - throw std::runtime_error(name + " must be a positive number"); - return parsed; -} - -Options parse_options(int argc, char* argv[]) -{ - Options options; - for (int i = 1; i < argc; i++) - { - const std::string arg(argv[i]); - auto require_value = [&](const std::string& name) { - if (i + 1 >= argc) - throw std::runtime_error(name + " requires a value"); - return argv[++i]; - }; - - if (arg == "--sample-rate") - options.sample_rate = parse_int(require_value(arg), arg); - else if (arg == "--input-seconds") - options.input_seconds = parse_double(require_value(arg), arg); - else if (arg == "--rf") - options.receptive_field = parse_int(require_value(arg), arg); - else if (arg == "--rf-seconds") - options.receptive_field_seconds = parse_double(require_value(arg), arg); - else if (arg == "--buffer-size") - options.buffer_size = parse_int(require_value(arg), arg); - else if (arg == "--sweep") - options.sweep = true; - else - throw std::runtime_error("Unknown argument: " + arg); - } - if (options.receptive_field == 0) - options.receptive_field = (int)std::llround(options.receptive_field_seconds * options.sample_rate); - return options; -} - -std::vector make_weights(const int receptive_field) -{ - std::vector weights; - weights.reserve(receptive_field); - for (int i = 0; i < receptive_field; i++) - { - const double envelope = std::exp(-5.0 * (double)i / std::max(1, receptive_field)); - const double modulated = std::sin(0.019 * (i + 1)) + 0.35 * std::cos(0.071 * (i + 1)); - weights.push_back((float)(0.01 * envelope * modulated)); - } - return weights; -} - -std::vector make_input(const int num_samples) -{ - std::vector input(num_samples); - for (int i = 0; i < num_samples; i++) - { - const double sample = 0.2 * std::sin(0.011 * i) + 0.1 * std::sin(0.037 * i) + 0.03 * std::cos(0.101 * i); - input[i] = (NAM_SAMPLE)sample; - } - return input; -} - -RunResult run_model(const std::vector& weights, const std::vector& input, const Options& options, - const nam::LinearImplementation implementation) -{ - nam::Linear model(1, 1, (int)weights.size(), false, weights, (double)options.sample_rate, implementation); - model.Reset((double)options.sample_rate, options.buffer_size); - - RunResult result; - result.output.assign(input.size(), (NAM_SAMPLE)0.0); - result.active_implementation = model.GetActiveImplementation(); - - NAM_SAMPLE* input_ptrs[1]; - NAM_SAMPLE* output_ptrs[1]; - - const auto start = Clock::now(); - for (size_t offset = 0; offset < input.size(); offset += options.buffer_size) - { - const int count = std::min(options.buffer_size, (int)(input.size() - offset)); - input_ptrs[0] = const_cast(&input[offset]); - output_ptrs[0] = &result.output[offset]; - model.process(input_ptrs, output_ptrs, count); - } - const auto end = Clock::now(); - result.seconds = std::chrono::duration(end - start).count(); - - return result; -} - -NAM_SAMPLE max_abs_diff(const std::vector& a, const std::vector& b) -{ - NAM_SAMPLE result = 0.0; - for (size_t i = 0; i < a.size(); i++) - result = std::max(result, std::abs(a[i] - b[i])); - return result; -} - -void print_result(const int receptive_field, const std::string& requested, const RunResult& result, - const double input_seconds, const NAM_SAMPLE max_diff) -{ - const double rtf = result.seconds / input_seconds; - std::cout << receptive_field << "," << requested << "," - << nam::linear::implementation_to_string(result.active_implementation) << "," << result.seconds << "," - << rtf << "," << max_diff << "\n"; -} - -void run_case(const Options& options) -{ - const auto weights = make_weights(options.receptive_field); - const auto input = make_input((int)std::llround(options.input_seconds * options.sample_rate)); - - const auto direct = run_model(weights, input, options, nam::LinearImplementation::Direct); - const auto fft = run_model(weights, input, options, nam::LinearImplementation::FFT); - const auto automatic = run_model(weights, input, options, nam::LinearImplementation::Auto); - - const auto fft_diff = max_abs_diff(direct.output, fft.output); - const auto auto_diff = max_abs_diff(direct.output, automatic.output); - - print_result(options.receptive_field, "direct", direct, options.input_seconds, 0.0); - print_result(options.receptive_field, "fft", fft, options.input_seconds, fft_diff); - print_result(options.receptive_field, "auto", automatic, options.input_seconds, auto_diff); -} - -void run_sweep(Options options) -{ - const std::vector receptive_fields{64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384}; - for (const int receptive_field : receptive_fields) - { - options.receptive_field = receptive_field; - run_case(options); - } -} - -} // namespace - -int main(int argc, char* argv[]) -{ - try - { - const auto options = parse_options(argc, argv); - std::cout << std::setprecision(10); - std::cout << "receptive_field,requested,active,seconds,rtf,max_abs_diff_vs_direct\n"; - if (options.sweep) - run_sweep(options); - else - run_case(options); - } - catch (const std::exception& e) - { - std::cerr << "Error: " << e.what() << "\n"; - std::cerr << "Usage: bench_linear [--sample-rate N] [--input-seconds S] [--rf N] [--rf-seconds S]" - " [--buffer-size N] [--sweep]\n"; - return 1; - } - return 0; -} diff --git a/tools/run_tests.cpp b/tools/run_tests.cpp index b9521acd..0f9d50a3 100644 --- a/tools/run_tests.cpp +++ b/tools/run_tests.cpp @@ -89,6 +89,10 @@ int main() test_linear::test_fft_matches_direct_irregular_chunks(); test_linear::test_auto_selection(); test_linear::test_parse_implementation(); + test_linear::test_direct_process_realtime_safe(); + test_linear::test_fft_process_realtime_safe(); + test_linear::test_auto_direct_process_realtime_safe(); + test_linear::test_auto_fft_process_realtime_safe(); test_ring_buffer::test_construct(); test_ring_buffer::test_reset(); diff --git a/tools/test/test_linear.cpp b/tools/test/test_linear.cpp index bffe9b4e..ada26e70 100644 --- a/tools/test/test_linear.cpp +++ b/tools/test/test_linear.cpp @@ -8,6 +8,8 @@ #include #include +#include "allocation_tracking.h" + namespace test_linear { namespace @@ -59,6 +61,41 @@ void assert_near(const NAM_SAMPLE actual, const NAM_SAMPLE expected, const NAM_S assert(std::abs(actual - expected) <= tolerance); } +void assert_process_realtime_safe(const int receptive_field, const nam::LinearImplementation requested_implementation, + const nam::LinearImplementation expected_active_implementation, const char* test_name) +{ + const int max_buffer_size = 512; + const auto weights = make_weights(receptive_field, true); + nam::Linear model(1, 1, receptive_field, true, weights, 48000.0, requested_implementation); + model.Reset(48000.0, max_buffer_size); + assert(model.GetActiveImplementation() == expected_active_implementation); + + std::vector input(max_buffer_size); + std::vector output(max_buffer_size); + for (int i = 0; i < max_buffer_size; i++) + input[i] = (NAM_SAMPLE)(0.1 * std::sin(0.021 * i) + 0.03 * std::cos(0.017 * i)); + + NAM_SAMPLE* input_ptrs[1] = {input.data()}; + NAM_SAMPLE* output_ptrs[1] = {output.data()}; + + model.process(input_ptrs, output_ptrs, max_buffer_size); + + const int block_sizes[] = {1, 7, 32, 64, 128, 256, 3, 511, 512}; + allocation_tracking::run_allocation_test_no_allocations( + nullptr, + [&]() { + for (int pass = 0; pass < 8; pass++) + { + for (const int block_size : block_sizes) + model.process(input_ptrs, output_ptrs, block_size); + } + }, + nullptr, test_name); + + for (int i = 0; i < max_buffer_size; i++) + assert(std::isfinite(output[i])); +} + } // namespace void test_direct_known_values() @@ -133,4 +170,28 @@ void test_parse_implementation() assert(threw); } +void test_direct_process_realtime_safe() +{ + assert_process_realtime_safe( + 512, nam::LinearImplementation::Direct, nam::LinearImplementation::Direct, "Linear direct process real-time safe"); +} + +void test_fft_process_realtime_safe() +{ + assert_process_realtime_safe( + 4096, nam::LinearImplementation::FFT, nam::LinearImplementation::FFT, "Linear FFT process real-time safe"); +} + +void test_auto_direct_process_realtime_safe() +{ + assert_process_realtime_safe(128, nam::LinearImplementation::Auto, nam::LinearImplementation::Direct, + "Linear auto direct process real-time safe"); +} + +void test_auto_fft_process_realtime_safe() +{ + assert_process_realtime_safe( + 4096, nam::LinearImplementation::Auto, nam::LinearImplementation::FFT, "Linear auto FFT process real-time safe"); +} + } // namespace test_linear From 9f08b8735378e740f16d463e27c492b8f523371b Mon Sep 17 00:00:00 2001 From: Steven Atkinson Date: Tue, 9 Jun 2026 15:32:23 -0700 Subject: [PATCH 3/3] Revert unnecessary tools CMake change --- tools/CMakeLists.txt | 46 +++++++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt index 5bf76106..492fb676 100644 --- a/tools/CMakeLists.txt +++ b/tools/CMakeLists.txt @@ -81,33 +81,31 @@ endif() source_group(NAM ${CMAKE_CURRENT_SOURCE_DIR} FILES ${NAM_SOURCES}) -foreach(TOOL_TARGET IN LISTS TOOLS) - target_compile_features(${TOOL_TARGET} PUBLIC cxx_std_20) +target_compile_features(${TOOLS} PUBLIC cxx_std_20) - set_target_properties(${TOOL_TARGET} - PROPERTIES - CXX_VISIBILITY_PRESET hidden - INTERPROCEDURAL_OPTIMIZATION TRUE - PREFIX "" - ) +set_target_properties(${TOOLS} + PROPERTIES + CXX_VISIBILITY_PRESET hidden + INTERPROCEDURAL_OPTIMIZATION TRUE + PREFIX "" +) - if (CMAKE_SYSTEM_NAME STREQUAL "Windows") - target_compile_definitions(${TOOL_TARGET} PRIVATE NOMINMAX WIN32_LEAN_AND_MEAN) - endif() +if (CMAKE_SYSTEM_NAME STREQUAL "Windows") + target_compile_definitions(${TOOLS} PRIVATE NOMINMAX WIN32_LEAN_AND_MEAN) +endif() - if (MSVC) - target_compile_options(${TOOL_TARGET} PRIVATE - "$<$:/W4>" - "$<$:/O2>" - ) - else() - target_compile_options(${TOOL_TARGET} PRIVATE - -Wall -Wextra -Wpedantic -Wstrict-aliasing -Wunreachable-code -Weffc++ -Wno-unused-parameter - "$<$:-Og;-ggdb;-Werror>" - "$<$:-Ofast>" - ) - endif() -endforeach() +if (MSVC) + target_compile_options(${TOOLS} PRIVATE + "$<$:/W4>" + "$<$:/O2>" + ) +else() + target_compile_options(${TOOLS} PRIVATE + -Wall -Wextra -Wpedantic -Wstrict-aliasing -Wunreachable-code -Weffc++ -Wno-unused-parameter + "$<$:-Og;-ggdb;-Werror>" + "$<$:-Ofast>" + ) +endif() # There's an error in eigen's # /Users/steve/src/NeuralAmpModelerCore/Dependencies/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h