From 850d1e2b9c08afadcb0e182b970c855a5d45fd5d Mon Sep 17 00:00:00 2001 From: Michael Rogenmoser Date: Tue, 2 Jun 2026 01:17:37 +0200 Subject: [PATCH 01/12] script: tolerate slang parse errors so encrypted RTL doesn't abort --top MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit IEEE-1735 encrypted IP makes slang trip at the surrounding `endmodule` even though the lexer skips the protect envelope itself. Previously, that error in any one file aborted the entire `bender script --top X` invocation because session.cpp threw on the first per-file error. bender-slang now collects per-tree pass/fail status instead of throwing on parse errors. System-load failures (file unreadable) are still fatal — only parse errors are tolerated. Diagnostics are still surfaced to stderr per file so users can see exactly what slang complained about. A new failed_tree_indices() bridge function lets the Rust side learn which trees had errors. apply_slang_filters builds a force_kept_paths set from those indices and unions it with the reachability-derived kept_paths during file filtering, then prints a clear warning per file: "slang reported parse errors in ; preserving in script output regardless of reachability". The tolerance is on by default with loud per-file warnings so real syntax bugs still get noticed; users wanting strict behavior can read stderr and act on it. The dangling-reference case (parent instantiates a module slang can't see) is already handled in analysis.cpp's DFS, which silently skips refs missing from the symbol table. Fixture: tests/pickle has a new `encrypted` target with three files — encrypted_ip.sv (realistic IEEE-1735 envelope around a module body), encrypted_user.sv (instantiates encrypted_ip via a dangling ref), and encrypted_top.sv. The test confirms all three survive `--top encrypted_top` even though slang fails to parse encrypted_ip.sv. Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/bender-slang/cpp/analysis.cpp | 14 +++++++++++++ crates/bender-slang/cpp/session.cpp | 26 ++++++++++++++++------- crates/bender-slang/cpp/slang_bridge.h | 9 ++++++++ crates/bender-slang/src/lib.rs | 14 +++++++++++++ src/cmd/script.rs | 19 ++++++++++++++++- tests/pickle/Bender.yml | 6 ++++++ tests/pickle/src/encrypted_ip.sv | 12 +++++++++++ tests/pickle/src/encrypted_top.sv | 3 +++ tests/pickle/src/encrypted_user.sv | 6 ++++++ tests/script.rs | 29 ++++++++++++++++++++++++++ 10 files changed, 129 insertions(+), 9 deletions(-) create mode 100644 tests/pickle/src/encrypted_ip.sv create mode 100644 tests/pickle/src/encrypted_top.sv create mode 100644 tests/pickle/src/encrypted_user.sv diff --git a/crates/bender-slang/cpp/analysis.cpp b/crates/bender-slang/cpp/analysis.cpp index 1233d261..bdb0b152 100644 --- a/crates/bender-slang/cpp/analysis.cpp +++ b/crates/bender-slang/cpp/analysis.cpp @@ -145,6 +145,20 @@ rust::Vec reachable_tree_indices(const SlangSession& session, con return result; } +// Returns the indices of trees whose parse reported slang errors. Used by the Rust side to +// force-keep such files in the output regardless of reachability — useful for IEEE-1735 +// encrypted IP that slang can't fully digest. +rust::Vec failed_tree_indices(const SlangSession& session) { + const auto& errs = session.tree_parse_errors(); + rust::Vec out; + for (size_t i = 0; i < errs.size(); ++i) { + if (errs[i]) { + out.push_back(static_cast(i)); + } + } + return out; +} + // Returns the deduped, canonical filesystem paths of every header file that was actually loaded // via `include directives while parsing the requested trees. Trees from different parse groups // may live in different SourceManagers, so the lookup is per-tree. diff --git a/crates/bender-slang/cpp/session.cpp b/crates/bender-slang/cpp/session.cpp index 783e4bd8..c73d80d1 100644 --- a/crates/bender-slang/cpp/session.cpp +++ b/crates/bender-slang/cpp/session.cpp @@ -3,6 +3,7 @@ #include "slang_bridge.h" +#include #include using namespace slang; @@ -42,11 +43,15 @@ std::vector> SlangContext::parse_files(const rust::V std::vector> out; out.reserve(paths.size()); + parseErrors.clear(); + parseErrors.reserve(paths.size()); for (const auto& path : paths) { string_view pathView(path.data(), path.size()); auto result = SyntaxTree::fromFile(pathView, sourceManager, options); + // A system-level failure (file unreadable, etc.) is still fatal: the caller asked for + // this path and we couldn't even open it. Parse errors are tolerated below. if (!result) { auto& err = result.error(); std::string msg = "System Error loading '" + std::string(err.second) + "': " + err.first.message(); @@ -63,15 +68,16 @@ std::vector> SlangContext::parse_files(const rust::V diagEngine.issue(diag); } + // Surface diagnostics for any file with errors, but keep going — the Rust side decides + // what to do with the (possibly partial) tree. This matters for IEEE-1735 encrypted IP: + // slang skips the protect envelope but still trips on the surrounding endmodule, and + // those files shouldn't sink the whole `bender script` invocation. if (hasErrors) { - std::string rendered = diagClient->getString(); - if (rendered.empty()) { - rendered = "Failed to parse '" + std::string(pathView) + "'."; - } - throw std::runtime_error(rendered); + std::cerr << diagClient->getString(); } out.push_back(tree); + parseErrors.push_back(hasErrors); } return out; @@ -86,11 +92,15 @@ void SlangSession::parse_group(const rust::Vec& files, const rust: ctx->set_includes(includes); ctx->set_defines(defines); - // Parse the files and store the resulting syntax trees in the session. + // Parse the files and store the resulting syntax trees in the session, alongside their + // pass/fail status so callers can decide how to handle partially-parsed files. auto parsed = ctx->parse_files(files); + const auto& errs = ctx->last_parse_errors(); allTrees.reserve(allTrees.size() + parsed.size()); - for (const auto& tree : parsed) { - allTrees.push_back(tree); + treeParseErrors.reserve(treeParseErrors.size() + parsed.size()); + for (size_t i = 0; i < parsed.size(); ++i) { + allTrees.push_back(parsed[i]); + treeParseErrors.push_back(i < errs.size() && errs[i]); } contexts.push_back(std::move(ctx)); diff --git a/crates/bender-slang/cpp/slang_bridge.h b/crates/bender-slang/cpp/slang_bridge.h index 79840a66..c127e88d 100644 --- a/crates/bender-slang/cpp/slang_bridge.h +++ b/crates/bender-slang/cpp/slang_bridge.h @@ -30,11 +30,16 @@ class SlangContext { std::vector> parse_files(const rust::Vec& paths); + // For each tree returned by the last parse_files call, whether slang reported parse errors. + // Parallel to that return vector. + const std::vector& last_parse_errors() const { return parseErrors; } + private: slang::SourceManager sourceManager; slang::parsing::PreprocessorOptions ppOptions; slang::DiagnosticEngine diagEngine; std::shared_ptr diagClient; + std::vector parseErrors; }; class SlangSession { @@ -43,10 +48,13 @@ class SlangSession { const rust::Vec& defines); const std::vector>& trees() const { return allTrees; } + // Parallel to trees(): true if slang reported parse errors for that tree. + const std::vector& tree_parse_errors() const { return treeParseErrors; } private: std::vector> contexts; std::vector> allTrees; + std::vector treeParseErrors; }; class SyntaxTreeRewriter { @@ -80,6 +88,7 @@ rust::String dump_tree_json(std::shared_ptr tree); rust::Vec reachable_tree_indices(const SlangSession& session, const rust::Vec& tops); rust::Vec resolved_include_paths_for(const SlangSession& session, const rust::Vec& tree_indices); +rust::Vec failed_tree_indices(const SlangSession& session); std::size_t tree_count(const SlangSession& session); std::shared_ptr tree_at(const SlangSession& session, std::size_t index); std::uint64_t renamed_declarations(const SyntaxTreeRewriter& rewriter); diff --git a/crates/bender-slang/src/lib.rs b/crates/bender-slang/src/lib.rs index 9a789662..7a3e6d72 100644 --- a/crates/bender-slang/src/lib.rs +++ b/crates/bender-slang/src/lib.rs @@ -67,6 +67,8 @@ mod ffi { tree_indices: &Vec, ) -> Result>; + fn failed_tree_indices(session: &SlangSession) -> Result>; + fn tree_count(session: &SlangSession) -> usize; fn tree_at(session: &SlangSession, index: usize) -> Result>; @@ -203,6 +205,18 @@ impl SlangSession { Ok(indices.into_iter().map(|i| i as usize).collect()) } + /// Returns the indices of trees that slang reported parse errors for. Such trees are kept + /// in the session (their partial metadata is still available) so callers can decide how to + /// handle them — typically by force-keeping the underlying file in any filtered output. + pub fn failed_indices(&self) -> Result> { + let indices = ffi::failed_tree_indices(self.inner.as_ref().unwrap()).map_err(|cause| { + SlangError::TrimByTop { + message: cause.to_string(), + } + })?; + Ok(indices.into_iter().map(|i| i as usize).collect()) + } + /// Returns the canonical filesystem paths of every header that was actually loaded via an /// `include directive while parsing the given trees. Useful for figuring out which include /// directories were actually consulted. diff --git a/src/cmd/script.rs b/src/cmd/script.rs index a7c41919..3867d9d3 100644 --- a/src/cmd/script.rs +++ b/src/cmd/script.rs @@ -545,6 +545,21 @@ fn apply_slang_filters<'a>( } } + // Files whose slang parse failed get force-kept regardless of reachability. IEEE-1735 + // encrypted IP routinely trips the parser at the end of the protect envelope; we can't + // analyze those modules, but we mustn't drop them from the script either. + let failed_indices = session.failed_indices().into_diagnostic()?; + let force_kept_paths: HashSet<&Path> = failed_indices + .iter() + .filter_map(|i| index_to_path.get(i).copied()) + .collect(); + for p in &force_kept_paths { + eprintln!( + "warning: slang reported parse errors in {}; preserving in script output regardless of reachability", + p.display() + ); + } + // Determine which trees feed into the include / file-retention questions. With `--top` we // only look at trees reachable from those top modules; without `--top` we use every tree // (relevant when the caller asked for include-dir trimming but no file filtering). @@ -586,7 +601,9 @@ fn apply_slang_filters<'a>( .map(|mut group| { if filter_files { group.files.retain(|f| match f { - SourceFile::File(p, Some(SourceType::Verilog)) => kept_paths.contains(p), + SourceFile::File(p, Some(SourceType::Verilog)) => { + kept_paths.contains(p) || force_kept_paths.contains(p) + } _ => true, }); } diff --git a/tests/pickle/Bender.yml b/tests/pickle/Bender.yml index 861040b1..c174ceeb 100644 --- a/tests/pickle/Bender.yml +++ b/tests/pickle/Bender.yml @@ -25,3 +25,9 @@ sources: - src/dup_a.sv - src/dup_b.sv - src/dup_top.sv + + - target: encrypted + files: + - src/encrypted_ip.sv + - src/encrypted_user.sv + - src/encrypted_top.sv diff --git a/tests/pickle/src/encrypted_ip.sv b/tests/pickle/src/encrypted_ip.sv new file mode 100644 index 00000000..62119a42 --- /dev/null +++ b/tests/pickle/src/encrypted_ip.sv @@ -0,0 +1,12 @@ +// Realistic-shape IEEE-1735 encrypted module: slang lexes/skips the protect +// envelope but the parser still trips on the surrounding endmodule, which is +// what we want this fixture to exercise. +module encrypted_ip (); +`pragma protect begin_protected +`pragma protect encrypt_agent = "Test" +`pragma protect data_method = "aes128-cbc" +`pragma protect encoding = ( enctype = "base64", bytes = 33 ) +`pragma protect data_block +QUJDREVGRzEyMzQ1Njc4OTBhYmNkZWZnaGlqa2xtbm9wcXIK +`pragma protect end_protected +endmodule diff --git a/tests/pickle/src/encrypted_top.sv b/tests/pickle/src/encrypted_top.sv new file mode 100644 index 00000000..74dc0962 --- /dev/null +++ b/tests/pickle/src/encrypted_top.sv @@ -0,0 +1,3 @@ +module encrypted_top (); + encrypted_user u(); +endmodule diff --git a/tests/pickle/src/encrypted_user.sv b/tests/pickle/src/encrypted_user.sv new file mode 100644 index 00000000..f7fa526f --- /dev/null +++ b/tests/pickle/src/encrypted_user.sv @@ -0,0 +1,6 @@ +// Plain RTL that instantiates the encrypted IP. Slang parses this file fine +// but the encrypted_ip reference has no corresponding tree (the encrypted +// file failed to parse), so the dangling-ref tolerance kicks in. +module encrypted_user (); + encrypted_ip u_enc(); +endmodule diff --git a/tests/script.rs b/tests/script.rs index ca0b39a7..9826e16e 100644 --- a/tests/script.rs +++ b/tests/script.rs @@ -171,6 +171,35 @@ mod tests { assert!(!files.contains("unused_top.sv")); } + /// Encrypted RTL (IEEE-1735 protect envelopes) makes slang trip at the surrounding + /// `endmodule` even though the envelope itself is skipped. The filter must: + /// * not abort `bender script --top` because of slang errors in encrypted IP, and + /// * preserve the encrypted file in the output even though no internal reference resolves + /// to it (its module symbol is hidden behind the protect envelope). + #[test] + fn script_top_keeps_encrypted_file() { + let out = run_script(&[ + "--target", + "encrypted", + "--top", + "encrypted_top", + "flist-plus", + ]); + let files = source_basenames(&out); + assert!( + files.contains("encrypted_top.sv"), + "top file missing: {files:?}" + ); + assert!( + files.contains("encrypted_user.sv"), + "user of encrypted IP missing: {files:?}" + ); + assert!( + files.contains("encrypted_ip.sv"), + "encrypted IP must be force-kept despite parse errors: {files:?}" + ); + } + /// Regression test: when two files define the same module name, last-wins semantics apply. /// The file parsed last (dup_b.sv) wins; the earlier definition (dup_a.sv) is dropped. #[test] From e96194875bbae5cf4ebeeffff6bed1feacc6a129 Mon Sep 17 00:00:00 2001 From: Michael Rogenmoser Date: Tue, 2 Jun 2026 12:54:33 +0200 Subject: [PATCH 02/12] script,pickle: --drop-unparseable, summary warning, annotate failed files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three follow-ups on the parse-error tolerance added in the previous commit: * script now prints a single summary warning at end of filtering: "warning: slang reported parse errors in N file(s); kept in script output: a.sv, b.sv" — instead of one stderr line per file, which got noisy on big designs. The diagnostic for each file still comes through (slang emits those itself). * script gains `--drop-unparseable`. Default behavior (keep) is unchanged. With the flag set, files slang couldn't parse are dropped from the output regardless of reachability; the summary warning's verb switches from "kept" to "dropped". Useful when the user knows the failures are real bugs in their RTL and wants the script to reflect that. * `--source-annotations` now emits `// UNPARSEABLE: slang reported parse errors` above each kept-but-failed file in the script, so a reader can spot them in the file list itself. Reuses the existing FileEntry.comment plumbing via a fresh `unparseable_comment` helper in emit_template. * pickle refuses on slang parse errors instead of silently emitting corrupt output: the slang printer reconstructs from the (partial) parse tree, and for encrypted protect envelopes that meant the base64 data block got mangled into something no downstream decrypt tool would accept. Pre-tolerance pickle threw automatically; now we check `failed_indices()` after parsing and surface a clear error pointing users at `bender script --top` for the file-list-only case. apply_slang_filters now returns the filtered groups alongside the set of kept-unparseable paths, which the caller threads into emit_template for the comment annotation. The retain predicate is unified into a single closure handling both the --top axis and the --drop-unparseable axis. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/cmd/pickle.rs | 21 ++++++++- src/cmd/script.rs | 113 ++++++++++++++++++++++++++++++++++++---------- tests/script.rs | 38 ++++++++++++++++ 3 files changed, 146 insertions(+), 26 deletions(-) diff --git a/src/cmd/pickle.rs b/src/cmd/pickle.rs index 130b2138..5c1f4a2e 100644 --- a/src/cmd/pickle.rs +++ b/src/cmd/pickle.rs @@ -5,7 +5,7 @@ use std::fs::File; use std::io::{BufWriter, Write}; -use std::path::Path; +use std::path::{Path, PathBuf}; use clap::Args; use indexmap::{IndexMap, IndexSet}; @@ -171,6 +171,7 @@ pub fn run(sess: &Session, args: PickleArgs) -> Result<()> { }; let mut session = SlangSession::new(); + let mut all_paths: Vec = Vec::new(); for src_group in srcs { // Collect include directories from the source group and command line arguments. let include_dirs: Vec = src_group @@ -215,11 +216,29 @@ pub fn run(sess: &Session, args: PickleArgs) -> Result<()> { }) .collect(); + all_paths.extend(file_paths.iter().map(PathBuf::from)); session .parse_group(&file_paths, &include_dirs, &defines) .into_diagnostic()?; } + // Pickle rewrites and reprints each tree, so a partial parse silently emits broken output + // (encrypted protect envelopes get mangled, etc.). Refuse rather than corrupt — the file + // filter in `bender script` is fine with partial trees, but pickle is not. + let failed = session.failed_indices().into_diagnostic()?; + if !failed.is_empty() { + let names: Vec = failed + .iter() + .filter_map(|i| all_paths.get(*i).map(|p| p.display().to_string())) + .collect(); + return Err(miette::miette!( + "pickle cannot rewrite {} file(s) with slang parse errors (output would be corrupt): {}\n\ + see diagnostics above; use `bender script --top ...` if you only need a file list", + names.len(), + names.join(", ") + )); + } + let trees = if args.top.is_empty() { session.all_trees().into_diagnostic()? } else { diff --git a/src/cmd/script.rs b/src/cmd/script.rs index 3867d9d3..a2eb5366 100644 --- a/src/cmd/script.rs +++ b/src/cmd/script.rs @@ -103,6 +103,11 @@ pub struct ScriptArgs { )] pub trim_incdirs: TrimIncdirs, + /// Drop files slang could not parse (default: keep them) + #[cfg(feature = "slang")] + #[arg(long, global = true, help_heading = "General Script Options")] + pub drop_unparseable: bool, + /// Format of the generated script #[command(subcommand)] pub format: ScriptFormat, @@ -352,20 +357,23 @@ pub fn run(sess: &Session, args: &ScriptArgs) -> Result<()> { .collect::>>()?; // Slang-based filtering: trim unreachable Verilog files (when `--top` is given) and/or - // unused include directories (per `--trim-incdirs`). + // unused include directories (per `--trim-incdirs`), and optionally drop files that slang + // could not parse (per `--drop-unparseable`). #[cfg(feature = "slang")] - let srcs = { + let (srcs, unparseable_paths) = { let trim_incdirs = match args.trim_incdirs { TrimIncdirs::Always => true, TrimIncdirs::Never => false, TrimIncdirs::Auto => !args.top.is_empty(), }; - if args.top.is_empty() && !trim_incdirs { - srcs + if args.top.is_empty() && !trim_incdirs && !args.drop_unparseable { + (srcs, std::collections::HashSet::::new()) } else { - apply_slang_filters(srcs, &args.top, trim_incdirs)? + apply_slang_filters(srcs, &args.top, trim_incdirs, args.drop_unparseable)? } }; + #[cfg(not(feature = "slang"))] + let unparseable_paths = std::collections::HashSet::::new(); let mut tera_context = Context::new(); let mut only_args = OnlyArgs { @@ -448,7 +456,15 @@ pub fn run(sess: &Session, args: &ScriptArgs) -> Result<()> { ScriptFormat::TemplateJson => JSON, }; - emit_template(sess, tera_context, template_content, args, only_args, srcs) + emit_template( + sess, + tera_context, + template_content, + args, + only_args, + srcs, + &unparseable_paths, + ) } /// Subdivide the source files in a group. @@ -487,12 +503,18 @@ where /// dropped (VHDL and untyped files are always retained, and any group that ends up with no /// files is dropped). When `trim_incdirs` is true, include directories slang did not resolve /// an `include through are dropped from `include_dirs` and `export_incdirs`. +/// +/// Files that slang reported parse errors on (typical for IEEE-1735 encrypted IP) are kept in +/// the output regardless of reachability by default; pass `drop_unparseable=true` to drop them. +/// Returns the filtered groups plus the set of unparseable file paths that survived filtering, +/// so the caller can annotate them in `source_annotations` output. #[cfg(feature = "slang")] fn apply_slang_filters<'a>( srcs: Vec>, top: &[String], trim_incdirs: bool, -) -> Result>> { + drop_unparseable: bool, +) -> Result<(Vec>, std::collections::HashSet)> { use std::collections::{HashMap, HashSet}; let mut session = SlangSession::new(); @@ -545,20 +567,15 @@ fn apply_slang_filters<'a>( } } - // Files whose slang parse failed get force-kept regardless of reachability. IEEE-1735 - // encrypted IP routinely trips the parser at the end of the protect envelope; we can't - // analyze those modules, but we mustn't drop them from the script either. + // Files whose slang parse failed get force-kept by default. IEEE-1735 encrypted IP + // routinely trips the parser at the end of the protect envelope; we can't analyze those + // modules, but we mustn't drop them from the script either. `drop_unparseable=true` flips + // that to drop them. let failed_indices = session.failed_indices().into_diagnostic()?; - let force_kept_paths: HashSet<&Path> = failed_indices + let unparseable_paths: HashSet<&Path> = failed_indices .iter() .filter_map(|i| index_to_path.get(i).copied()) .collect(); - for p in &force_kept_paths { - eprintln!( - "warning: slang reported parse errors in {}; preserving in script output regardless of reachability", - p.display() - ); - } // Determine which trees feed into the include / file-retention questions. With `--top` we // only look at trees reachable from those top modules; without `--top` we use every tree @@ -596,14 +613,24 @@ fn apply_slang_filters<'a>( resolved_includes.iter().any(|f| f.starts_with(&canon)) }; - Ok(srcs + // Single retention rule per Verilog file: + // * unparseable files keep iff !drop_unparseable (regardless of reachability) + // * everything else keep iff no --top filter is active, or it's reachable from one + let retain_verilog = |p: &Path| -> bool { + if unparseable_paths.contains(p) { + !drop_unparseable + } else { + !filter_files || kept_paths.contains(p) + } + }; + let run_file_filter = filter_files || drop_unparseable; + + let filtered: Vec> = srcs .into_iter() .map(|mut group| { - if filter_files { + if run_file_filter { group.files.retain(|f| match f { - SourceFile::File(p, Some(SourceType::Verilog)) => { - kept_paths.contains(p) || force_kept_paths.contains(p) - } + SourceFile::File(p, Some(SourceType::Verilog)) => retain_verilog(p), _ => true, }); } @@ -617,7 +644,33 @@ fn apply_slang_filters<'a>( }) // Remove empty groups that may have resulted from filtering out all Verilog files. .filter(|group| !group.files.is_empty()) - .collect()) + .collect(); + + // Summary: surface what slang couldn't parse without spamming one line per file. + let kept_unparseable: HashSet = if drop_unparseable { + HashSet::new() + } else { + unparseable_paths.iter().map(|p| p.to_path_buf()).collect() + }; + if !unparseable_paths.is_empty() { + let names: Vec = unparseable_paths + .iter() + .map(|p| p.display().to_string()) + .collect(); + let verb = if drop_unparseable { + "dropped" + } else { + "kept in script output" + }; + eprintln!( + "warning: slang reported parse errors in {} file(s); {}: {}", + names.len(), + verb, + names.join(", "), + ); + } + + Ok((filtered, kept_unparseable)) } static HEADER_AUTOGEN: &str = "This script was generated automatically by bender."; @@ -640,7 +693,17 @@ fn emit_template( args: &ScriptArgs, only: OnlyArgs, srcs: Vec, + unparseable_paths: &std::collections::HashSet, ) -> Result<()> { + // Helper for annotating FileEntry.comment on files that survived filtering despite slang + // failing to parse them; visible to users with `--source-annotations`. + let unparseable_comment = |p: &Path| -> Option { + if unparseable_paths.contains(p) { + Some("UNPARSEABLE: slang reported parse errors".to_string()) + } else { + None + } + }; tera_context.insert("HEADER_AUTOGEN", HEADER_AUTOGEN); tera_context.insert("root", sess.root); // tera_context.insert("srcs", &srcs); @@ -731,7 +794,7 @@ fn emit_template( }, None => FileEntry { file: file.0.to_path_buf(), - comment: file.1, + comment: file.1.or_else(|| unparseable_comment(file.0)), }, } }) @@ -798,7 +861,7 @@ fn emit_template( }, None => FileEntry { file: p.to_path_buf(), - comment: None, + comment: unparseable_comment(p), }, } } diff --git a/tests/script.rs b/tests/script.rs index 9826e16e..f31ebd2c 100644 --- a/tests/script.rs +++ b/tests/script.rs @@ -200,6 +200,44 @@ mod tests { ); } + /// `--drop-unparseable` drops files slang couldn't parse, even when `--top` keeps the rest. + #[test] + fn script_drop_unparseable_excludes_encrypted_file() { + let out = run_script(&[ + "--target", + "encrypted", + "--top", + "encrypted_top", + "--drop-unparseable", + "flist-plus", + ]); + let files = source_basenames(&out); + assert!(files.contains("encrypted_top.sv")); + assert!(files.contains("encrypted_user.sv")); + assert!( + !files.contains("encrypted_ip.sv"), + "encrypted IP should be dropped by --drop-unparseable: {files:?}" + ); + } + + /// `--source-annotations` adds a `// UNPARSEABLE: ...` comment above the kept encrypted file + /// so users reading the script can tell which entries slang couldn't analyze. + #[test] + fn script_source_annotations_marks_unparseable() { + let out = run_script(&[ + "--target", + "encrypted", + "--top", + "encrypted_top", + "--source-annotations", + "flist-plus", + ]); + assert!( + out.contains("// UNPARSEABLE: slang reported parse errors"), + "expected UNPARSEABLE annotation in output:\n{out}" + ); + } + /// Regression test: when two files define the same module name, last-wins semantics apply. /// The file parsed last (dup_b.sv) wins; the earlier definition (dup_a.sv) is dropped. #[test] From d83131a7d464bf02c112c6dedfe004747352c87d Mon Sep 17 00:00:00 2001 From: Michael Rogenmoser Date: Wed, 3 Jun 2026 15:23:06 +0200 Subject: [PATCH 03/12] script: discriminate encrypted IP vs broken RTL via ProtectedEnvelope MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, the parse-error tolerance was blanket: any file slang couldn't parse was kept in the script output with a warning. That masked genuine syntax bugs in user RTL. Use slang's own taxonomy to discriminate. The lexer/preprocessor emit `slang::diag::ProtectedEnvelope` (the [-Wprotected-envelope] warning) every time they skip an IEEE-1735 protect envelope; real syntax bugs never trigger it. We capture that signal per tree alongside the existing parse-error bit, expose it via `protected_tree_indices()`, and split the failed set into two: * encrypted_paths — failed AND emitted ProtectedEnvelope. Always tolerated (legal SystemVerilog that slang merely struggles to digest fully). * broken_paths — failed without ProtectedEnvelope. Looks like a real syntax bug; aborts the run by default. A new `--allow-broken` flag flips that abort into a warning, mirroring how `--allow-broken` (and similar escape hatches) work elsewhere in bender. The summary stderr now prints separate lines for encrypted vs broken so users can tell apart automatic vs explicit tolerance. bender-slang itself stays policy-free: parse_group is still lenient (returns Ok on parse errors, throws only on system-load failures), and the new protected_indices() complements the existing failed_indices(). Policy lives in `bender` (script.rs decides; pickle.rs continues to refuse all parse errors since rewriting a partial tree corrupts the output). bender-slang/tests/basic.rs: the old `parse_invalid_file_returns_parse_error` test was wrong after the previous tolerance commit and is updated to the new contract — it asserts parse_group succeeds, failed_indices() reports broken.sv, and protected_indices() is empty. Fixture: tests/pickle/Bender.yml gains a `broken` target wired to the existing tests/pickle/src/broken.sv. New script tests cover both failure (default) and `--allow-broken` (kept with warning). Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/bender-slang/cpp/analysis.cpp | 15 ++++ crates/bender-slang/cpp/session.cpp | 23 ++++-- crates/bender-slang/cpp/slang_bridge.h | 10 +++ crates/bender-slang/src/lib.rs | 15 ++++ crates/bender-slang/tests/basic.rs | 25 ++++-- src/cmd/script.rs | 104 ++++++++++++++++++++----- tests/pickle/Bender.yml | 4 + tests/script.rs | 54 +++++++++++++ 8 files changed, 220 insertions(+), 30 deletions(-) diff --git a/crates/bender-slang/cpp/analysis.cpp b/crates/bender-slang/cpp/analysis.cpp index bdb0b152..26630525 100644 --- a/crates/bender-slang/cpp/analysis.cpp +++ b/crates/bender-slang/cpp/analysis.cpp @@ -159,6 +159,21 @@ rust::Vec failed_tree_indices(const SlangSession& session) { return out; } +// Returns the indices of trees for which slang emitted at least one `pragma protect` envelope +// diagnostic. These are the files that look like IEEE-1735 encrypted IP — even if they also +// have parse errors, those errors are downstream of the protect block and should be tolerated +// without `--allow-broken`. +rust::Vec protected_tree_indices(const SlangSession& session) { + const auto& flags = session.tree_protect_diags(); + rust::Vec out; + for (size_t i = 0; i < flags.size(); ++i) { + if (flags[i]) { + out.push_back(static_cast(i)); + } + } + return out; +} + // Returns the deduped, canonical filesystem paths of every header file that was actually loaded // via `include directives while parsing the requested trees. Trees from different parse groups // may live in different SourceManagers, so the lookup is per-tree. diff --git a/crates/bender-slang/cpp/session.cpp b/crates/bender-slang/cpp/session.cpp index c73d80d1..d6f62948 100644 --- a/crates/bender-slang/cpp/session.cpp +++ b/crates/bender-slang/cpp/session.cpp @@ -1,6 +1,7 @@ // Copyright (c) 2025 ETH Zurich // Tim Fischer +#include "slang/diagnostics/PreprocessorDiags.h" #include "slang_bridge.h" #include @@ -36,7 +37,8 @@ void SlangContext::set_defines(const rust::Vec& defs) { } // Parses a list of source files and returns the resulting syntax trees as a vector (of shared pointers). -// If any file fails to parse, an exception is thrown with the error message(s) from the diagnostic engine. +// System-level errors (file unreadable, etc.) throw; per-file parse errors are surfaced +// non-fatally via last_parse_errors() / last_protect_diags() so the caller can apply policy. std::vector> SlangContext::parse_files(const rust::Vec& paths) { Bag options; options.set(ppOptions); @@ -45,6 +47,8 @@ std::vector> SlangContext::parse_files(const rust::V out.reserve(paths.size()); parseErrors.clear(); parseErrors.reserve(paths.size()); + protectDiags.clear(); + protectDiags.reserve(paths.size()); for (const auto& path : paths) { string_view pathView(path.data(), path.size()); @@ -63,21 +67,26 @@ std::vector> SlangContext::parse_files(const rust::V diagEngine.clearIncludeStack(); bool hasErrors = false; + bool hasProtectDiag = false; for (const auto& diag : tree->diagnostics()) { hasErrors |= diag.isError(); + if (diag.code == slang::diag::ProtectedEnvelope) { + hasProtectDiag = true; + } diagEngine.issue(diag); } // Surface diagnostics for any file with errors, but keep going — the Rust side decides - // what to do with the (possibly partial) tree. This matters for IEEE-1735 encrypted IP: - // slang skips the protect envelope but still trips on the surrounding endmodule, and - // those files shouldn't sink the whole `bender script` invocation. + // what to do with the (possibly partial) tree. The hasProtectDiag flag lets the Rust + // side discriminate IEEE-1735 encrypted IP (auto-tolerated) from real syntax bugs + // (fail by default; tolerate with --allow-broken). if (hasErrors) { std::cerr << diagClient->getString(); } out.push_back(tree); parseErrors.push_back(hasErrors); + protectDiags.push_back(hasProtectDiag); } return out; @@ -93,14 +102,18 @@ void SlangSession::parse_group(const rust::Vec& files, const rust: ctx->set_defines(defines); // Parse the files and store the resulting syntax trees in the session, alongside their - // pass/fail status so callers can decide how to handle partially-parsed files. + // pass/fail status and `pragma protect` diagnostic presence, so callers can decide how to + // handle partially-parsed files. auto parsed = ctx->parse_files(files); const auto& errs = ctx->last_parse_errors(); + const auto& protects = ctx->last_protect_diags(); allTrees.reserve(allTrees.size() + parsed.size()); treeParseErrors.reserve(treeParseErrors.size() + parsed.size()); + treeProtectDiags.reserve(treeProtectDiags.size() + parsed.size()); for (size_t i = 0; i < parsed.size(); ++i) { allTrees.push_back(parsed[i]); treeParseErrors.push_back(i < errs.size() && errs[i]); + treeProtectDiags.push_back(i < protects.size() && protects[i]); } contexts.push_back(std::move(ctx)); diff --git a/crates/bender-slang/cpp/slang_bridge.h b/crates/bender-slang/cpp/slang_bridge.h index c127e88d..f9855873 100644 --- a/crates/bender-slang/cpp/slang_bridge.h +++ b/crates/bender-slang/cpp/slang_bridge.h @@ -33,6 +33,9 @@ class SlangContext { // For each tree returned by the last parse_files call, whether slang reported parse errors. // Parallel to that return vector. const std::vector& last_parse_errors() const { return parseErrors; } + // For each tree, whether slang emitted at least one `pragma protect` envelope diagnostic + // (the lexer/preprocessor signal that the file contains IEEE-1735 encrypted content). + const std::vector& last_protect_diags() const { return protectDiags; } private: slang::SourceManager sourceManager; @@ -40,6 +43,7 @@ class SlangContext { slang::DiagnosticEngine diagEngine; std::shared_ptr diagClient; std::vector parseErrors; + std::vector protectDiags; }; class SlangSession { @@ -50,11 +54,16 @@ class SlangSession { const std::vector>& trees() const { return allTrees; } // Parallel to trees(): true if slang reported parse errors for that tree. const std::vector& tree_parse_errors() const { return treeParseErrors; } + // Parallel to trees(): true if slang emitted a `pragma protect` envelope diag for that tree. + // Used by the Rust side to discriminate "encrypted IP" (auto-tolerated) from "real syntax + // bug" (fail by default; tolerate with --allow-broken). + const std::vector& tree_protect_diags() const { return treeProtectDiags; } private: std::vector> contexts; std::vector> allTrees; std::vector treeParseErrors; + std::vector treeProtectDiags; }; class SyntaxTreeRewriter { @@ -89,6 +98,7 @@ rust::Vec reachable_tree_indices(const SlangSession& session, con rust::Vec resolved_include_paths_for(const SlangSession& session, const rust::Vec& tree_indices); rust::Vec failed_tree_indices(const SlangSession& session); +rust::Vec protected_tree_indices(const SlangSession& session); std::size_t tree_count(const SlangSession& session); std::shared_ptr tree_at(const SlangSession& session, std::size_t index); std::uint64_t renamed_declarations(const SyntaxTreeRewriter& rewriter); diff --git a/crates/bender-slang/src/lib.rs b/crates/bender-slang/src/lib.rs index 7a3e6d72..5bce17b2 100644 --- a/crates/bender-slang/src/lib.rs +++ b/crates/bender-slang/src/lib.rs @@ -69,6 +69,8 @@ mod ffi { fn failed_tree_indices(session: &SlangSession) -> Result>; + fn protected_tree_indices(session: &SlangSession) -> Result>; + fn tree_count(session: &SlangSession) -> usize; fn tree_at(session: &SlangSession, index: usize) -> Result>; @@ -217,6 +219,19 @@ impl SlangSession { Ok(indices.into_iter().map(|i| i as usize).collect()) } + /// Returns the indices of trees for which slang emitted at least one `pragma protect` + /// envelope diagnostic. Lets callers tell IEEE-1735 encrypted IP apart from genuinely + /// broken source files (which produce parse errors without any protect-envelope diag). + pub fn protected_indices(&self) -> Result> { + let indices = + ffi::protected_tree_indices(self.inner.as_ref().unwrap()).map_err(|cause| { + SlangError::TrimByTop { + message: cause.to_string(), + } + })?; + Ok(indices.into_iter().map(|i| i as usize).collect()) + } + /// Returns the canonical filesystem paths of every header that was actually loaded via an /// `include directive while parsing the given trees. Useful for figuring out which include /// directories were actually consulted. diff --git a/crates/bender-slang/tests/basic.rs b/crates/bender-slang/tests/basic.rs index 5b405f26..6309541c 100644 --- a/crates/bender-slang/tests/basic.rs +++ b/crates/bender-slang/tests/basic.rs @@ -22,18 +22,29 @@ fn parse_valid_file_succeeds() { } #[test] -fn parse_invalid_file_returns_parse_error() { +fn parse_invalid_file_reports_via_failed_indices() { + // The new contract: parse_group is lenient — system errors still throw, but per-file parse + // errors are surfaced via failed_indices(). Callers (bender script, bender pickle) layer + // their own policy (allow / refuse / discriminate) on top. let mut session = bender_slang::SlangSession::new(); let files = vec![fixture_path("src/broken.sv")]; let includes = vec![]; let defines = vec![]; - let result = session.parse_group(&files, &includes, &defines); + let indices = session + .parse_group(&files, &includes, &defines) + .expect("parse_group is lenient and should return Ok even on parse errors"); + assert_eq!(indices.len(), 1); + + let failed = session.failed_indices().expect("failed_indices query"); + assert_eq!(failed, vec![0], "broken.sv should be reported as failed"); - match result { - Err(bender_slang::SlangError::ParseGroup { .. }) => {} - Err(other) => panic!("expected SlangError::ParseGroup, got {other}"), - Ok(_) => panic!("expected parse to fail"), - } + let protected = session + .protected_indices() + .expect("protected_indices query"); + assert!( + protected.is_empty(), + "broken.sv has no pragma protect envelope: {protected:?}" + ); } #[test] diff --git a/src/cmd/script.rs b/src/cmd/script.rs index a2eb5366..cc1564c5 100644 --- a/src/cmd/script.rs +++ b/src/cmd/script.rs @@ -108,6 +108,11 @@ pub struct ScriptArgs { #[arg(long, global = true, help_heading = "General Script Options")] pub drop_unparseable: bool, + /// Tolerate slang parse errors in non-encrypted files instead of failing + #[cfg(feature = "slang")] + #[arg(long, global = true, help_heading = "General Script Options")] + pub allow_broken: bool, + /// Format of the generated script #[command(subcommand)] pub format: ScriptFormat, @@ -366,10 +371,21 @@ pub fn run(sess: &Session, args: &ScriptArgs) -> Result<()> { TrimIncdirs::Never => false, TrimIncdirs::Auto => !args.top.is_empty(), }; + // The slang pass also enforces "no broken non-encrypted parse errors" unless + // --allow-broken, so even with no top / no trim / no drop, we still need to invoke + // it when a strict check is desired. For now we keep the short-circuit consistent + // with the original "do nothing if no slang-driven action requested" — the strict + // check kicks in whenever the slang pass already runs for another reason. if args.top.is_empty() && !trim_incdirs && !args.drop_unparseable { (srcs, std::collections::HashSet::::new()) } else { - apply_slang_filters(srcs, &args.top, trim_incdirs, args.drop_unparseable)? + apply_slang_filters( + srcs, + &args.top, + trim_incdirs, + args.drop_unparseable, + args.allow_broken, + )? } }; #[cfg(not(feature = "slang"))] @@ -504,8 +520,12 @@ where /// files is dropped). When `trim_incdirs` is true, include directories slang did not resolve /// an `include through are dropped from `include_dirs` and `export_incdirs`. /// -/// Files that slang reported parse errors on (typical for IEEE-1735 encrypted IP) are kept in -/// the output regardless of reachability by default; pass `drop_unparseable=true` to drop them. +/// IEEE-1735 encrypted IP is auto-tolerated: slang emits a `ProtectedEnvelope` diag whenever +/// it skips a protect block, and any parse errors on those files are treated as expected +/// fallout. Files with parse errors but no protect-envelope diag look like real syntax bugs +/// and abort the run unless `allow_broken=true`. Tolerated files are kept in the output +/// regardless of reachability; pass `drop_unparseable=true` to drop them instead. +/// /// Returns the filtered groups plus the set of unparseable file paths that survived filtering, /// so the caller can annotate them in `source_annotations` output. #[cfg(feature = "slang")] @@ -514,6 +534,7 @@ fn apply_slang_filters<'a>( top: &[String], trim_incdirs: bool, drop_unparseable: bool, + allow_broken: bool, ) -> Result<(Vec>, std::collections::HashSet)> { use std::collections::{HashMap, HashSet}; @@ -567,14 +588,47 @@ fn apply_slang_filters<'a>( } } - // Files whose slang parse failed get force-kept by default. IEEE-1735 encrypted IP - // routinely trips the parser at the end of the protect envelope; we can't analyze those - // modules, but we mustn't drop them from the script either. `drop_unparseable=true` flips - // that to drop them. + // Discriminate encrypted IP (legal SystemVerilog, just hard to parse — auto-tolerated) + // from genuinely broken files (real syntax bugs — abort unless `--allow-broken`). + // Encrypted ⇔ slang emitted a `ProtectedEnvelope` diag on that tree. let failed_indices = session.failed_indices().into_diagnostic()?; - let unparseable_paths: HashSet<&Path> = failed_indices + let protected_indices: HashSet = session + .protected_indices() + .into_diagnostic()? + .into_iter() + .collect(); + let mut encrypted_paths: HashSet<&Path> = HashSet::new(); + let mut broken_paths: Vec<&Path> = Vec::new(); + for i in &failed_indices { + if let Some(p) = index_to_path.get(i).copied() { + if protected_indices.contains(i) { + encrypted_paths.insert(p); + } else { + broken_paths.push(p); + } + } + } + + if !broken_paths.is_empty() && !allow_broken { + let listed = broken_paths + .iter() + .map(|p| p.display().to_string()) + .collect::>() + .join("\n "); + return Err(miette::miette!( + "slang reported parse errors in {} file(s) with no `pragma protect` envelope (looks like real syntax bugs):\n {}\n\ + see diagnostics above; pass --allow-broken to tolerate these errors and continue", + broken_paths.len(), + listed + )); + } + + // From here on, both encrypted (always) and broken (only with --allow-broken) survive as + // "unparseable" — kept or dropped per `drop_unparseable`. + let unparseable_paths: HashSet<&Path> = encrypted_paths .iter() - .filter_map(|i| index_to_path.get(i).copied()) + .copied() + .chain(broken_paths.iter().copied()) .collect(); // Determine which trees feed into the include / file-retention questions. With `--top` we @@ -646,24 +700,38 @@ fn apply_slang_filters<'a>( .filter(|group| !group.files.is_empty()) .collect(); - // Summary: surface what slang couldn't parse without spamming one line per file. + // Summary lines: report encrypted and broken files separately so users can tell apart the + // automatic tolerance (encrypted) from the explicit one (broken, only here if + // --allow-broken was set since we'd have errored out otherwise). let kept_unparseable: HashSet = if drop_unparseable { HashSet::new() } else { unparseable_paths.iter().map(|p| p.to_path_buf()).collect() }; - if !unparseable_paths.is_empty() { - let names: Vec = unparseable_paths + let verb = if drop_unparseable { + "dropped" + } else { + "kept in script output" + }; + if !encrypted_paths.is_empty() { + let names: Vec = encrypted_paths + .iter() + .map(|p| p.display().to_string()) + .collect(); + eprintln!( + "warning: {} encrypted file(s) ({}): {}", + names.len(), + verb, + names.join(", "), + ); + } + if !broken_paths.is_empty() { + let names: Vec = broken_paths .iter() .map(|p| p.display().to_string()) .collect(); - let verb = if drop_unparseable { - "dropped" - } else { - "kept in script output" - }; eprintln!( - "warning: slang reported parse errors in {} file(s); {}: {}", + "warning: {} broken file(s) ({} via --allow-broken): {}", names.len(), verb, names.join(", "), diff --git a/tests/pickle/Bender.yml b/tests/pickle/Bender.yml index c174ceeb..e1f658fe 100644 --- a/tests/pickle/Bender.yml +++ b/tests/pickle/Bender.yml @@ -31,3 +31,7 @@ sources: - src/encrypted_ip.sv - src/encrypted_user.sv - src/encrypted_top.sv + + - target: broken + files: + - src/broken.sv diff --git a/tests/script.rs b/tests/script.rs index f31ebd2c..0fac7871 100644 --- a/tests/script.rs +++ b/tests/script.rs @@ -26,6 +26,26 @@ mod tests { String::from_utf8(out.stdout).expect("stdout must be utf-8") } + /// Run the script subcommand expecting failure; returns the captured stderr. + fn run_script_failing(args: &[&str]) -> String { + let mut full_args = vec!["-d", "tests/pickle", "script"]; + full_args.extend(args); + + let out = cargo::cargo_bin_cmd!() + .args(&full_args) + .output() + .expect("Failed to execute bender binary"); + + assert!( + !out.status.success(), + "script command unexpectedly succeeded.\nstdout:\n{}\nstderr:\n{}", + String::from_utf8_lossy(&out.stdout), + String::from_utf8_lossy(&out.stderr) + ); + + String::from_utf8(out.stderr).expect("stderr must be utf-8") + } + /// Return the path component after the last `/` or `\`. On Windows, bender's source paths /// can come out mixed (e.g. `D:\workspace\tests\pickle/src/top.sv`) while incdir paths are /// all-backslash because the Bender.yml entry has no embedded separator — so splitting on @@ -238,6 +258,40 @@ mod tests { ); } + /// A file with a real syntax error (no `pragma protect` envelope) should abort the run by + /// default — that's the discrimination from encrypted IP, which is auto-tolerated. + #[test] + fn script_broken_file_fails_by_default() { + let stderr = run_script_failing(&["--target", "broken", "--top", "broken", "flist-plus"]); + assert!( + stderr.contains("looks like real syntax bugs"), + "expected real-bug error message, got stderr:\n{stderr}" + ); + assert!( + stderr.contains("--allow-broken"), + "error should mention --allow-broken escape hatch:\n{stderr}" + ); + } + + /// `--allow-broken` lets users opt into tolerance for non-encrypted parse errors. The + /// broken file is kept in the output with a warning instead of aborting. + #[test] + fn script_broken_file_allow_broken_keeps_it() { + let out = run_script(&[ + "--target", + "broken", + "--top", + "broken", + "--allow-broken", + "flist-plus", + ]); + let files = source_basenames(&out); + assert!( + files.contains("broken.sv"), + "broken.sv should survive with --allow-broken: {files:?}" + ); + } + /// Regression test: when two files define the same module name, last-wins semantics apply. /// The file parsed last (dup_b.sv) wins; the earlier definition (dup_a.sv) is dropped. #[test] From 00182568b038e2275a26c333b8f7ee74eea28dd6 Mon Sep 17 00:00:00 2001 From: Tim Fischer Date: Wed, 3 Jun 2026 21:52:10 +0200 Subject: [PATCH 04/12] bender-slang: refactor FFI to pass syntax trees directly instead of indices --- crates/bender-slang/cpp/analysis.cpp | 90 +++++++-------- crates/bender-slang/cpp/session.cpp | 50 +++----- crates/bender-slang/cpp/slang_bridge.h | 50 ++++---- crates/bender-slang/src/lib.rs | 154 +++++++++++-------------- crates/bender-slang/tests/basic.rs | 33 +++--- src/cmd/pickle.rs | 26 ++--- src/cmd/script.rs | 56 ++++----- 7 files changed, 200 insertions(+), 259 deletions(-) diff --git a/crates/bender-slang/cpp/analysis.cpp b/crates/bender-slang/cpp/analysis.cpp index 26630525..a5477da7 100644 --- a/crates/bender-slang/cpp/analysis.cpp +++ b/crates/bender-slang/cpp/analysis.cpp @@ -1,6 +1,7 @@ // Copyright (c) 2025 ETH Zurich // Tim Fischer +#include "bender-slang/src/lib.rs.h" #include "slang/syntax/AllSyntax.h" #include "slang_bridge.h" @@ -32,8 +33,28 @@ static bool stderr_is_tty() { static const slang::DiagCode kDuplicateTopLevelDecl(slang::DiagSubsystem::General, 9999); static constexpr std::string_view kDuplicateTopLevelDeclFormat = "module '{}' overwrites previous definition in '{}'"; -rust::Vec reachable_tree_indices(const SlangSession& session, const rust::Vec& tops) { - const auto& treeVec = session.trees(); +// Converts an internal per-tree record into the cxx shared struct handed across the bridge. +static ParsedTree to_parsed(const TreeEntry& entry) { + ParsedTree pt; + pt.tree = entry.tree; + pt.path = rust::String(entry.path); + pt.parsed_ok = entry.parsedOk; + pt.encrypted = entry.encrypted; + return pt; +} + +// Returns every parsed tree in the session, each bundled with its per-file facts. The order +// matches parse order across all parse_group calls. +rust::Vec all_trees(const SlangSession& session) { + rust::Vec out; + for (const auto& entry : session.entries()) { + out.push_back(to_parsed(entry)); + } + return out; +} + +rust::Vec reachable_trees(const SlangSession& session, const rust::Vec& tops) { + const auto& entries = session.entries(); // One engine+client per distinct SourceManager. Each parse group creates its own // SourceManager (see SlangContext), so trees from different groups need different @@ -60,8 +81,8 @@ rust::Vec reachable_tree_indices(const SlangSession& session, con // Build the name-to-tree-index map with last-wins semantics, emitting a warning // whenever a later definition overwrites an earlier one. std::unordered_map nameToTreeIndex; - for (size_t i = 0; i < treeVec.size(); ++i) { - const auto& metadata = treeVec[i]->getMetadata(); + for (size_t i = 0; i < entries.size(); ++i) { + const auto& metadata = entries[i].tree->getMetadata(); auto checkAndInsert = [&](std::string_view name, slang::SourceLocation loc) { if (name.empty()) @@ -70,12 +91,12 @@ rust::Vec reachable_tree_indices(const SlangSession& session, con if (inserted) return; - const auto& prevBufferIds = treeVec[it->second]->getSourceBufferIds(); + const auto& prevBufferIds = entries[it->second].tree->getSourceBufferIds(); std::string_view prevFile = prevBufferIds.empty() ? std::string_view("") - : treeVec[it->second]->sourceManager().getRawFileName(prevBufferIds[0]); + : entries[it->second].tree->sourceManager().getRawFileName(prevBufferIds[0]); - auto& state = diagFor(treeVec[i]->sourceManager()); + auto& state = diagFor(entries[i].tree->sourceManager()); slang::Diagnostic diag(kDuplicateTopLevelDecl, loc); diag << name; diag << prevFile; @@ -93,9 +114,9 @@ rust::Vec reachable_tree_indices(const SlangSession& session, con // Build a dependency graph where each tree points to the trees that declare // symbols it references. - std::vector> deps(treeVec.size()); - for (size_t i = 0; i < treeVec.size(); ++i) { - const auto& metadata = treeVec[i]->getMetadata(); + std::vector> deps(entries.size()); + for (size_t i = 0; i < entries.size(); ++i) { + const auto& metadata = entries[i].tree->getMetadata(); std::unordered_set seen; for (auto ref : metadata.getReferencedSymbols()) { auto it = nameToTreeIndex.find(ref); @@ -121,7 +142,7 @@ rust::Vec reachable_tree_indices(const SlangSession& session, con } // Perform a DFS from the top modules to find all reachable trees. - std::vector reachable(treeVec.size(), false); + std::vector reachable(entries.size(), false); std::function dfs = [&](size_t index) { if (reachable[index]) { return; @@ -136,55 +157,24 @@ rust::Vec reachable_tree_indices(const SlangSession& session, con dfs(start); } - rust::Vec result; + rust::Vec result; for (size_t i = 0; i < reachable.size(); ++i) { if (reachable[i]) { - result.push_back(static_cast(i)); + result.push_back(to_parsed(entries[i])); } } return result; } -// Returns the indices of trees whose parse reported slang errors. Used by the Rust side to -// force-keep such files in the output regardless of reachability — useful for IEEE-1735 -// encrypted IP that slang can't fully digest. -rust::Vec failed_tree_indices(const SlangSession& session) { - const auto& errs = session.tree_parse_errors(); - rust::Vec out; - for (size_t i = 0; i < errs.size(); ++i) { - if (errs[i]) { - out.push_back(static_cast(i)); - } - } - return out; -} - -// Returns the indices of trees for which slang emitted at least one `pragma protect` envelope -// diagnostic. These are the files that look like IEEE-1735 encrypted IP — even if they also -// have parse errors, those errors are downstream of the protect block and should be tolerated -// without `--allow-broken`. -rust::Vec protected_tree_indices(const SlangSession& session) { - const auto& flags = session.tree_protect_diags(); - rust::Vec out; - for (size_t i = 0; i < flags.size(); ++i) { - if (flags[i]) { - out.push_back(static_cast(i)); - } - } - return out; -} - // Returns the deduped, canonical filesystem paths of every header file that was actually loaded -// via `include directives while parsing the requested trees. Trees from different parse groups -// may live in different SourceManagers, so the lookup is per-tree. -rust::Vec resolved_include_paths_for(const SlangSession& session, - const rust::Vec& tree_indices) { - const auto& treeVec = session.trees(); +// via `include directives while parsing the given trees. Trees from different parse groups may +// live in different SourceManagers, so the lookup is per-tree. +rust::Vec resolved_include_paths_for(const rust::Vec& trees) { std::unordered_set uniquePaths; - for (auto idx : tree_indices) { - if (idx >= treeVec.size()) + for (const auto& parsed : trees) { + const auto& tree = parsed.tree; + if (!tree) continue; - const auto& tree = treeVec[idx]; const auto& sm = tree->sourceManager(); for (const auto& inc : tree->getIncludeDirectives()) { if (!inc.buffer.id.valid()) diff --git a/crates/bender-slang/cpp/session.cpp b/crates/bender-slang/cpp/session.cpp index d6f62948..306be024 100644 --- a/crates/bender-slang/cpp/session.cpp +++ b/crates/bender-slang/cpp/session.cpp @@ -36,19 +36,16 @@ void SlangContext::set_defines(const rust::Vec& defs) { } } -// Parses a list of source files and returns the resulting syntax trees as a vector (of shared pointers). +// Parses a list of source files and returns one TreeEntry per file, each bundling the resulting +// syntax tree with the per-file facts slang reported (path, parse success, encryption). // System-level errors (file unreadable, etc.) throw; per-file parse errors are surfaced -// non-fatally via last_parse_errors() / last_protect_diags() so the caller can apply policy. -std::vector> SlangContext::parse_files(const rust::Vec& paths) { +// non-fatally via the TreeEntry::parsedOk flag so the caller can apply policy. +std::vector SlangContext::parse_files(const rust::Vec& paths) { Bag options; options.set(ppOptions); - std::vector> out; + std::vector out; out.reserve(paths.size()); - parseErrors.clear(); - parseErrors.reserve(paths.size()); - protectDiags.clear(); - protectDiags.reserve(paths.size()); for (const auto& path : paths) { string_view pathView(path.data(), path.size()); @@ -77,16 +74,14 @@ std::vector> SlangContext::parse_files(const rust::V } // Surface diagnostics for any file with errors, but keep going — the Rust side decides - // what to do with the (possibly partial) tree. The hasProtectDiag flag lets the Rust - // side discriminate IEEE-1735 encrypted IP (auto-tolerated) from real syntax bugs - // (fail by default; tolerate with --allow-broken). + // what to do with the (possibly partial) tree. The encrypted flag lets the Rust side + // discriminate IEEE-1735 encrypted IP (auto-tolerated) from real syntax bugs (fail by + // default; tolerate with --allow-broken). if (hasErrors) { std::cerr << diagClient->getString(); } - out.push_back(tree); - parseErrors.push_back(hasErrors); - protectDiags.push_back(hasProtectDiag); + out.push_back(TreeEntry{tree, std::string(path.data(), path.size()), !hasErrors, hasProtectDiag}); } return out; @@ -101,31 +96,16 @@ void SlangSession::parse_group(const rust::Vec& files, const rust: ctx->set_includes(includes); ctx->set_defines(defines); - // Parse the files and store the resulting syntax trees in the session, alongside their - // pass/fail status and `pragma protect` diagnostic presence, so callers can decide how to - // handle partially-parsed files. + // Parse the files and append the resulting per-tree records to the session, so callers can + // decide how to handle partially-parsed files. auto parsed = ctx->parse_files(files); - const auto& errs = ctx->last_parse_errors(); - const auto& protects = ctx->last_protect_diags(); - allTrees.reserve(allTrees.size() + parsed.size()); - treeParseErrors.reserve(treeParseErrors.size() + parsed.size()); - treeProtectDiags.reserve(treeProtectDiags.size() + parsed.size()); - for (size_t i = 0; i < parsed.size(); ++i) { - allTrees.push_back(parsed[i]); - treeParseErrors.push_back(i < errs.size() && errs[i]); - treeProtectDiags.push_back(i < protects.size() && protects[i]); + treeEntries.reserve(treeEntries.size() + parsed.size()); + for (auto& entry : parsed) { + treeEntries.push_back(std::move(entry)); } contexts.push_back(std::move(ctx)); } // Returns the number of syntax trees currently stored in the session. -std::size_t tree_count(const SlangSession& session) { return session.trees().size(); } - -// Returns the syntax tree at the given index in the session. -std::shared_ptr tree_at(const SlangSession& session, std::size_t index) { - if (index >= session.trees().size()) { - throw std::runtime_error("Tree index out of bounds."); - } - return session.trees()[index]; -} +std::size_t tree_count(const SlangSession& session) { return session.entries().size(); } diff --git a/crates/bender-slang/cpp/slang_bridge.h b/crates/bender-slang/cpp/slang_bridge.h index f9855873..b20b9e16 100644 --- a/crates/bender-slang/cpp/slang_bridge.h +++ b/crates/bender-slang/cpp/slang_bridge.h @@ -20,6 +20,24 @@ #include struct SlangPrintOpts; +struct ParsedTree; + +// Internal per-tree record kept by the session. Plain C++ (no cxx types) so the session can own +// it without depending on the generated header; the bridge functions convert it to `ParsedTree` +// at the FFI boundary. +struct TreeEntry { + std::shared_ptr tree; + // The source path exactly as it was handed to parse_group (so the Rust side can match it + // back to its own SourceFile entry without a separate index map). + std::string path; + // False if slang reported any error diagnostic while parsing this file. + bool parsedOk = true; + // True if slang emitted at least one `pragma protect` envelope diagnostic (the + // lexer/preprocessor signal that the file contains IEEE-1735 encrypted content). Lets the + // Rust side discriminate "encrypted IP" (auto-tolerated) from "real syntax bug" (fail by + // default; tolerate with --allow-broken). + bool encrypted = false; +}; class SlangContext { public: @@ -28,22 +46,13 @@ class SlangContext { void set_includes(const rust::Vec& includes); void set_defines(const rust::Vec& defines); - std::vector> parse_files(const rust::Vec& paths); - - // For each tree returned by the last parse_files call, whether slang reported parse errors. - // Parallel to that return vector. - const std::vector& last_parse_errors() const { return parseErrors; } - // For each tree, whether slang emitted at least one `pragma protect` envelope diagnostic - // (the lexer/preprocessor signal that the file contains IEEE-1735 encrypted content). - const std::vector& last_protect_diags() const { return protectDiags; } + std::vector parse_files(const rust::Vec& paths); private: slang::SourceManager sourceManager; slang::parsing::PreprocessorOptions ppOptions; slang::DiagnosticEngine diagEngine; std::shared_ptr diagClient; - std::vector parseErrors; - std::vector protectDiags; }; class SlangSession { @@ -51,19 +60,11 @@ class SlangSession { void parse_group(const rust::Vec& files, const rust::Vec& includes, const rust::Vec& defines); - const std::vector>& trees() const { return allTrees; } - // Parallel to trees(): true if slang reported parse errors for that tree. - const std::vector& tree_parse_errors() const { return treeParseErrors; } - // Parallel to trees(): true if slang emitted a `pragma protect` envelope diag for that tree. - // Used by the Rust side to discriminate "encrypted IP" (auto-tolerated) from "real syntax - // bug" (fail by default; tolerate with --allow-broken). - const std::vector& tree_protect_diags() const { return treeProtectDiags; } + const std::vector& entries() const { return treeEntries; } private: std::vector> contexts; - std::vector> allTrees; - std::vector treeParseErrors; - std::vector treeProtectDiags; + std::vector treeEntries; }; class SyntaxTreeRewriter { @@ -94,13 +95,10 @@ rust::String print_tree(std::shared_ptr tree, SlangPr rust::String dump_tree_json(std::shared_ptr tree); -rust::Vec reachable_tree_indices(const SlangSession& session, const rust::Vec& tops); -rust::Vec resolved_include_paths_for(const SlangSession& session, - const rust::Vec& tree_indices); -rust::Vec failed_tree_indices(const SlangSession& session); -rust::Vec protected_tree_indices(const SlangSession& session); std::size_t tree_count(const SlangSession& session); -std::shared_ptr tree_at(const SlangSession& session, std::size_t index); +rust::Vec all_trees(const SlangSession& session); +rust::Vec reachable_trees(const SlangSession& session, const rust::Vec& tops); +rust::Vec resolved_include_paths_for(const rust::Vec& trees); std::uint64_t renamed_declarations(const SyntaxTreeRewriter& rewriter); std::uint64_t renamed_references(const SyntaxTreeRewriter& rewriter); diff --git a/crates/bender-slang/src/lib.rs b/crates/bender-slang/src/lib.rs index 5bce17b2..a274f1a5 100644 --- a/crates/bender-slang/src/lib.rs +++ b/crates/bender-slang/src/lib.rs @@ -16,8 +16,6 @@ pub enum SlangError { ParseGroup { message: String }, #[error("Failed to trim files by top modules: {message}")] TrimByTop { message: String }, - #[error("Failed to access parsed syntax tree: {message}")] - TreeAccess { message: String }, #[error("Failed to rewrite syntax trees: {message}")] Rewrite { message: String }, } @@ -39,6 +37,18 @@ mod ffi { squash_newlines: bool, } + /// A parsed syntax tree bundled with the per-file facts slang reported while parsing it. + struct ParsedTree { + /// The parsed syntax tree (possibly partial if `parsed_ok` is false). + tree: SharedPtr, + /// The source path as it was handed to `parse_group`. + path: String, + /// False if slang reported any error diagnostic for this file. + parsed_ok: bool, + /// True if slang emitted a `pragma protect` envelope diag (IEEE-1735 encrypted IP). + encrypted: bool, + } + unsafe extern "C++" { include!("bender-slang/cpp/slang_bridge.h"); include!("slang/syntax/SyntaxTree.h"); @@ -60,20 +70,13 @@ mod ffi { defines: &Vec, ) -> Result<()>; - fn reachable_tree_indices(session: &SlangSession, tops: &Vec) -> Result>; - - fn resolved_include_paths_for( - session: &SlangSession, - tree_indices: &Vec, - ) -> Result>; - - fn failed_tree_indices(session: &SlangSession) -> Result>; + fn tree_count(session: &SlangSession) -> usize; - fn protected_tree_indices(session: &SlangSession) -> Result>; + fn all_trees(session: &SlangSession) -> Vec; - fn tree_count(session: &SlangSession) -> usize; + fn reachable_trees(session: &SlangSession, tops: &Vec) -> Result>; - fn tree_at(session: &SlangSession, index: usize) -> Result>; + fn resolved_include_paths_for(trees: &Vec) -> Vec; fn new_syntax_tree_rewriter() -> UniquePtr; fn set_suffix(self: Pin<&mut SyntaxTreeRewriter>, suffix: &str); @@ -107,6 +110,33 @@ pub struct SyntaxTree<'a> { _session: PhantomData<&'a SlangSession>, } +/// A parsed syntax tree bundled with the per-file facts slang reported while parsing it. +pub struct ParsedTree<'a> { + /// The parsed syntax tree (possibly partial when `parsed_ok` is false). + pub tree: SyntaxTree<'a>, + /// The source path exactly as it was handed to [`SlangSession::parse_group`]. + pub path: String, + /// False if slang reported any error diagnostic for this file. + pub parsed_ok: bool, + /// True if slang emitted a `pragma protect` envelope diagnostic, i.e. the file is + /// IEEE-1735 encrypted IP rather than a genuinely broken source file. + pub encrypted: bool, +} + +impl<'a> ParsedTree<'a> { + fn from_ffi(parsed: ffi::ParsedTree) -> Self { + Self { + tree: SyntaxTree { + inner: parsed.tree, + _session: PhantomData, + }, + path: parsed.path, + parsed_ok: parsed.parsed_ok, + encrypted: parsed.encrypted, + } + } +} + pub struct SyntaxTreeRewriter { inner: UniquePtr, } @@ -163,21 +193,17 @@ impl SlangSession { files: &[String], includes: &[String], defines: &[String], - ) -> Result> { + ) -> Result<()> { let files_vec = files.to_vec(); let includes_vec = normalize_include_dirs(includes)?; let defines_vec = defines.to_vec(); - let start = self.tree_count(); self.inner .pin_mut() .parse_group(&files_vec, &includes_vec, &defines_vec) .map_err(|cause| SlangError::ParseGroup { message: cause.to_string(), - })?; - - let end = self.tree_count(); - Ok((start..end).collect()) + }) } /// Returns the total number of parsed syntax trees in the session. @@ -185,85 +211,41 @@ impl SlangSession { ffi::tree_count(self.inner.as_ref().unwrap()) } - /// Returns all parsed syntax trees in the session. - pub fn all_trees(&self) -> Result>> { - let count = self.tree_count(); - let mut out = Vec::with_capacity(count); - for idx in 0..count { - out.push(self.tree(idx)?); - } - Ok(out) + /// Returns every parsed tree in the session, each bundled with its per-file facts (path, + /// parse success, encryption). The order matches parse order across all `parse_group` calls. + pub fn all_trees(&self) -> Vec> { + ffi::all_trees(self.inner.as_ref().unwrap()) + .into_iter() + .map(ParsedTree::from_ffi) + .collect() } - /// Returns the indices of syntax trees reachable from the given top modules. - pub fn reachable_indices(&self, tops: &[String]) -> Result> { + /// Returns the parsed trees reachable from the given top modules, each bundled with its + /// per-file facts. + pub fn reachable_trees(&self, tops: &[String]) -> Result>> { let tops = tops.to_vec(); - let indices = - ffi::reachable_tree_indices(self.inner.as_ref().unwrap(), &tops).map_err(|cause| { - SlangError::TrimByTop { - message: cause.to_string(), - } - })?; - Ok(indices.into_iter().map(|i| i as usize).collect()) - } - - /// Returns the indices of trees that slang reported parse errors for. Such trees are kept - /// in the session (their partial metadata is still available) so callers can decide how to - /// handle them — typically by force-keeping the underlying file in any filtered output. - pub fn failed_indices(&self) -> Result> { - let indices = ffi::failed_tree_indices(self.inner.as_ref().unwrap()).map_err(|cause| { + let trees = ffi::reachable_trees(self.inner.as_ref().unwrap(), &tops).map_err(|cause| { SlangError::TrimByTop { message: cause.to_string(), } })?; - Ok(indices.into_iter().map(|i| i as usize).collect()) - } - - /// Returns the indices of trees for which slang emitted at least one `pragma protect` - /// envelope diagnostic. Lets callers tell IEEE-1735 encrypted IP apart from genuinely - /// broken source files (which produce parse errors without any protect-envelope diag). - pub fn protected_indices(&self) -> Result> { - let indices = - ffi::protected_tree_indices(self.inner.as_ref().unwrap()).map_err(|cause| { - SlangError::TrimByTop { - message: cause.to_string(), - } - })?; - Ok(indices.into_iter().map(|i| i as usize).collect()) + Ok(trees.into_iter().map(ParsedTree::from_ffi).collect()) } /// Returns the canonical filesystem paths of every header that was actually loaded via an /// `include directive while parsing the given trees. Useful for figuring out which include /// directories were actually consulted. - pub fn resolved_include_paths(&self, tree_indices: &[usize]) -> Result> { - let indices: Vec = tree_indices.iter().map(|&i| i as u32).collect(); - let paths = ffi::resolved_include_paths_for(self.inner.as_ref().unwrap(), &indices) - .map_err(|cause| SlangError::TrimByTop { - message: cause.to_string(), - })?; - Ok(paths.into_iter().collect()) - } - - /// Returns syntax trees reachable from the given top modules. - pub fn reachable_trees(&self, tops: &[String]) -> Result>> { - let indices = self.reachable_indices(tops)?; - let mut out = Vec::with_capacity(indices.len()); - for idx in indices { - out.push(self.tree(idx)?); - } - Ok(out) - } - - /// Returns a handle to the syntax tree at the given index. - pub fn tree(&self, index: usize) -> Result> { - Ok(SyntaxTree { - inner: ffi::tree_at(self.inner.as_ref().unwrap(), index).map_err(|cause| { - SlangError::TreeAccess { - message: cause.to_string(), - } - })?, - _session: PhantomData, - }) + pub fn resolved_include_paths(&self, trees: &[ParsedTree]) -> Vec { + let ffi_trees: Vec = trees + .iter() + .map(|t| ffi::ParsedTree { + tree: t.tree.inner.clone(), + path: t.path.clone(), + parsed_ok: t.parsed_ok, + encrypted: t.encrypted, + }) + .collect(); + ffi::resolved_include_paths_for(&ffi_trees) } } diff --git a/crates/bender-slang/tests/basic.rs b/crates/bender-slang/tests/basic.rs index 6309541c..235843a4 100644 --- a/crates/bender-slang/tests/basic.rs +++ b/crates/bender-slang/tests/basic.rs @@ -22,28 +22,27 @@ fn parse_valid_file_succeeds() { } #[test] -fn parse_invalid_file_reports_via_failed_indices() { - // The new contract: parse_group is lenient — system errors still throw, but per-file parse - // errors are surfaced via failed_indices(). Callers (bender script, bender pickle) layer - // their own policy (allow / refuse / discriminate) on top. +fn parse_invalid_file_reported_via_parsed_ok() { + // The contract: parse_group is lenient — system errors still throw, but per-file parse + // errors are surfaced via ParsedTree::parsed_ok. Callers (bender script, bender pickle) + // layer their own policy (allow / refuse / discriminate) on top. let mut session = bender_slang::SlangSession::new(); let files = vec![fixture_path("src/broken.sv")]; let includes = vec![]; let defines = vec![]; - let indices = session + session .parse_group(&files, &includes, &defines) .expect("parse_group is lenient and should return Ok even on parse errors"); - assert_eq!(indices.len(), 1); - - let failed = session.failed_indices().expect("failed_indices query"); - assert_eq!(failed, vec![0], "broken.sv should be reported as failed"); - let protected = session - .protected_indices() - .expect("protected_indices query"); + let trees = session.all_trees(); + assert_eq!(trees.len(), 1); + assert!( + !trees[0].parsed_ok, + "broken.sv should be reported as a failed parse" + ); assert!( - protected.is_empty(), - "broken.sv has no pragma protect envelope: {protected:?}" + !trees[0].encrypted, + "broken.sv has no pragma protect envelope" ); } @@ -57,13 +56,13 @@ fn rewriter_build_from_trees_is_repeatable() { .parse_group(&files, &includes, &defines) .expect("parse should succeed"); - let trees = session.all_trees().expect("tree collection should succeed"); + let trees = session.all_trees(); let mut rewriter_once = bender_slang::SyntaxTreeRewriter::new(); rewriter_once.set_prefix("p_"); rewriter_once.set_suffix("_s"); let first_pass_trees: Vec<_> = trees .iter() - .map(|t| rewriter_once.rewrite_declarations(t)) + .map(|t| rewriter_once.rewrite_declarations(&t.tree)) .collect(); let renamed_once = rewriter_once.rewrite_references( first_pass_trees @@ -87,7 +86,7 @@ fn rewriter_build_from_trees_is_repeatable() { rewriter_twice.set_suffix("_s"); let first_pass_trees: Vec<_> = trees .iter() - .map(|t| rewriter_twice.rewrite_declarations(t)) + .map(|t| rewriter_twice.rewrite_declarations(&t.tree)) .collect(); let renamed_twice = rewriter_twice.rewrite_references( first_pass_trees diff --git a/src/cmd/pickle.rs b/src/cmd/pickle.rs index 5c1f4a2e..3d80f023 100644 --- a/src/cmd/pickle.rs +++ b/src/cmd/pickle.rs @@ -5,7 +5,7 @@ use std::fs::File; use std::io::{BufWriter, Write}; -use std::path::{Path, PathBuf}; +use std::path::Path; use clap::Args; use indexmap::{IndexMap, IndexSet}; @@ -171,7 +171,6 @@ pub fn run(sess: &Session, args: PickleArgs) -> Result<()> { }; let mut session = SlangSession::new(); - let mut all_paths: Vec = Vec::new(); for src_group in srcs { // Collect include directories from the source group and command line arguments. let include_dirs: Vec = src_group @@ -216,7 +215,6 @@ pub fn run(sess: &Session, args: PickleArgs) -> Result<()> { }) .collect(); - all_paths.extend(file_paths.iter().map(PathBuf::from)); session .parse_group(&file_paths, &include_dirs, &defines) .into_diagnostic()?; @@ -224,23 +222,25 @@ pub fn run(sess: &Session, args: PickleArgs) -> Result<()> { // Pickle rewrites and reprints each tree, so a partial parse silently emits broken output // (encrypted protect envelopes get mangled, etc.). Refuse rather than corrupt — the file - // filter in `bender script` is fine with partial trees, but pickle is not. - let failed = session.failed_indices().into_diagnostic()?; + // filter in `bender script` is fine with partial trees, but pickle is not. Each tree carries + // its own source path, so we can name the offenders directly. + let all_trees = session.all_trees(); + let failed: Vec<&str> = all_trees + .iter() + .filter(|t| !t.parsed_ok) + .map(|t| t.path.as_str()) + .collect(); if !failed.is_empty() { - let names: Vec = failed - .iter() - .filter_map(|i| all_paths.get(*i).map(|p| p.display().to_string())) - .collect(); return Err(miette::miette!( "pickle cannot rewrite {} file(s) with slang parse errors (output would be corrupt): {}\n\ see diagnostics above; use `bender script --top ...` if you only need a file list", - names.len(), - names.join(", ") + failed.len(), + failed.join(", ") )); } let trees = if args.top.is_empty() { - session.all_trees().into_diagnostic()? + all_trees } else { session.reachable_trees(&args.top).into_diagnostic()? }; @@ -253,7 +253,7 @@ pub fn run(sess: &Session, args: PickleArgs) -> Result<()> { // Pass 1: build rename map across all trees. let trees: Vec<_> = trees .iter() - .map(|tree| rewriter.rewrite_declarations(tree)) + .map(|parsed| rewriter.rewrite_declarations(&parsed.tree)) .collect(); // Pass 2: rewrite declarations and references using the complete map. diff --git a/src/cmd/script.rs b/src/cmd/script.rs index cc1564c5..86c47562 100644 --- a/src/cmd/script.rs +++ b/src/cmd/script.rs @@ -536,10 +536,9 @@ fn apply_slang_filters<'a>( drop_unparseable: bool, allow_broken: bool, ) -> Result<(Vec>, std::collections::HashSet)> { - use std::collections::{HashMap, HashSet}; + use std::collections::HashSet; let mut session = SlangSession::new(); - let mut index_to_path: HashMap = HashMap::new(); for src_group in &srcs { // Collect include dirs @@ -579,33 +578,27 @@ fn apply_slang_filters<'a>( .iter() .map(|p| p.to_string_lossy().into_owned()) .collect(); - let indices = session + session .parse_group(&file_paths, &include_dirs, &defines) .into_diagnostic()?; - for (idx, path) in indices.into_iter().zip(&paths) { - index_to_path.insert(idx, path); - } } } // Discriminate encrypted IP (legal SystemVerilog, just hard to parse — auto-tolerated) // from genuinely broken files (real syntax bugs — abort unless `--allow-broken`). - // Encrypted ⇔ slang emitted a `ProtectedEnvelope` diag on that tree. - let failed_indices = session.failed_indices().into_diagnostic()?; - let protected_indices: HashSet = session - .protected_indices() - .into_diagnostic()? - .into_iter() - .collect(); - let mut encrypted_paths: HashSet<&Path> = HashSet::new(); - let mut broken_paths: Vec<&Path> = Vec::new(); - for i in &failed_indices { - if let Some(p) = index_to_path.get(i).copied() { - if protected_indices.contains(i) { - encrypted_paths.insert(p); - } else { - broken_paths.push(p); - } + // Encrypted ⇔ slang emitted a `ProtectedEnvelope` diag on that tree. Each tree carries its + // own source path, so we identify files by path without a separate index map. + let all_trees = session.all_trees(); + let mut encrypted_paths: HashSet = HashSet::new(); + let mut broken_paths: Vec = Vec::new(); + for parsed in &all_trees { + if parsed.parsed_ok { + continue; + } + if parsed.encrypted { + encrypted_paths.insert(PathBuf::from(&parsed.path)); + } else { + broken_paths.push(PathBuf::from(&parsed.path)); } } @@ -625,25 +618,25 @@ fn apply_slang_filters<'a>( // From here on, both encrypted (always) and broken (only with --allow-broken) survive as // "unparseable" — kept or dropped per `drop_unparseable`. - let unparseable_paths: HashSet<&Path> = encrypted_paths + let unparseable_paths: HashSet = encrypted_paths .iter() - .copied() - .chain(broken_paths.iter().copied()) + .cloned() + .chain(broken_paths.iter().cloned()) .collect(); // Determine which trees feed into the include / file-retention questions. With `--top` we // only look at trees reachable from those top modules; without `--top` we use every tree // (relevant when the caller asked for include-dir trimming but no file filtering). let filter_files = !top.is_empty(); - let kept_indices: Vec = if filter_files { - session.reachable_indices(top).into_diagnostic()? + let kept_trees = if filter_files { + session.reachable_trees(top).into_diagnostic()? } else { - (0..session.tree_count()).collect() + all_trees }; let kept_paths: HashSet<&Path> = if filter_files { - kept_indices + kept_trees .iter() - .filter_map(|i| index_to_path.get(i).copied()) + .map(|t| Path::new(t.path.as_str())) .collect() } else { HashSet::new() @@ -654,8 +647,7 @@ fn apply_slang_filters<'a>( // cause spurious mismatches. let resolved_includes: Vec = if trim_incdirs { session - .resolved_include_paths(&kept_indices) - .into_diagnostic()? + .resolved_include_paths(&kept_trees) .into_iter() .map(PathBuf::from) .collect() From 8c31131e1dcdbfbf759ac1bb676f840138ac9797 Mon Sep 17 00:00:00 2001 From: Tim Fischer Date: Wed, 3 Jun 2026 22:07:44 +0200 Subject: [PATCH 05/12] bender-slang: format files --- crates/bender-slang/cpp/analysis.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/bender-slang/cpp/analysis.cpp b/crates/bender-slang/cpp/analysis.cpp index a5477da7..36a78e66 100644 --- a/crates/bender-slang/cpp/analysis.cpp +++ b/crates/bender-slang/cpp/analysis.cpp @@ -92,9 +92,9 @@ rust::Vec reachable_trees(const SlangSession& session, const rust::V return; const auto& prevBufferIds = entries[it->second].tree->getSourceBufferIds(); - std::string_view prevFile = prevBufferIds.empty() - ? std::string_view("") - : entries[it->second].tree->sourceManager().getRawFileName(prevBufferIds[0]); + std::string_view prevFile = + prevBufferIds.empty() ? std::string_view("") + : entries[it->second].tree->sourceManager().getRawFileName(prevBufferIds[0]); auto& state = diagFor(entries[i].tree->sourceManager()); slang::Diagnostic diag(kDuplicateTopLevelDecl, loc); From ec3173e4fdb6fa0d6f9cfdc9d8867af366f14ff2 Mon Sep 17 00:00:00 2001 From: Tim Fischer Date: Wed, 3 Jun 2026 22:22:51 +0200 Subject: [PATCH 06/12] bender-slang: bump version --- Cargo.lock | 2 +- Cargo.toml | 2 +- crates/bender-slang/Cargo.toml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 801cd0cc..4fed06a7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -148,7 +148,7 @@ dependencies = [ [[package]] name = "bender-slang" -version = "0.1.1" +version = "0.2.0" dependencies = [ "cmake", "cxx", diff --git a/Cargo.toml b/Cargo.toml index 25669ce4..64768a3e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,7 +21,7 @@ inherits = "release" lto = "thin" [dependencies] -bender-slang = { version = "0.1.1", path = "crates/bender-slang", optional = true } +bender-slang = { version = "0.2.0", path = "crates/bender-slang", optional = true } serde = { version = "1", features = ["derive"] } serde_yaml_ng = "0.10" diff --git a/crates/bender-slang/Cargo.toml b/crates/bender-slang/Cargo.toml index 7e59f576..5d7eec64 100644 --- a/crates/bender-slang/Cargo.toml +++ b/crates/bender-slang/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "bender-slang" -version = "0.1.1" +version = "0.2.0" edition = "2024" description = "Internal bender crate: Rust bindings for the Slang SystemVerilog parser" license = "Apache-2.0" From e1e5f61dc498d0a226f78def3b29521fee780f9b Mon Sep 17 00:00:00 2001 From: Tim Fischer Date: Wed, 3 Jun 2026 22:27:43 +0200 Subject: [PATCH 07/12] ci(formatting): remove obsolte exclude paths --- .github/workflows/formatting.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/formatting.yml b/.github/workflows/formatting.yml index f81cf639..da6fa7df 100644 --- a/.github/workflows/formatting.yml +++ b/.github/workflows/formatting.yml @@ -32,4 +32,3 @@ jobs: with: source: "." extensions: "h,hpp,c,cc,cpp,cxx" - exclude: "./crates/bender-slang/vendor" From 863d32b477b8a324e7d0f1ab47f348cc195bb640 Mon Sep 17 00:00:00 2001 From: Tim Fischer Date: Wed, 3 Jun 2026 22:34:14 +0200 Subject: [PATCH 08/12] bender-slang: remove legacy `tree_count` function --- crates/bender-slang/cpp/session.cpp | 3 --- crates/bender-slang/cpp/slang_bridge.h | 1 - crates/bender-slang/src/lib.rs | 9 --------- crates/bender-slang/tests/basic.rs | 2 +- 4 files changed, 1 insertion(+), 14 deletions(-) diff --git a/crates/bender-slang/cpp/session.cpp b/crates/bender-slang/cpp/session.cpp index 306be024..d8d26615 100644 --- a/crates/bender-slang/cpp/session.cpp +++ b/crates/bender-slang/cpp/session.cpp @@ -106,6 +106,3 @@ void SlangSession::parse_group(const rust::Vec& files, const rust: contexts.push_back(std::move(ctx)); } - -// Returns the number of syntax trees currently stored in the session. -std::size_t tree_count(const SlangSession& session) { return session.entries().size(); } diff --git a/crates/bender-slang/cpp/slang_bridge.h b/crates/bender-slang/cpp/slang_bridge.h index b20b9e16..69b78bb7 100644 --- a/crates/bender-slang/cpp/slang_bridge.h +++ b/crates/bender-slang/cpp/slang_bridge.h @@ -95,7 +95,6 @@ rust::String print_tree(std::shared_ptr tree, SlangPr rust::String dump_tree_json(std::shared_ptr tree); -std::size_t tree_count(const SlangSession& session); rust::Vec all_trees(const SlangSession& session); rust::Vec reachable_trees(const SlangSession& session, const rust::Vec& tops); rust::Vec resolved_include_paths_for(const rust::Vec& trees); diff --git a/crates/bender-slang/src/lib.rs b/crates/bender-slang/src/lib.rs index a274f1a5..240330ab 100644 --- a/crates/bender-slang/src/lib.rs +++ b/crates/bender-slang/src/lib.rs @@ -16,8 +16,6 @@ pub enum SlangError { ParseGroup { message: String }, #[error("Failed to trim files by top modules: {message}")] TrimByTop { message: String }, - #[error("Failed to rewrite syntax trees: {message}")] - Rewrite { message: String }, } #[derive(Debug, Clone, Copy, Default)] @@ -70,8 +68,6 @@ mod ffi { defines: &Vec, ) -> Result<()>; - fn tree_count(session: &SlangSession) -> usize; - fn all_trees(session: &SlangSession) -> Vec; fn reachable_trees(session: &SlangSession, tops: &Vec) -> Result>; @@ -206,11 +202,6 @@ impl SlangSession { }) } - /// Returns the total number of parsed syntax trees in the session. - pub fn tree_count(&self) -> usize { - ffi::tree_count(self.inner.as_ref().unwrap()) - } - /// Returns every parsed tree in the session, each bundled with its per-file facts (path, /// parse success, encryption). The order matches parse order across all `parse_group` calls. pub fn all_trees(&self) -> Vec> { diff --git a/crates/bender-slang/tests/basic.rs b/crates/bender-slang/tests/basic.rs index 235843a4..315ef51a 100644 --- a/crates/bender-slang/tests/basic.rs +++ b/crates/bender-slang/tests/basic.rs @@ -18,7 +18,7 @@ fn parse_valid_file_succeeds() { let includes = vec![fixture_path("include")]; let defines = vec![]; assert!(session.parse_group(&files, &includes, &defines).is_ok()); - assert_eq!(session.tree_count(), 1); + assert_eq!(session.all_trees().len(), 1); } #[test] From acd5acfb1c3ee558cf80481f4de693dc7081e34d Mon Sep 17 00:00:00 2001 From: Tim Fischer Date: Thu, 4 Jun 2026 09:14:55 +0200 Subject: [PATCH 09/12] bender-slang: fix clangd support & clean up --- crates/bender-slang/build.rs | 68 +++++++++++++++-------------- crates/bender-slang/cpp/session.cpp | 1 - 2 files changed, 36 insertions(+), 33 deletions(-) diff --git a/crates/bender-slang/build.rs b/crates/bender-slang/build.rs index 6d056aed..ef7518ab 100644 --- a/crates/bender-slang/build.rs +++ b/crates/bender-slang/build.rs @@ -1,38 +1,6 @@ // Copyright (c) 2025 ETH Zurich // Tim Fischer -// Generates cpp/compile_flags.txt so that clangd gets the correct include paths -// for the C++ bridge files. The file is written to the cpp/ directory and should -// be gitignored. It is picked up automatically by clangd for all files in that directory. -fn generate_compile_flags( - manifest_dir: &std::path::Path, - dst: &std::path::Path, - includes: &[&std::path::Path], - defines: &[(&str, &str)], -) { - use std::ffi::OsStr; - - let Some(target_root) = dst - .ancestors() - .find(|p| p.file_name() == Some(OsStr::new("target"))) - else { - return; - }; - - let flags: Vec = ["-x", "c++", "-std=c++20", "-fno-cxx-modules"] - .map(str::to_string) - .into_iter() - .chain(includes.iter().map(|p| format!("-I{}", p.display()))) - .chain([format!("-I{}", target_root.join("cxxbridge").display())]) - .chain(defines.iter().map(|(k, v)| format!("-D{}={}", k, v))) - .collect(); - - let _ = std::fs::write( - manifest_dir.join("cpp/compile_flags.txt"), - flags.join("\n") + "\n", - ); -} - fn main() { let manifest_dir = std::path::PathBuf::from(std::env::var("CARGO_MANIFEST_DIR").unwrap()); @@ -171,3 +139,39 @@ fn main() { println!("cargo:rerun-if-changed=cpp/print.cpp"); println!("cargo:rerun-if-changed=cpp/analysis.cpp"); } + +// Generates cpp/compile_flags.txt so that clangd gets the correct include paths +// for the C++ bridge files. The file is written to the cpp/ directory and should +// be gitignored. It is picked up automatically by clangd for all files in that directory. +fn generate_compile_flags( + manifest_dir: &std::path::Path, + dst: &std::path::Path, + includes: &[&std::path::Path], + defines: &[(&str, &str)], +) { + use std::ffi::OsStr; + + let Some(target_root) = dst + .ancestors() + .find(|p| p.file_name() == Some(OsStr::new("target"))) + else { + return; + }; + + let bridge_crate_include = manifest_dir.parent().unwrap_or(manifest_dir); + let flags: Vec = ["-x", "c++", "-std=c++20", "-fno-cxx-modules"] + .map(str::to_string) + .into_iter() + .chain(includes.iter().map(|p| format!("-I{}", p.display()))) + .chain([ + format!("-I{}", target_root.join("cxxbridge").display()), + format!("-I{}", bridge_crate_include.display()), + ]) + .chain(defines.iter().map(|(k, v)| format!("-D{}={}", k, v))) + .collect(); + + let _ = std::fs::write( + manifest_dir.join("cpp/compile_flags.txt"), + flags.join("\n") + "\n", + ); +} diff --git a/crates/bender-slang/cpp/session.cpp b/crates/bender-slang/cpp/session.cpp index d8d26615..2a0b7add 100644 --- a/crates/bender-slang/cpp/session.cpp +++ b/crates/bender-slang/cpp/session.cpp @@ -10,7 +10,6 @@ using namespace slang; using namespace slang::syntax; -using std::shared_ptr; using std::string; using std::string_view; From 30f8b82a03fe40f5b290b23600a46d7920ae883e Mon Sep 17 00:00:00 2001 From: Michael Rogenmoser Date: Thu, 4 Jun 2026 10:52:23 +0200 Subject: [PATCH 10/12] script: replace --allow-broken/--drop-unparseable with --broken/--encrypted MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Collapse the two boolean policy flags into two symmetric three-way selectors. Each accepts `error` / `keep` / `drop` and is wrapped in Option<> so we can distinguish "explicitly set" from "implicitly default" — that's what gates whether the slang pass even runs. Old flag combinations map directly: (none) -> defaults --allow-broken -> --broken keep --drop-unparseable -> --encrypted drop --allow-broken --drop-unparseable -> --broken drop --encrypted drop Trigger rules for the slang pass: --top X -> yes (file reachability filter) --trim-incdirs always -> yes (incdir trimming) --broken error / drop (explicit) -> yes (need slang to classify) --encrypted error / drop (explicit) -> yes --broken keep (explicit or default) -> no (no-op: keeping is what happens anyway) --encrypted keep (explicit or default) -> no This unlocks scan-only uses that previously required --top: bender script --broken drop # drop broken files from output bender script --encrypted error # CI lint: encrypted-free codebase bender script --encrypted drop # strip encrypted IP from script The non-Verilog passthrough (VHDL, untyped files) is now explicit in the apply_slang_filters doc comment, so a future contributor doesn't accidentally extend a filter to them. Two new tests cover the without-`--top` cases (`--broken drop` and `--broken keep` alone). The existing tests' flag args were swapped to the new spelling; the encrypted-vs-broken assertions are unchanged. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/cmd/script.rs | 198 +++++++++++++++++++++++++++------------------- tests/script.rs | 88 ++++++++++++++++++--- 2 files changed, 194 insertions(+), 92 deletions(-) diff --git a/src/cmd/script.rs b/src/cmd/script.rs index 86c47562..921d572c 100644 --- a/src/cmd/script.rs +++ b/src/cmd/script.rs @@ -103,15 +103,27 @@ pub struct ScriptArgs { )] pub trim_incdirs: TrimIncdirs, - /// Drop files slang could not parse (default: keep them) + /// What to do with files slang reports parse errors on with no `pragma protect` envelope + /// [implicit default: error when slang runs; no effect otherwise] #[cfg(feature = "slang")] - #[arg(long, global = true, help_heading = "General Script Options")] - pub drop_unparseable: bool, + #[arg( + long, + value_enum, + global = true, + help_heading = "General Script Options" + )] + pub broken: Option, - /// Tolerate slang parse errors in non-encrypted files instead of failing + /// What to do with IEEE-1735 encrypted files slang cannot fully parse + /// [implicit default: keep when slang runs; no effect otherwise] #[cfg(feature = "slang")] - #[arg(long, global = true, help_heading = "General Script Options")] - pub allow_broken: bool, + #[arg( + long, + value_enum, + global = true, + help_heading = "General Script Options" + )] + pub encrypted: Option, /// Format of the generated script #[command(subcommand)] @@ -143,6 +155,19 @@ pub enum TrimIncdirs { Never, } +/// What to do with a class of files slang reports parse errors on. +#[cfg(feature = "slang")] +#[derive(Copy, Clone, Debug, PartialEq, Eq, ValueEnum, Serialize)] +#[serde(rename_all = "snake_case")] +pub enum ParsePolicy { + /// Abort the run if any file of this class is found + Error, + /// Tolerate these files and include them in the script + Keep, + /// Tolerate these files but drop them from the script + Drop, +} + /// Common arguments for Vivado scripts #[derive(Args, Debug, Clone)] pub struct OnlyArgs { @@ -362,8 +387,8 @@ pub fn run(sess: &Session, args: &ScriptArgs) -> Result<()> { .collect::>>()?; // Slang-based filtering: trim unreachable Verilog files (when `--top` is given) and/or - // unused include directories (per `--trim-incdirs`), and optionally drop files that slang - // could not parse (per `--drop-unparseable`). + // unused include directories (per `--trim-incdirs`), with per-class policies for files + // slang couldn't fully parse (`--broken`, `--encrypted`). #[cfg(feature = "slang")] let (srcs, unparseable_paths) = { let trim_incdirs = match args.trim_incdirs { @@ -371,20 +396,28 @@ pub fn run(sess: &Session, args: &ScriptArgs) -> Result<()> { TrimIncdirs::Never => false, TrimIncdirs::Auto => !args.top.is_empty(), }; - // The slang pass also enforces "no broken non-encrypted parse errors" unless - // --allow-broken, so even with no top / no trim / no drop, we still need to invoke - // it when a strict check is desired. For now we keep the short-circuit consistent - // with the original "do nothing if no slang-driven action requested" — the strict - // check kicks in whenever the slang pass already runs for another reason. - if args.top.is_empty() && !trim_incdirs && !args.drop_unparseable { + // Skip the slang pass entirely when no flag requires it. `Keep` (the implicit default + // for encrypted, and what an explicit `--broken keep` / `--encrypted keep` says) is a + // no-op without slang — we'd keep the file anyway since no filter touched it. Only an + // explicit `error` or `drop` value needs slang to actually classify files. + let broken_policy = args.broken.unwrap_or(ParsePolicy::Error); + let encrypted_policy = args.encrypted.unwrap_or(ParsePolicy::Keep); + let policies_need_slang = matches!( + args.broken, + Some(ParsePolicy::Error) | Some(ParsePolicy::Drop) + ) || matches!( + args.encrypted, + Some(ParsePolicy::Error) | Some(ParsePolicy::Drop) + ); + if args.top.is_empty() && !trim_incdirs && !policies_need_slang { (srcs, std::collections::HashSet::::new()) } else { apply_slang_filters( srcs, &args.top, trim_incdirs, - args.drop_unparseable, - args.allow_broken, + broken_policy, + encrypted_policy, )? } }; @@ -520,11 +553,15 @@ where /// files is dropped). When `trim_incdirs` is true, include directories slang did not resolve /// an `include through are dropped from `include_dirs` and `export_incdirs`. /// -/// IEEE-1735 encrypted IP is auto-tolerated: slang emits a `ProtectedEnvelope` diag whenever -/// it skips a protect block, and any parse errors on those files are treated as expected -/// fallout. Files with parse errors but no protect-envelope diag look like real syntax bugs -/// and abort the run unless `allow_broken=true`. Tolerated files are kept in the output -/// regardless of reachability; pass `drop_unparseable=true` to drop them instead. +/// Files slang couldn't fully parse are classified into: +/// * encrypted — slang emitted a `ProtectedEnvelope` diag (IEEE-1735 protect block) +/// * broken — parse errors with no such diag (looks like a real syntax bug) +/// Each class is handled per its `ParsePolicy`: `Error` aborts the run, `Keep` includes the +/// file in the script, `Drop` tolerates but excludes it. Sensible defaults are `broken=Error` +/// and `encrypted=Keep`. +/// +/// Non-Verilog files (VHDL, untyped) intentionally bypass every filter here — slang doesn't +/// process them, so we have no reachability information and conservatively retain them all. /// /// Returns the filtered groups plus the set of unparseable file paths that survived filtering, /// so the caller can annotate them in `source_annotations` output. @@ -533,8 +570,8 @@ fn apply_slang_filters<'a>( srcs: Vec>, top: &[String], trim_incdirs: bool, - drop_unparseable: bool, - allow_broken: bool, + broken_policy: ParsePolicy, + encrypted_policy: ParsePolicy, ) -> Result<(Vec>, std::collections::HashSet)> { use std::collections::HashSet; @@ -584,13 +621,12 @@ fn apply_slang_filters<'a>( } } - // Discriminate encrypted IP (legal SystemVerilog, just hard to parse — auto-tolerated) - // from genuinely broken files (real syntax bugs — abort unless `--allow-broken`). - // Encrypted ⇔ slang emitted a `ProtectedEnvelope` diag on that tree. Each tree carries its - // own source path, so we identify files by path without a separate index map. + // Discriminate encrypted IP (legal SystemVerilog, just hard to parse) from genuinely broken + // files (real syntax bugs). Encrypted ⇔ slang emitted a `ProtectedEnvelope` diag on that + // tree. Each tree carries its own source path, so we identify files by path directly. let all_trees = session.all_trees(); let mut encrypted_paths: HashSet = HashSet::new(); - let mut broken_paths: Vec = Vec::new(); + let mut broken_paths: HashSet = HashSet::new(); for parsed in &all_trees { if parsed.parsed_ok { continue; @@ -598,31 +634,36 @@ fn apply_slang_filters<'a>( if parsed.encrypted { encrypted_paths.insert(PathBuf::from(&parsed.path)); } else { - broken_paths.push(PathBuf::from(&parsed.path)); + broken_paths.insert(PathBuf::from(&parsed.path)); } } - if !broken_paths.is_empty() && !allow_broken { - let listed = broken_paths - .iter() - .map(|p| p.display().to_string()) - .collect::>() - .join("\n "); + let list = |set: &HashSet| -> String { + let mut v: Vec = set.iter().map(|p| p.display().to_string()).collect(); + v.sort(); + v.join("\n ") + }; + + // Policy enforcement: abort up front for any `Error` class. + if broken_policy == ParsePolicy::Error && !broken_paths.is_empty() { return Err(miette::miette!( "slang reported parse errors in {} file(s) with no `pragma protect` envelope (looks like real syntax bugs):\n {}\n\ - see diagnostics above; pass --allow-broken to tolerate these errors and continue", + see diagnostics above; pass --broken keep or --broken drop to continue", broken_paths.len(), - listed + list(&broken_paths) + )); + } + if encrypted_policy == ParsePolicy::Error && !encrypted_paths.is_empty() { + return Err(miette::miette!( + "slang reported parse errors in {} encrypted file(s) and --encrypted error was requested:\n {}", + encrypted_paths.len(), + list(&encrypted_paths) )); } - // From here on, both encrypted (always) and broken (only with --allow-broken) survive as - // "unparseable" — kept or dropped per `drop_unparseable`. - let unparseable_paths: HashSet = encrypted_paths - .iter() - .cloned() - .chain(broken_paths.iter().cloned()) - .collect(); + // After Error-class abort: any remaining unparseable file is either Kept or Dropped. + let broken_kept = broken_policy == ParsePolicy::Keep; + let encrypted_kept = encrypted_policy == ParsePolicy::Keep; // Determine which trees feed into the include / file-retention questions. With `--top` we // only look at trees reachable from those top modules; without `--top` we use every tree @@ -660,16 +701,20 @@ fn apply_slang_filters<'a>( }; // Single retention rule per Verilog file: - // * unparseable files keep iff !drop_unparseable (regardless of reachability) + // * broken files keep iff `broken_policy == Keep` (regardless of reachability) + // * encrypted files keep iff `encrypted_policy == Keep` (regardless of reachability) // * everything else keep iff no --top filter is active, or it's reachable from one let retain_verilog = |p: &Path| -> bool { - if unparseable_paths.contains(p) { - !drop_unparseable + if broken_paths.contains(p) { + broken_kept + } else if encrypted_paths.contains(p) { + encrypted_kept } else { !filter_files || kept_paths.contains(p) } }; - let run_file_filter = filter_files || drop_unparseable; + let drop_anything = broken_policy == ParsePolicy::Drop || encrypted_policy == ParsePolicy::Drop; + let run_file_filter = filter_files || drop_anything; let filtered: Vec> = srcs .into_iter() @@ -692,43 +737,36 @@ fn apply_slang_filters<'a>( .filter(|group| !group.files.is_empty()) .collect(); - // Summary lines: report encrypted and broken files separately so users can tell apart the - // automatic tolerance (encrypted) from the explicit one (broken, only here if - // --allow-broken was set since we'd have errored out otherwise). - let kept_unparseable: HashSet = if drop_unparseable { - HashSet::new() - } else { - unparseable_paths.iter().map(|p| p.to_path_buf()).collect() - }; - let verb = if drop_unparseable { - "dropped" - } else { - "kept in script output" - }; - if !encrypted_paths.is_empty() { - let names: Vec = encrypted_paths - .iter() - .map(|p| p.display().to_string()) - .collect(); - eprintln!( - "warning: {} encrypted file(s) ({}): {}", - names.len(), - verb, - names.join(", "), - ); - } - if !broken_paths.is_empty() { - let names: Vec = broken_paths - .iter() - .map(|p| p.display().to_string()) - .collect(); + // Summary lines: report encrypted and broken classes separately so users can tell apart + // automatic vs explicit tolerance and the inclusion verdict at a glance. + let kept_unparseable: HashSet = encrypted_paths + .iter() + .filter(|_| encrypted_kept) + .chain(broken_paths.iter().filter(|_| broken_kept)) + .cloned() + .collect(); + let class_summary = |label: &str, set: &HashSet, policy: ParsePolicy| { + if set.is_empty() || policy == ParsePolicy::Error { + // Error was handled above; Keep/Drop are the only branches that reach here. + return; + } + let verb = match policy { + ParsePolicy::Keep => "kept in script output", + ParsePolicy::Drop => "dropped", + ParsePolicy::Error => unreachable!(), + }; + let mut names: Vec = set.iter().map(|p| p.display().to_string()).collect(); + names.sort(); eprintln!( - "warning: {} broken file(s) ({} via --allow-broken): {}", + "warning: {} {} file(s) ({}): {}", names.len(), + label, verb, names.join(", "), ); - } + }; + class_summary("encrypted", &encrypted_paths, encrypted_policy); + class_summary("broken", &broken_paths, broken_policy); Ok((filtered, kept_unparseable)) } diff --git a/tests/script.rs b/tests/script.rs index 0fac7871..a3d3b0ed 100644 --- a/tests/script.rs +++ b/tests/script.rs @@ -220,15 +220,17 @@ mod tests { ); } - /// `--drop-unparseable` drops files slang couldn't parse, even when `--top` keeps the rest. + /// `--encrypted drop` removes encrypted files from the output even when `--top` keeps + /// the rest of the reachable design. #[test] - fn script_drop_unparseable_excludes_encrypted_file() { + fn script_encrypted_drop_excludes_encrypted_file() { let out = run_script(&[ "--target", "encrypted", "--top", "encrypted_top", - "--drop-unparseable", + "--encrypted", + "drop", "flist-plus", ]); let files = source_basenames(&out); @@ -236,7 +238,24 @@ mod tests { assert!(files.contains("encrypted_user.sv")); assert!( !files.contains("encrypted_ip.sv"), - "encrypted IP should be dropped by --drop-unparseable: {files:?}" + "encrypted IP should be dropped by --encrypted drop: {files:?}" + ); + } + + /// `--encrypted error` makes the run abort when any encrypted file is encountered — useful + /// as a CI lint for codebases that should be encryption-free. + #[test] + fn script_encrypted_error_aborts() { + let stderr = run_script_failing(&[ + "--target", + "encrypted", + "--encrypted", + "error", + "flist-plus", + ]); + assert!( + stderr.contains("encrypted file(s)") && stderr.contains("--encrypted error"), + "expected encrypted-lint abort message, got stderr:\n{stderr}" ); } @@ -259,7 +278,7 @@ mod tests { } /// A file with a real syntax error (no `pragma protect` envelope) should abort the run by - /// default — that's the discrimination from encrypted IP, which is auto-tolerated. + /// default — `--broken` defaults to `error`, while encrypted IP is auto-tolerated. #[test] fn script_broken_file_fails_by_default() { let stderr = run_script_failing(&["--target", "broken", "--top", "broken", "flist-plus"]); @@ -268,27 +287,72 @@ mod tests { "expected real-bug error message, got stderr:\n{stderr}" ); assert!( - stderr.contains("--allow-broken"), - "error should mention --allow-broken escape hatch:\n{stderr}" + stderr.contains("--broken keep") || stderr.contains("--broken drop"), + "error should point at the --broken keep/drop escape hatches:\n{stderr}" ); } - /// `--allow-broken` lets users opt into tolerance for non-encrypted parse errors. The - /// broken file is kept in the output with a warning instead of aborting. + /// `--broken keep` lets users opt into tolerance for non-encrypted parse errors. The broken + /// file is kept in the output with a warning instead of aborting. #[test] - fn script_broken_file_allow_broken_keeps_it() { + fn script_broken_keep_includes_broken_file() { let out = run_script(&[ "--target", "broken", "--top", "broken", - "--allow-broken", + "--broken", + "keep", "flist-plus", ]); let files = source_basenames(&out); assert!( files.contains("broken.sv"), - "broken.sv should survive with --allow-broken: {files:?}" + "broken.sv should survive with --broken keep: {files:?}" + ); + } + + /// `--broken drop` tolerates broken files but excludes them from the script — useful when + /// the downstream tool would choke on the broken file but you still want bender to finish. + #[test] + fn script_broken_drop_excludes_broken_file() { + let out = run_script(&[ + "--target", + "broken", + "--top", + "broken", + "--broken", + "drop", + "flist-plus", + ]); + let files = source_basenames(&out); + assert!( + !files.contains("broken.sv"), + "broken.sv should be dropped with --broken drop: {files:?}" + ); + } + + /// Explicit `--broken drop` triggers slang even without `--top`, so users can drop broken + /// files from a plain `bender script` output without setting up reachability filtering. + #[test] + fn script_broken_drop_without_top_excludes_broken_file() { + let out = run_script(&["--target", "broken", "--broken", "drop", "flist-plus"]); + let files = source_basenames(&out); + assert!( + !files.contains("broken.sv"), + "broken.sv should be dropped without --top when --broken drop is set: {files:?}" + ); + } + + /// `--broken keep` is a no-op without `--top` (keeping is what happens anyway when slang + /// doesn't run). Verifies broken.sv passes through with no warnings emitted. + #[test] + fn script_broken_keep_without_top_no_slang() { + let out = run_script(&["--target", "broken", "--broken", "keep", "flist-plus"]); + let files = source_basenames(&out); + assert!( + files.contains("broken.sv"), + "broken.sv should pass through with --broken keep: {files:?}" ); } From 6bb3bc697d53dffea50ad05fbf96a73be019b4d5 Mon Sep 17 00:00:00 2001 From: Michael Rogenmoser Date: Thu, 4 Jun 2026 14:00:03 +0200 Subject: [PATCH 11/12] script: gate canonicalize imports on the slang feature The cfg-gated `canonicalize` imports (std::fs on unix, dunce on windows) are only used inside `apply_slang_filters`, which itself is already `#[cfg(feature = "slang")]`. Build with --no-default-features was emitting `unused import: std::fs::canonicalize`. Combine the cfg conditions so the imports only exist when the function that uses them does. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/cmd/script.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cmd/script.rs b/src/cmd/script.rs index 921d572c..ce1a915f 100644 --- a/src/cmd/script.rs +++ b/src/cmd/script.rs @@ -6,10 +6,10 @@ use std::io::Write; use std::path::{Path, PathBuf}; -#[cfg(unix)] +#[cfg(all(unix, feature = "slang"))] use std::fs::canonicalize; -#[cfg(windows)] +#[cfg(all(windows, feature = "slang"))] use dunce::canonicalize; use clap::{ArgAction, Args, Subcommand, ValueEnum}; From 3d4b11a6d90b0db1f06d13f0161865dc5746efe5 Mon Sep 17 00:00:00 2001 From: Michael Rogenmoser Date: Thu, 4 Jun 2026 14:02:23 +0200 Subject: [PATCH 12/12] script: group slang-related flags under their own help heading Move --top, --trim-incdirs, --broken, and --encrypted from "General Script Options" into "Slang Options" so they read together and make it obvious at a glance which flags require the slang feature. Mirrors the pattern already used in `bender pickle`. Help-text-only change; no behavior or test impact. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/cmd/script.rs | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/src/cmd/script.rs b/src/cmd/script.rs index ce1a915f..f1f5d5bc 100644 --- a/src/cmd/script.rs +++ b/src/cmd/script.rs @@ -89,7 +89,7 @@ pub struct ScriptArgs { /// Trim unreachable Verilog files via the given top-level module(s) #[cfg(feature = "slang")] - #[arg(long, global = true, help_heading = "General Script Options")] + #[arg(long, global = true, help_heading = "Slang Options")] pub top: Vec, /// Drop unused include directories from the generated script @@ -99,30 +99,20 @@ pub struct ScriptArgs { value_enum, default_value_t, global = true, - help_heading = "General Script Options" + help_heading = "Slang Options" )] pub trim_incdirs: TrimIncdirs, /// What to do with files slang reports parse errors on with no `pragma protect` envelope /// [implicit default: error when slang runs; no effect otherwise] #[cfg(feature = "slang")] - #[arg( - long, - value_enum, - global = true, - help_heading = "General Script Options" - )] + #[arg(long, value_enum, global = true, help_heading = "Slang Options")] pub broken: Option, /// What to do with IEEE-1735 encrypted files slang cannot fully parse /// [implicit default: keep when slang runs; no effect otherwise] #[cfg(feature = "slang")] - #[arg( - long, - value_enum, - global = true, - help_heading = "General Script Options" - )] + #[arg(long, value_enum, global = true, help_heading = "Slang Options")] pub encrypted: Option, /// Format of the generated script