From 2604201b3a190def66c3028b0085fa6577097ce1 Mon Sep 17 00:00:00 2001 From: LunaStev Date: Tue, 19 May 2026 20:34:05 +0900 Subject: [PATCH 1/2] feat: add `export(abi)` syntax, standalone Windows linkage, and inline ASM stack contracts This commit introduces several core features requested for systems programming and bare-metal environments. It adds explicit `export(abi)` and `export(abi, "symbol")` syntax to the frontend, allowing Wave to export functions to C/assembly callers without relying on hacky extern definitions. It also finalizes the Windows cross-compilation pipeline by providing bundled linking via `ld.lld`, eliminating the dependency on host GCC/MinGW installations. [Details] 1. `export(abi)` Support: - Added new `TokenType::Export` to the lexer and `parse_export` to the parser. - Functions can now be explicitly exported with specific ABIs (e.g., `export(c) fun my_func() {}`), allowing LLVM to assign them `External` linkage and proper calling conventions. - Supports both single-function (`export(c) fun`) and block-based (`export(c) { fun ... }`) declarations, similar to the `extern` block syntax. - Added `export(c, "symbol")` to assign custom global symbol names, which is crucial for defining OS entry points (e.g., `_start` or `efi_main`). 2. Windows GNU Linkage Improvements: - `build_windows_gnu_linker_args` now natively prefers `ld.lld` if bundled, rather than deferring to a system GCC. - The build system automatically links core Windows dynamic libraries (`-lkernel32`, `-luser32`, `-lmsvcrt`, etc.) and the MinGW self-contained CRT (`crt2.o`) when standard libraries are enabled. 3. Inline Assembly Stack Contracts: - Implemented advanced static analysis in `x86_64_stack_analysis`, `aarch64_stack_analysis`, and `riscv64_stack_analysis` to inspect inline assembly for instructions that mutate the stack pointer (`push`, `pop`, `sub rsp`, etc.) or perform branching (`jmp`, `ret`, `call`). 
- Added pseudo-clobber strings: `clobber("stack")`, `clobber("nostack")`, and `clobber("noreturn")`. - The compiler now rigidly enforces stack contracts, immediately triggering a compile error if an inline ASM block modifies the stack pointer unpredictably without declaring `clobber("stack")` or `clobber("noreturn")`. - Added `builder.build_unreachable()` generation for `clobber("noreturn")` inline assembly, helping LLVM optimize kernel-level dead ends. 4. LLVM Code Generation Attributes: - Functions compiled for Freestanding (`--freestanding` or `*-none-elf`) targets now automatically receive `noredzone` and `nounwind` attributes, ensuring safe interrupt handling and avoiding implicit exception propagation in kernel space. 5. Frontend/Parser Improvements: - Fixed an issue where the `deref` pseudo-variable assignment fallback would leak into LLVM codegen. The parser now properly lowers `deref ptr = x` into an explicit `Expression::Assignment` target. - Refactored `parse_extern_header` into a generalized `parse_ffi_header` to share parsing logic between `extern` and `export`. 
Signed-off-by: LunaStev --- README.md | 18 +- front/lexer/src/ident.rs | 5 + front/lexer/src/token.rs | 1 + front/parser/src/ast.rs | 7 + front/parser/src/parser/decl.rs | 28 +- front/parser/src/parser/functions.rs | 77 ++++- front/parser/src/parser/parse.rs | 24 +- front/parser/src/parser/stmt.rs | 38 +- llvm/src/backend.rs | 1 + llvm/src/codegen/ir.rs | 100 +++++- llvm/src/codegen/plan.rs | 383 ++++++++++++++++++++- llvm/src/expression/rvalue/asm.rs | 8 +- llvm/src/expression/rvalue/assign.rs | 53 ++- llvm/src/statement/asm.rs | 7 +- llvm/src/statement/assign.rs | 80 +---- src/cli.rs | 1 + src/flags.rs | 1 + src/runner.rs | 1 + test/test107.wave | 13 + test/test108.wave | 64 ++++ tests/codegen_regressions.rs | 495 +++++++++++++++++++++++++++ 21 files changed, 1279 insertions(+), 126 deletions(-) create mode 100644 test/test107.wave create mode 100644 test/test108.wave create mode 100644 tests/codegen_regressions.rs diff --git a/README.md b/README.md index 497cbe95..f1868102 100644 --- a/README.md +++ b/README.md @@ -82,30 +82,36 @@ Wave follows a tiered platform policy to set clear expectations for stability, C

- 🥇 Tier 1 · Primary — Linux, Darwin, WaveOS + 🥇 Tier 1 · Primary — Linux / ELF, Darwin/macOS, WaveOS / Freestanding
  • Full standard library support
  • Required CI coverage
  • ABI stability commitment
  • Release-blocking platforms
  • +
  • Official release packaging target
🥈 Tier 2 · Secondary — FreeBSD, Redox, Fuchsia
    -
  • Build support maintained
  • +
  • Official support target, but not release-blocking
  • +
  • Object generation support expected
  • +
  • Binary linking supported when a valid sysroot/toolchain is provided
  • Partial standard library coverage
  • +
  • Clear diagnostics required for missing sysroot, CRT, linker, or libc support
  • Open to community collaboration
- 🥉 Tier 3 · Experimental — OpenBSD, Windows (MinGW/GNU) + 🥉 Tier 3 · Experimental — Windows / PE-COFF, OpenBSD, NetBSD, DragonFlyBSD, Haiku
    -
  • Cross-compilation from Linux supported
  • -
  • Basic standard library coverage (via Wine/MinGW)
  • -
  • Experimental support for native Windows binaries
  • +
  • Experimental target support
  • +
  • Cross-compilation may be available on a best-effort basis
  • +
  • Object generation is prioritized over full hosted binary execution
  • +
  • Standard library coverage may be incomplete
  • +
  • Native execution, packaging, and installer support are not guaranteed
diff --git a/front/lexer/src/ident.rs b/front/lexer/src/ident.rs index 0fc5bef6..11d8caf4 100644 --- a/front/lexer/src/ident.rs +++ b/front/lexer/src/ident.rs @@ -45,6 +45,11 @@ impl<'a> Lexer<'a> { lexeme: "extern".to_string(), line: self.line, }, + "export" => Token { + token_type: TokenType::Export, + lexeme: "export".to_string(), + line: self.line, + }, "type" => Token { token_type: TokenType::Type, lexeme: "type".to_string(), diff --git a/front/lexer/src/token.rs b/front/lexer/src/token.rs index 7fa1d245..bb9bd592 100644 --- a/front/lexer/src/token.rs +++ b/front/lexer/src/token.rs @@ -82,6 +82,7 @@ impl fmt::Display for UnsignedIntegerType { pub enum TokenType { Fun, Extern, + Export, Type, Enum, Static, diff --git a/front/parser/src/ast.rs b/front/parser/src/ast.rs index 41674307..ad23412e 100644 --- a/front/parser/src/ast.rs +++ b/front/parser/src/ast.rs @@ -74,6 +74,13 @@ pub struct FunctionNode { pub parameters: Vec, pub return_type: Option, pub body: Vec, + pub export: Option, +} + +#[derive(Debug, Clone)] +pub struct ExportAttribute { + pub abi: String, + pub symbol: Option, } #[derive(Debug, Clone)] diff --git a/front/parser/src/parser/decl.rs b/front/parser/src/parser/decl.rs index b3c819a8..e2c5a598 100644 --- a/front/parser/src/parser/decl.rs +++ b/front/parser/src/parser/decl.rs @@ -364,11 +364,18 @@ fn expect(tokens: &mut Peekable>, ty: TokenType, msg: &str) -> b } } -/// extern(abi) ... / extern(abi, "sym") ... -fn parse_extern_header(tokens: &mut Peekable>) -> Option<(String, Option)> { +/// (abi) ... / (abi, "sym") ... 
+pub(super) fn parse_ffi_header( + tokens: &mut Peekable>, + keyword: &str, +) -> Option<(String, Option)> { skip_ws(tokens); - if !expect(tokens, TokenType::Lparen, "Expected '(' after 'extern'") { + if !expect( + tokens, + TokenType::Lparen, + &format!("Expected '(' after '{}'", keyword), + ) { return None; } @@ -381,8 +388,8 @@ fn parse_extern_header(tokens: &mut Peekable>) -> Option<(String }) => name.clone(), other => { println!( - "Error: Expected ABI identifier in extern(...), found {:?}", - other + "Error: Expected ABI identifier in {}(...), found {:?}", + keyword, other ); return None; } @@ -403,8 +410,8 @@ fn parse_extern_header(tokens: &mut Peekable>) -> Option<(String }) => Some(s.clone()), other => { println!( - "Error: Expected string literal after ',' in extern(...), found {:?}", - other + "Error: Expected string literal after ',' in {}(...), found {:?}", + keyword, other ); return None; } @@ -416,7 +423,7 @@ fn parse_extern_header(tokens: &mut Peekable>) -> Option<(String if !expect( tokens, TokenType::Rparen, - "Expected ')' to close extern(...)", + &format!("Expected ')' to close {}(...)", keyword), ) { return None; } @@ -424,6 +431,11 @@ fn parse_extern_header(tokens: &mut Peekable>) -> Option<(String Some((abi, global_symbol)) } +/// extern(abi) ... / extern(abi, "sym") ... +fn parse_extern_header(tokens: &mut Peekable>) -> Option<(String, Option)> { + parse_ffi_header(tokens, "extern") +} + fn peek_non_ws_token_type(tokens: &Peekable>) -> Option { let mut it = tokens.clone(); while let Some(t) = it.peek() { diff --git a/front/parser/src/parser/functions.rs b/front/parser/src/parser/functions.rs index 7780e1f8..e46160f0 100644 --- a/front/parser/src/parser/functions.rs +++ b/front/parser/src/parser/functions.rs @@ -10,7 +10,7 @@ // SPDX-License-Identifier: MPL-2.0 // AI TRAINING NOTICE: Prohibited without prior written permission. No use for machine learning or generative AI training, fine-tuning, distillation, embedding, or dataset creation. 
-use crate::ast::{ASTNode, FunctionNode, ParameterNode, StatementNode, Value}; +use crate::ast::{ASTNode, ExportAttribute, FunctionNode, ParameterNode, StatementNode, Value}; use crate::expr::parse_expression; use crate::parser::asm::*; use crate::parser::control::*; @@ -199,6 +199,13 @@ pub fn parse_parameters(tokens: &mut Peekable>) -> Vec>) -> Option { + parse_function_with_export(tokens, None) +} + +pub fn parse_function_with_export( + tokens: &mut Peekable>, + export: Option, +) -> Option { tokens.next(); skip_ws(tokens); @@ -252,9 +259,77 @@ pub fn parse_function(tokens: &mut Peekable>) -> Option { parameters, body, return_type, + export, })) } +pub fn parse_export(tokens: &mut Peekable>) -> Option> { + let (abi, global_symbol) = parse_ffi_header(tokens, "export")?; + let export = ExportAttribute { + abi, + symbol: global_symbol, + }; + + skip_ws(tokens); + + if tokens.peek().map(|t| t.token_type.clone()) == Some(TokenType::Lbrace) { + if export.symbol.is_some() { + println!("Error: export block cannot use a single symbol alias"); + return None; + } + + tokens.next(); + + let mut nodes = Vec::new(); + loop { + skip_ws(tokens); + + match tokens.peek().map(|t| t.token_type.clone()) { + Some(TokenType::Rbrace) => { + tokens.next(); + break; + } + Some(TokenType::Fun) => { + let node = parse_function_with_export(tokens, Some(export.clone()))?; + if let ASTNode::Function(func) = &node { + if !func.generic_params.is_empty() { + println!("Error: exported functions cannot be generic"); + return None; + } + } + nodes.push(node); + } + Some(TokenType::Whitespace) | Some(TokenType::Newline) => { + tokens.next(); + } + other => { + println!("Error: Unexpected token in export block: {:?}", other); + return None; + } + } + } + + skip_ws(tokens); + if tokens.peek().map(|t| t.token_type.clone()) == Some(TokenType::SemiColon) { + tokens.next(); + } + + Some(nodes) + } else if tokens.peek().map(|t| t.token_type.clone()) == Some(TokenType::Fun) { + let node = 
parse_function_with_export(tokens, Some(export))?; + if let ASTNode::Function(func) = &node { + if !func.generic_params.is_empty() { + println!("Error: exported functions cannot be generic"); + return None; + } + } + Some(vec![node]) + } else { + println!("Error: Expected 'fun' or '{{' after export(...)"); + None + } +} + pub fn extract_body(tokens: &mut Peekable>) -> Option> { let mut body = vec![]; diff --git a/front/parser/src/parser/parse.rs b/front/parser/src/parser/parse.rs index cb31e717..6a2c9bf9 100644 --- a/front/parser/src/parser/parse.rs +++ b/front/parser/src/parser/parse.rs @@ -12,7 +12,7 @@ use crate::ast::ASTNode; use crate::parser::decl::*; -use crate::parser::functions::parse_function; +use crate::parser::functions::{parse_export, parse_function}; use crate::parser::items::*; use crate::verification::*; use lexer::token::TokenType; @@ -234,6 +234,26 @@ pub fn parse_syntax_only(tokens: &[Token]) -> Result, ParseError> { .with_help("check ABI syntax, function signature, and separators")); } } + TokenType::Export => { + let anchor = (*token).clone(); + iter.next(); + if let Some(export_nodes) = parse_export(&mut iter) { + nodes.extend(export_nodes); + } else { + return Err(ParseError::syntax_at( + Some(&anchor), + "failed to parse export declaration", + ) + .with_context("top-level export block/declaration") + .with_expected_many([ + "export(c) fun name(...) { ... }", + "export(c, \"symbol\") fun name(...) { ... }", + "export(c) { fun a(...) { ... } fun b(...) { ... 
} }", + ]) + .with_found_token(iter.peek().copied()) + .with_help("exports require a concrete non-generic function body")); + } + } TokenType::Const => { let anchor = (*token).clone(); iter.next(); @@ -356,7 +376,7 @@ pub fn parse_syntax_only(tokens: &[Token]) -> Result, ParseError> { .with_context("top-level items") .with_expected_many([ "import", "extern", "const", "static", "type", "enum", "struct", - "proto", "fun", + "proto", "fun", "export", ]) .with_found_token(Some(token)) .with_help("only declarations are allowed at top level"), diff --git a/front/parser/src/parser/stmt.rs b/front/parser/src/parser/stmt.rs index 73debd6d..8444a40e 100644 --- a/front/parser/src/parser/stmt.rs +++ b/front/parser/src/parser/stmt.rs @@ -10,7 +10,7 @@ // SPDX-License-Identifier: MPL-2.0 // AI TRAINING NOTICE: Prohibited without prior written permission. No use for machine learning or generative AI training, fine-tuning, distillation, embedding, or dataset creation. -use crate::ast::{ASTNode, AssignOperator, Expression, Operator, StatementNode}; +use crate::ast::{ASTNode, AssignOperator, Expression, StatementNode}; use crate::expr::{is_assignable, parse_expression, parse_expression_from_token}; use crate::parser::control::{parse_for, parse_if, parse_match, parse_while}; use crate::parser::decl::{parse_let, parse_var}; @@ -93,28 +93,22 @@ pub fn parse_assignment( ))) } - None => match left_expr { - Expression::Variable(name) => Some(ASTNode::Statement(StatementNode::Assign { - variable: name, - value: right_expr, - })), - - other => { - if !is_assignable(&other) { - println!("Error: Unsupported assignment left expression: {:?}", other); - return None; - } - - Some(ASTNode::Statement(StatementNode::Assign { - variable: "deref".to_string(), - value: Expression::BinaryExpression { - left: Box::new(other), - operator: Operator::Assign, - right: Box::new(right_expr), - }, - })) + None => { + if !is_assignable(&left_expr) { + println!( + "Error: Unsupported assignment left expression: 
{:?}", + left_expr + ); + return None; } - }, + + Some(ASTNode::Statement(StatementNode::Expression( + Expression::Assignment { + target: Box::new(left_expr), + value: Box::new(right_expr), + }, + ))) + } } } diff --git a/llvm/src/backend.rs b/llvm/src/backend.rs index 2db2403b..b5ec7f4c 100644 --- a/llvm/src/backend.rs +++ b/llvm/src/backend.rs @@ -26,6 +26,7 @@ pub struct BackendOptions { pub linker: Option, pub link_args: Vec, pub no_default_libs: bool, + pub freestanding: bool, } fn is_windows_gnu_target(target: Option<&str>) -> bool { diff --git a/llvm/src/codegen/ir.rs b/llvm/src/codegen/ir.rs index aaf2d23a..db640d01 100644 --- a/llvm/src/codegen/ir.rs +++ b/llvm/src/codegen/ir.rs @@ -10,8 +10,9 @@ // SPDX-License-Identifier: MPL-2.0 // AI TRAINING NOTICE: Prohibited without prior written permission. No use for machine learning or generative AI training, fine-tuning, distillation, embedding, or dataset creation. +use inkwell::attributes::{Attribute, AttributeLoc}; use inkwell::context::Context; -use inkwell::module::Module; +use inkwell::module::{Linkage, Module}; use inkwell::passes::PassBuilderOptions; use inkwell::types::{BasicMetadataTypeEnum, BasicType, BasicTypeEnum}; use inkwell::values::{BasicValue, BasicValueEnum, FunctionValue}; @@ -29,7 +30,7 @@ use std::collections::{HashMap, HashSet}; use std::sync::Once; use crate::backend::BackendOptions; -use crate::codegen::target::require_supported_target_from_triple; +use crate::codegen::target::{require_supported_target_from_triple, CodegenTarget}; use crate::statement::generate_statement_ir; use super::consts::{create_llvm_const_value, ConstEvalError}; @@ -75,6 +76,50 @@ fn target_opt_level_from_flag(opt_flag: &str) -> OptimizationLevel { } } +fn code_model_from_backend(backend: &BackendOptions, target: CodegenTarget) -> CodeModel { + if let Some(model) = backend.code_model.as_deref() { + return match model { + "default" => CodeModel::Default, + "jitdefault" | "jit-default" => CodeModel::JITDefault, + 
"small" => CodeModel::Small, + "kernel" => CodeModel::Kernel, + "medium" => CodeModel::Medium, + "large" => CodeModel::Large, + other => panic!("unsupported -C code-model={}", other), + }; + } + + match target { + CodegenTarget::FreestandingX86_64 => CodeModel::Kernel, + _ => CodeModel::Default, + } +} + +fn reloc_mode_from_backend(backend: &BackendOptions, target: CodegenTarget) -> RelocMode { + if let Some(model) = backend.relocation_model.as_deref() { + return match model { + "default" => RelocMode::Default, + "static" => RelocMode::Static, + "pic" | "pie" => RelocMode::PIC, + "dynamic-no-pic" | "dynamic_no_pic" => RelocMode::DynamicNoPic, + other => panic!("unsupported -C relocation-model={}", other), + }; + } + + if backend.freestanding + || matches!( + target, + CodegenTarget::FreestandingX86_64 + | CodegenTarget::FreestandingArm64 + | CodegenTarget::FreestandingRISCV64 + ) + { + RelocMode::Static + } else { + RelocMode::Default + } +} + static INIT_LLVM_TARGETS: Once = Once::new(); fn codegen_trace(step: &str) { @@ -117,6 +162,32 @@ fn should_run_llvm_pass_pipeline() -> bool { !cfg!(target_os = "windows") } +fn should_disable_red_zone(backend: &BackendOptions, target: CodegenTarget) -> bool { + backend.freestanding + || matches!( + target, + CodegenTarget::FreestandingX86_64 + | CodegenTarget::FreestandingArm64 + | CodegenTarget::FreestandingRISCV64 + ) +} + +fn apply_function_codegen_attrs<'ctx>( + context: &'ctx Context, + function: FunctionValue<'ctx>, + disable_red_zone: bool, +) { + if disable_red_zone { + let no_red_zone = Attribute::get_named_enum_kind_id("noredzone"); + let attr = context.create_enum_attribute(no_red_zone, 0); + function.add_attribute(AttributeLoc::Function, attr); + + let no_unwind = Attribute::get_named_enum_kind_id("nounwind"); + let attr = context.create_enum_attribute(no_unwind, 0); + function.add_attribute(AttributeLoc::Function, attr); + } +} + pub unsafe fn generate_ir( ast_nodes: &[ASTNode], opt_flag: &str, @@ -193,10 
+264,13 @@ fn build_module( TargetMachine::get_default_triple() }; let abi_target = require_supported_target_from_triple(&triple); + let disable_red_zone = should_disable_red_zone(backend, abi_target); codegen_trace("lookup target"); let target = Target::from_triple(&triple).unwrap(); let cpu = backend.cpu.as_deref().unwrap_or("generic"); let features = backend.features.as_deref().unwrap_or(""); + let reloc_mode = reloc_mode_from_backend(backend, abi_target); + let code_model = code_model_from_backend(backend, abi_target); codegen_trace("create target machine"); let tm = target @@ -205,8 +279,8 @@ fn build_module( cpu, features, target_opt_level_from_flag(opt_flag), - RelocMode::Default, - CodeModel::Default, + reloc_mode, + code_model, ) .unwrap(); @@ -400,9 +474,19 @@ fn build_module( name, parameters, return_type, + export, .. } in &function_nodes { + if let Some(export) = export { + if !is_supported_extern_abi(&export.abi) { + panic!( + "unsupported export ABI '{}' for function '{}': only export(c) is currently supported", + export.abi, name + ); + } + } + let param_types: Vec = parameters .iter() .map(|p| { @@ -428,7 +512,13 @@ fn build_module( } }; - let function = module.add_function(name, fn_type, None); + let llvm_name = export + .as_ref() + .and_then(|export| export.symbol.as_deref()) + .unwrap_or(name.as_str()); + let linkage = export.as_ref().map(|_| Linkage::External); + let function = module.add_function(llvm_name, fn_type, linkage); + apply_function_codegen_attrs(context, function, disable_red_zone); functions.insert(name.clone(), function); } diff --git a/llvm/src/codegen/plan.rs b/llvm/src/codegen/plan.rs index 600011d7..cdedac6e 100644 --- a/llvm/src/codegen/plan.rs +++ b/llvm/src/codegen/plan.rs @@ -29,6 +29,8 @@ pub struct AsmPlan<'a> { pub clobbers: Vec, pub has_side_effects: bool, + pub align_stack: bool, + pub noreturn: bool, } #[derive(Debug, Clone)] @@ -269,9 +271,9 @@ fn build_default_clobbers( | CodegenTarget::DarwinX86_64 | 
CodegenTarget::WindowsX86_64Gnu | CodegenTarget::FreestandingX86_64 => { - const GPRS: [&str; 16] = [ - "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "rbp", "rsp", "r8", "r9", "r10", - "r11", "r12", "r13", "r14", "r15", + const GPRS: [&str; 14] = [ + "rax", "rbx", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", + "r13", "r14", "r15", ]; for r in GPRS { @@ -371,6 +373,27 @@ fn normalize_special_clobber(target: CodegenTarget, token: &str) -> Option bool { + matches!( + normalize_token(token).as_str(), + "stack" | "uses_stack" | "uses-stack" + ) +} + +fn is_nostack_pseudo_clobber(token: &str) -> bool { + matches!( + normalize_token(token).as_str(), + "nostack" | "no_stack" | "no-stack" + ) +} + +fn is_noreturn_pseudo_clobber(token: &str) -> bool { + matches!( + normalize_token(token).as_str(), + "noreturn" | "no_return" | "no-return" + ) +} + fn normalize_clobber_item(target: CodegenTarget, s: &str) -> String { let t = s.trim(); @@ -427,6 +450,13 @@ fn merge_clobbers( let mut seen: HashSet = base.iter().cloned().collect(); for raw in user { + if is_stack_pseudo_clobber(raw) + || is_nostack_pseudo_clobber(raw) + || is_noreturn_pseudo_clobber(raw) + { + continue; + } + let c = normalize_clobber_item(target, raw); if let Some(inner) = c.strip_prefix("~{").and_then(|x| x.strip_suffix('}')) { @@ -447,6 +477,349 @@ fn merge_clobbers( base } +#[derive(Debug, Clone, Copy)] +struct StackContract { + stack_declared: bool, + nostack_declared: bool, + noreturn_declared: bool, +} + +fn stack_contract_from_user_clobbers(user: &[String]) -> StackContract { + let mut stack_declared = false; + let mut nostack_declared = false; + let mut noreturn_declared = false; + + for item in user { + if is_stack_pseudo_clobber(item) { + stack_declared = true; + } + if is_nostack_pseudo_clobber(item) { + nostack_declared = true; + } + if is_noreturn_pseudo_clobber(item) { + noreturn_declared = true; + } + } + + if stack_declared && nostack_declared { + panic!("asm cannot declare both 
clobber(\"stack\") and clobber(\"nostack\")"); + } + + StackContract { + stack_declared, + nostack_declared, + noreturn_declared, + } +} + +fn strip_inline_asm_comment(line: &str) -> &str { + line.split_once("//") + .map(|(code, _)| code) + .unwrap_or(line) + .split_once('#') + .map(|(code, _)| code) + .unwrap_or(line) +} + +fn asm_instruction_text(line: &str) -> String { + let mut code = strip_inline_asm_comment(line).trim().to_ascii_lowercase(); + + loop { + let Some((label, rest)) = code.split_once(':') else { + break; + }; + + let label = label.trim(); + if label.is_empty() + || !label + .chars() + .all(|c| c.is_ascii_alphanumeric() || c == '_' || c == '.') + { + break; + } + + code = rest.trim().to_string(); + } + + code +} + +fn asm_mnemonic(code: &str) -> &str { + code.split(|c: char| c.is_ascii_whitespace() || c == ';') + .next() + .unwrap_or("") +} + +fn parse_x86_imm(raw: &str) -> Option { + let mut s = raw.trim(); + s = s.trim_start_matches('$'); + s = s.trim_start_matches('#'); + s = s.trim_end_matches(','); + + if let Some(hex) = s.strip_prefix("0x").or_else(|| s.strip_prefix("-0x")) { + let negative = s.starts_with('-'); + let value = i64::from_str_radix(hex, 16).ok()?; + return Some(if negative { -value } else { value }); + } + + s.parse::().ok() +} + +fn parse_x86_rsp_adjustment(code: &str) -> Option { + let mut parts = code + .split(|c: char| c.is_ascii_whitespace() || c == ',') + .filter(|p| !p.is_empty()); + + let op = parts.next()?; + let first = parts.next()?; + let second = parts.next()?; + + let sp_is_second = matches!(second, "rsp" | "%rsp" | "esp" | "%esp" | "sp" | "%sp"); + let sp_is_first = matches!(first, "rsp" | "%rsp" | "esp" | "%esp" | "sp" | "%sp"); + + match op { + "sub" | "subq" | "subl" if sp_is_second => parse_x86_imm(first).map(|v| -v), + "add" | "addq" | "addl" if sp_is_second => parse_x86_imm(first), + "sub" | "subq" | "subl" if sp_is_first => parse_x86_imm(second).map(|v| -v), + "add" | "addq" | "addl" if sp_is_first => 
parse_x86_imm(second), + _ => None, + } +} + +fn x86_jmp_operand_is_indirect(code: &str) -> bool { + let mut parts = code + .split(|c: char| c.is_ascii_whitespace() || c == ',') + .filter(|p| !p.is_empty()); + + let _op = parts.next(); + let Some(operand) = parts.next() else { + return false; + }; + let operand = operand.trim_start_matches('*').trim_start_matches('%'); + + operand.starts_with('[') + || matches!( + reg_phys_group_x86_64(operand), + Some( + "rax" + | "rbx" + | "rcx" + | "rdx" + | "rsi" + | "rdi" + | "rbp" + | "rsp" + | "r8" + | "r9" + | "r10" + | "r11" + | "r12" + | "r13" + | "r14" + | "r15" + ) + ) +} + +#[derive(Debug, Default, Clone, Copy)] +struct StackAnalysis { + touches_stack: bool, + unknown_stack_write: bool, + unbalanced_delta: i64, + nonreturning_branch: bool, +} + +fn x86_64_stack_analysis(line: &str) -> StackAnalysis { + let code = asm_instruction_text(line); + if code.is_empty() { + return StackAnalysis::default(); + } + + let first = asm_mnemonic(&code); + let mut out = StackAnalysis::default(); + + match first { + "call" | "callq" => { + out.touches_stack = true; + return out; + } + "push" | "pushq" => { + out.touches_stack = true; + out.unbalanced_delta = -8; + return out; + } + "pop" | "popq" => { + out.touches_stack = true; + out.unbalanced_delta = 8; + return out; + } + "ret" | "retq" => { + out.touches_stack = true; + out.unbalanced_delta = 8; + return out; + } + "retf" | "retfq" => { + out.touches_stack = true; + out.unbalanced_delta = 16; + return out; + } + "iret" | "iretq" => { + out.touches_stack = true; + out.unknown_stack_write = true; + return out; + } + "leave" | "enter" => { + out.touches_stack = true; + out.unknown_stack_write = true; + return out; + } + "jmp" | "jmpq" => { + out.nonreturning_branch = x86_jmp_operand_is_indirect(&code); + return out; + } + _ => {} + } + + if let Some(delta) = parse_x86_rsp_adjustment(&code) { + out.touches_stack = true; + out.unbalanced_delta = delta; + return out; + } + + let 
writes_sp = code.starts_with("mov rsp") + || code.starts_with("movq rsp") + || code.starts_with("mov %rsp") + || code.starts_with("movq %rsp") + || code.starts_with("and rsp") + || code.starts_with("andq rsp") + || code.starts_with("and %rsp") + || code.starts_with("andq %rsp") + || code.starts_with("xor rsp") + || code.starts_with("xor %rsp") + || code.starts_with("lea rsp") + || code.starts_with("lea %rsp"); + + if writes_sp { + out.touches_stack = true; + out.unknown_stack_write = true; + return out; + } + + out.touches_stack = code.contains("rsp") + || code.contains("esp") + || code.contains("[sp") + || code.contains(" sp,") + || code.contains(", sp"); + out +} + +fn aarch64_stack_analysis(line: &str) -> StackAnalysis { + let code = asm_instruction_text(line); + if code.is_empty() { + return StackAnalysis::default(); + } + + let mut out = StackAnalysis::default(); + let first = asm_mnemonic(&code); + + out.nonreturning_branch = matches!(first, "br"); + out.touches_stack = code == "ret" + || code.starts_with("ret ") + || code.starts_with("bl ") + || code.starts_with("blr ") + || code.contains(" sp,") + || code.contains(", sp") + || code.contains("[sp"); + if out.touches_stack && code.contains(" sp,") { + out.unknown_stack_write = true; + } + out +} + +fn riscv64_stack_analysis(line: &str) -> StackAnalysis { + let code = asm_instruction_text(line); + if code.is_empty() { + return StackAnalysis::default(); + } + + let mut out = StackAnalysis::default(); + let first = asm_mnemonic(&code); + + out.nonreturning_branch = matches!(first, "jr"); + out.touches_stack = code == "ret" + || code.starts_with("call ") + || code.starts_with("jal ") + || code.starts_with("jalr ") + || code.contains(" sp,") + || code.contains(", sp") + || code.contains("(sp)"); + if out.touches_stack && (code.contains(" sp,") || code.starts_with("addi sp")) { + out.unknown_stack_write = true; + } + out +} + +fn asm_stack_analysis(target: CodegenTarget, instructions: &[String]) -> StackAnalysis { 
+ let mut total = StackAnalysis::default(); + + for line in instructions { + let item = match target { + CodegenTarget::LinuxX86_64 + | CodegenTarget::DarwinX86_64 + | CodegenTarget::WindowsX86_64Gnu + | CodegenTarget::FreestandingX86_64 => x86_64_stack_analysis(line), + CodegenTarget::LinuxArm64 + | CodegenTarget::DarwinArm64 + | CodegenTarget::FreestandingArm64 => aarch64_stack_analysis(line), + CodegenTarget::FreestandingRISCV64 => riscv64_stack_analysis(line), + }; + + total.touches_stack |= item.touches_stack; + total.unknown_stack_write |= item.unknown_stack_write; + total.nonreturning_branch |= item.nonreturning_branch; + total.unbalanced_delta += item.unbalanced_delta; + } + + total +} + +fn validate_stack_contract( + target: CodegenTarget, + instructions: &[String], + contract: StackContract, +) { + let analysis = asm_stack_analysis(target, instructions); + + if analysis.touches_stack && !contract.stack_declared { + panic!( + "asm touches the stack or performs a call/return; declare clobber(\"stack\") to make the stack contract explicit" + ); + } + + if analysis.touches_stack && contract.nostack_declared { + panic!("asm declares clobber(\"nostack\") but touches the stack or performs a call/return"); + } + + if analysis.nonreturning_branch && !contract.noreturn_declared { + panic!( + "asm contains a non-returning branch; declare clobber(\"noreturn\") so codegen can terminate the block explicitly" + ); + } + + if analysis.unknown_stack_write && !contract.noreturn_declared { + panic!( + "asm writes the stack pointer in a way codegen cannot prove balanced; restore the original stack pointer or declare clobber(\"noreturn\")" + ); + } + + if analysis.unbalanced_delta != 0 && !contract.noreturn_declared { + panic!( + "asm stack delta is not balanced ({} bytes); restore the stack pointer or declare clobber(\"noreturn\")", + analysis.unbalanced_delta + ); + } +} + impl<'a> AsmPlan<'a> { pub fn build( target: CodegenTarget, @@ -458,6 +831,8 @@ impl<'a> AsmPlan<'a> { 
) -> Self { let asm_code = instructions.join("\n"); let asm_code = gcc_percent_to_llvm_dollar(&asm_code); + let stack_contract = stack_contract_from_user_clobbers(user_clobbers_raw); + validate_stack_contract(target, instructions, stack_contract); // outputs let mut used_out_phys: HashSet = HashSet::new(); @@ -565,6 +940,8 @@ impl<'a> AsmPlan<'a> { inputs, clobbers, has_side_effects: true, + align_stack: stack_contract.stack_declared, + noreturn: stack_contract.noreturn_declared, } } diff --git a/llvm/src/expression/rvalue/asm.rs b/llvm/src/expression/rvalue/asm.rs index 0153ab8d..944b2352 100644 --- a/llvm/src/expression/rvalue/asm.rs +++ b/llvm/src/expression/rvalue/asm.rs @@ -52,6 +52,10 @@ pub(crate) fn gen<'ctx, 'a>( ); let constraints_str = plan.constraints_string(); + if plan.noreturn { + panic!("asm expression cannot declare clobber(\"noreturn\")"); + } + let mut operand_vals: Vec> = Vec::with_capacity(plan.inputs.len()); for inp in &plan.inputs { let v = eval_asm_in_expr(env, inp.value); @@ -70,7 +74,7 @@ pub(crate) fn gen<'ctx, 'a>( plan.asm_code.clone(), constraints_str, plan.has_side_effects, - false, + plan.align_stack, Some(inline_asm_dialect_for_target(target)), false, ); @@ -104,7 +108,7 @@ pub(crate) fn gen<'ctx, 'a>( plan.asm_code.clone(), constraints_str, plan.has_side_effects, - false, + plan.align_stack, Some(inline_asm_dialect_for_target(target)), false, ); diff --git a/llvm/src/expression/rvalue/assign.rs b/llvm/src/expression/rvalue/assign.rs index 6279a566..568a74b9 100644 --- a/llvm/src/expression/rvalue/assign.rs +++ b/llvm/src/expression/rvalue/assign.rs @@ -14,8 +14,9 @@ use super::ExprGenEnv; use crate::codegen::types::TypeFlavor; use crate::codegen::{generate_address_ir, wave_type_to_llvm_type}; use crate::statement::variable::{coerce_basic_value, CoercionMode}; -use inkwell::types::{AsTypeRef, BasicTypeEnum}; +use inkwell::types::{AsTypeRef, BasicType, BasicTypeEnum}; use inkwell::values::{BasicValue, BasicValueEnum}; +use 
inkwell::AddressSpace; use parser::ast::{AssignOperator, Expression, WaveType}; fn normalize_struct_name(raw: &str) -> &str { @@ -243,12 +244,58 @@ fn materialize_for_store<'ctx, 'a>( } } +fn try_gen_store_through_rvalue_pointer<'ctx, 'a>( + env: &mut ExprGenEnv<'ctx, 'a>, + target: &Expression, + value: &Expression, +) -> Option> { + let Expression::Deref(inner) = target else { + return None; + }; + + if wave_type_of_lvalue(env, target).is_some() { + return None; + } + + if is_null_expr(value) { + panic!( + "cannot infer pointee type for null assignment through pointer expression: {:?}", + target + ); + } + + let mut rhs = env.gen(value, None); + let element_type = rhs.get_type(); + rhs = materialize_for_store(env, rhs, element_type, "rvalue_ptr_assign_agg_load"); + + let ptr_ty = env + .context + .ptr_type(AddressSpace::default()) + .as_basic_type_enum(); + let ptr_val = env.gen(inner, Some(ptr_ty)); + let BasicValueEnum::PointerValue(dst_ptr) = ptr_val else { + panic!( + "deref assignment target must evaluate to a pointer, got {:?}", + ptr_val.get_type() + ); + }; + + env.builder.build_store(dst_ptr, rhs).unwrap(); + Some(rhs) +} + pub(crate) fn gen_assign_operation<'ctx, 'a>( env: &mut ExprGenEnv<'ctx, 'a>, target: &Expression, operator: &AssignOperator, value: &Expression, ) -> BasicValueEnum<'ctx> { + if matches!(operator, AssignOperator::Assign) { + if let Some(v) = try_gen_store_through_rvalue_pointer(env, target, value) { + return v; + } + } + let ptr = generate_address_ir( env.context, env.builder, @@ -400,6 +447,10 @@ pub(crate) fn gen_assignment<'ctx, 'a>( target: &Expression, value: &Expression, ) -> BasicValueEnum<'ctx> { + if let Some(v) = try_gen_store_through_rvalue_pointer(env, target, value) { + return v; + } + let ptr = generate_address_ir( env.context, env.builder, diff --git a/llvm/src/statement/asm.rs b/llvm/src/statement/asm.rs index e880e1ef..631e5cb9 100644 --- a/llvm/src/statement/asm.rs +++ b/llvm/src/statement/asm.rs @@ -277,7 +277,7 @@ 
pub(super) fn gen_asm_stmt_ir<'ctx>( plan.asm_code.clone(), constraints_str, plan.has_side_effects, - false, + plan.align_stack, Some(inline_asm_dialect_for_target(target)), false, ); @@ -286,6 +286,11 @@ pub(super) fn gen_asm_stmt_ir<'ctx>( .build_indirect_call(fn_type, inline_asm, &operand_vals, "inline_asm") .unwrap(); + if plan.noreturn { + builder.build_unreachable().unwrap(); + return; + } + if out_places.is_empty() { return; } diff --git a/llvm/src/statement/assign.rs b/llvm/src/statement/assign.rs index 08f16567..f773d4e0 100644 --- a/llvm/src/statement/assign.rs +++ b/llvm/src/statement/assign.rs @@ -12,14 +12,12 @@ use crate::codegen::abi_c::ExternCInfo; use crate::codegen::types::TypeFlavor; -use crate::codegen::{ - generate_address_and_type_ir, generate_address_ir, wave_type_to_llvm_type, VariableInfo, -}; +use crate::codegen::{wave_type_to_llvm_type, VariableInfo}; use crate::expression::rvalue::generate_expression_ir; use crate::statement::variable::{coerce_basic_value, CoercionMode}; use inkwell::module::Module; use inkwell::targets::TargetData; -use inkwell::types::{BasicType, BasicTypeEnum, StructType}; +use inkwell::types::{BasicTypeEnum, StructType}; use inkwell::values::BasicValueEnum; use parser::ast::{Expression, Mutability}; use std::collections::HashMap; @@ -38,77 +36,9 @@ pub(super) fn gen_assign_ir<'ctx>( extern_c_info: &HashMap>, ) { if variable == "deref" { - if let Expression::BinaryExpression { left, right, .. 
} = value { - if let Expression::Deref(inner_expr) = &**left { - let target_ptr = generate_address_ir( - context, - builder, - inner_expr, - variables, - module, - struct_types, - struct_field_indices, - ); - - let expected_elem_ty: BasicTypeEnum<'ctx> = match &**inner_expr { - Expression::Variable(name) => { - let info = variables - .get(name) - .unwrap_or_else(|| panic!("Pointer var '{}' not declared", name)); - match &info.ty { - parser::ast::WaveType::Pointer(inner) => wave_type_to_llvm_type( - context, - inner.as_ref(), - struct_types, - TypeFlavor::Value, - ), - parser::ast::WaveType::String => context.i8_type().as_basic_type_enum(), - other => panic!("deref target is not a pointer/string: {:?}", other), - } - } - _ => { - let (_, ty) = generate_address_and_type_ir( - context, - builder, - inner_expr, - variables, - module, - struct_types, - struct_field_indices, - ); - ty - } - }; - - let mut val = generate_expression_ir( - context, - builder, - right, - variables, - module, - Some(expected_elem_ty), - global_consts, - struct_types, - struct_field_indices, - target_data, - extern_c_info, - ); - - if val.get_type() != expected_elem_ty { - val = coerce_basic_value( - context, - builder, - val, - expected_elem_ty, - "deref_assign_cast", - CoercionMode::Implicit, - ); - } - - builder.build_store(target_ptr, val).unwrap(); - } - } - return; + panic!( + "internal error: legacy StatementNode::Assign(\"deref\") reached codegen; parser must lower lvalue assignment to Expression::Assignment" + ); } let (dst_ptr, dst_mutability, dst_wave_ty) = { diff --git a/src/cli.rs b/src/cli.rs index ca4aa7e2..ce9066fa 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -311,6 +311,7 @@ fn effective_global_for_build(global: &Global, build: &BuildRequest) -> Global { if build.freestanding { out.llvm.no_default_libs = true; + out.llvm.freestanding = true; } if build.no_start_files { out.llvm.link_args.push("-nostartfiles".to_string()); diff --git a/src/flags.rs b/src/flags.rs index 
328a2001..098d4237 100644 --- a/src/flags.rs +++ b/src/flags.rs @@ -78,6 +78,7 @@ pub struct LlvmFlags { pub linker: Option, pub link_args: Vec, pub no_default_libs: bool, + pub freestanding: bool, } #[derive(Default, Clone)] diff --git a/src/runner.rs b/src/runner.rs index 9e4c8e4a..e5bd56d8 100644 --- a/src/runner.rs +++ b/src/runner.rs @@ -890,6 +890,7 @@ fn build_backend_options(llvm: &LlvmFlags) -> BackendOptions { linker: llvm.linker.clone(), link_args: llvm.link_args.clone(), no_default_libs: llvm.no_default_libs, + freestanding: llvm.freestanding, } } diff --git a/test/test107.wave b/test/test107.wave new file mode 100644 index 00000000..3222eb99 --- /dev/null +++ b/test/test107.wave @@ -0,0 +1,13 @@ +export(c, "wave_add_i32") fun add_i32(a: i32, b: i32) -> i32 { + return a + b; +} + +export(c) { + fun wave_inc_i32(a: i32) -> i32 { + return a + 1; + } + + fun wave_dec_i32(a: i32) -> i32 { + return a - 1; + } +} diff --git a/test/test108.wave b/test/test108.wave new file mode 100644 index 00000000..e78ec474 --- /dev/null +++ b/test/test108.wave @@ -0,0 +1,64 @@ +const KERNEL_ENTRY_POINT: u64 = 0x200000; +const KERNEL_IMAGE_SIZE: u64 = 4; + +static embedded_kernel: array = [ + 0xB0, 0x45, 0xE6, 0xE9 +]; + +struct WaveBootInfo { + magic: u64; +} + +fun copy_memory(dst: ptr, src: ptr, size: u64) { + let mut i: u64 = 0; + + while (i < size) { + let d: ptr = (dst as u64 + i) as ptr; + let s: ptr = (src as u64 + i) as ptr; + let v: u8 = deref s; + + deref d = v; + + i = i + 1; + } +} + +fun load_embedded_kernel(kernel_base: u64) { + copy_memory(kernel_base as ptr, &embedded_kernel[0], KERNEL_IMAGE_SIZE); +} + +fun jump_to_kernel(kernel_entry: u64, boot_info: ptr, kernel_stack_top: u64) { + asm { + "cli" + "mov rsp, rdx" + "and rsp, -16" + "xor rbp, rbp" + + "mov rdi, rcx" + + "xor rax, rax" + "xor rbx, rbx" + "xor rsi, rsi" + "xor rdx, rdx" + "xor r8, r8" + "xor r9, r9" + "xor r10, r10" + "xor r12, r12" + "xor r13, r13" + "xor r14, r14" + "xor r15, r15" + + "jmp 
r11" + + in("r11") kernel_entry + in("rcx") boot_info + in("rdx") kernel_stack_top + clobber("stack") + clobber("noreturn") + } +} + +fun boot_smoke(boot_info: ptr, stack_top: u64) { + load_embedded_kernel(KERNEL_ENTRY_POINT); + jump_to_kernel(KERNEL_ENTRY_POINT, boot_info, stack_top); +} diff --git a/tests/codegen_regressions.rs b/tests/codegen_regressions.rs new file mode 100644 index 00000000..fa75aa42 --- /dev/null +++ b/tests/codegen_regressions.rs @@ -0,0 +1,495 @@ +// This file is part of the Wave language project. +// Copyright (c) 2024–2026 Wave Foundation +// Copyright (c) 2024–2026 LunaStev and contributors +// +// This Source Code Form is subject to the terms of the +// Mozilla Public License, v. 2.0. +// If a copy of the MPL was not distributed with this file, +// You can obtain one at https://mozilla.org/MPL/2.0/. +// +// SPDX-License-Identifier: MPL-2.0 +// AI TRAINING NOTICE: Prohibited without prior written permission. No use for machine learning or generative AI training, fine-tuning, distillation, embedding, or dataset creation. 
+ +use std::ffi::OsStr; +use std::fs; +use std::path::{Path, PathBuf}; +use std::process::Command; + +fn wavec_bin() -> PathBuf { + if let Some(path) = option_env!("CARGO_BIN_EXE_wavec") { + return PathBuf::from(path); + } + + PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("target/debug/wavec") +} + +fn temp_case_dir(name: &str) -> PathBuf { + let dir = std::env::temp_dir().join(format!("wavec-{}-{}", name, std::process::id())); + let _ = fs::remove_dir_all(&dir); + fs::create_dir_all(&dir).unwrap(); + dir +} + +fn write_wave(dir: &Path, name: &str, source: &str) -> PathBuf { + let path = dir.join(name); + fs::write(&path, source).unwrap(); + path +} + +fn run_wavec(args: I) +where + I: IntoIterator, + S: AsRef, +{ + let output = Command::new(wavec_bin()).args(args).output().unwrap(); + assert!( + output.status.success(), + "wavec failed with status {}\nstdout:\n{}\nstderr:\n{}", + output.status, + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); +} + +fn run_wavec_expect_failure(args: I) -> String +where + I: IntoIterator, + S: AsRef, +{ + let output = Command::new(wavec_bin()).args(args).output().unwrap(); + assert!( + !output.status.success(), + "wavec unexpectedly succeeded\nstdout:\n{}\nstderr:\n{}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ); + + format!( + "{}\n{}", + String::from_utf8_lossy(&output.stdout), + String::from_utf8_lossy(&output.stderr) + ) +} + +fn bytes_contains(haystack: &[u8], needle: &[u8]) -> bool { + haystack + .windows(needle.len()) + .any(|window| window == needle) +} + +#[test] +fn lvalue_store_updates_deref_index_and_struct_fields() { + let dir = temp_case_dir("lvalue-store"); + let src = write_wave( + &dir, + "lvalue_store.wave", + r#" +struct Pair { + a: i32; + b: i32; +} + +fun write_deref(p: ptr, v: i32) { + deref p = v; +} + +fun write_index(p: ptr, v: i32) { + p[1] = v; +} + +fun write_field(p: ptr, v: i32) { + p.b = v; +} + +fun id_ptr(p: ptr) 
-> ptr { + return p; +} + +fun main() -> i32 { + let mut x: i32 = 1; + write_deref(&x, 41); + if (x != 41) { + return 1; + } + + let mut arr: array = [1, 2, 3]; + write_index(&arr[0], 9); + if (arr[1] != 9) { + return 2; + } + + let mut pair: Pair = Pair { a: 7, b: 8 }; + write_field(&pair, 99); + if (pair.b != 99) { + return 3; + } + + pair.a = 12; + if (pair.a != 12) { + return 4; + } + + deref id_ptr(&x) = 77; + if (x != 77) { + return 5; + } + + return 0; +} +"#, + ); + + let target_dir = dir.join("target"); + run_wavec([ + OsStr::new("build"), + src.as_os_str(), + OsStr::new("--run"), + OsStr::new("--target-dir"), + target_dir.as_os_str(), + ]); +} + +#[test] +fn freestanding_codegen_marks_functions_no_red_zone() { + let dir = temp_case_dir("freestanding-noredzone"); + let src = write_wave( + &dir, + "leaf.wave", + r#" +fun leaf(a: i64, b: i64, c: i64, d: i64, e: i64) -> i64 { + let x: i64 = a + b; + let y: i64 = c + d; + return x + y + e; +} +"#, + ); + + let explicit_dir = dir.join("explicit"); + run_wavec([ + OsStr::new("build"), + src.as_os_str(), + OsStr::new("--freestanding"), + OsStr::new("--emit=ir"), + OsStr::new("--out-dir"), + explicit_dir.as_os_str(), + ]); + let explicit_out = explicit_dir.join("leaf.ll"); + let explicit_ir = fs::read_to_string(&explicit_out).unwrap(); + assert!( + explicit_ir.contains("noredzone"), + "--freestanding IR must carry the LLVM noredzone function attribute:\n{}", + explicit_ir + ); + assert!( + explicit_ir.contains("nounwind"), + "--freestanding IR must mark Wave functions nounwind:\n{}", + explicit_ir + ); + + let bare_dir = dir.join("bare"); + run_wavec([ + OsStr::new("build"), + src.as_os_str(), + OsStr::new("--target"), + OsStr::new("x86_64-unknown-none-elf"), + OsStr::new("--emit=ir"), + OsStr::new("--out-dir"), + bare_dir.as_os_str(), + ]); + let bare_out = bare_dir.join("leaf.ll"); + let bare_ir = fs::read_to_string(&bare_out).unwrap(); + assert!( + bare_ir.contains("noredzone"), + "bare-metal target IR must 
carry the LLVM noredzone function attribute:\n{}", + bare_ir + ); + assert!( + bare_ir.contains("nounwind"), + "bare-metal target IR must mark Wave functions nounwind:\n{}", + bare_ir + ); +} + +#[test] +fn inline_asm_requires_explicit_stack_contract() { + let dir = temp_case_dir("asm-stack-contract"); + let bad_src = write_wave( + &dir, + "bad_stack.wave", + r#" +fun main() { + asm { + "sub rsp, 8" + "add rsp, 8" + } +} +"#, + ); + + let bad_dir = dir.join("bad"); + let err = run_wavec_expect_failure([ + OsStr::new("build"), + bad_src.as_os_str(), + OsStr::new("--emit=ir"), + OsStr::new("--out-dir"), + bad_dir.as_os_str(), + ]); + assert!( + err.contains("clobber(\\\"stack\\\")") || err.contains("clobber(\"stack\")"), + "stack contract diagnostic should mention clobber(\"stack\"):\n{}", + err + ); + + let good_src = write_wave( + &dir, + "good_stack.wave", + r#" +fun main() { + asm { + "sub rsp, 8" + "add rsp, 8" + clobber("stack") + } +} +"#, + ); + let good_dir = dir.join("good"); + run_wavec([ + OsStr::new("build"), + good_src.as_os_str(), + OsStr::new("--emit=ir"), + OsStr::new("--out-dir"), + good_dir.as_os_str(), + ]); + let ir = fs::read_to_string(good_dir.join("good_stack.ll")).unwrap(); + assert!( + ir.contains("asm sideeffect alignstack"), + "stack-declared inline asm should be volatile and alignstack:\n{}", + ir + ); + + let unbalanced_src = write_wave( + &dir, + "unbalanced_stack.wave", + r#" +fun main() { + asm { + "sub rsp, 8" + clobber("stack") + } +} +"#, + ); + let unbalanced_dir = dir.join("unbalanced"); + let err = run_wavec_expect_failure([ + OsStr::new("build"), + unbalanced_src.as_os_str(), + OsStr::new("--emit=ir"), + OsStr::new("--out-dir"), + unbalanced_dir.as_os_str(), + ]); + assert!( + err.contains("stack delta is not balanced"), + "unbalanced stack asm should be rejected:\n{}", + err + ); + + let missing_noreturn_src = write_wave( + &dir, + "missing_noreturn.wave", + r#" +fun main() { + asm { + "jmp rax" + in("rax") 0 + } +} +"#, + ); 
+ let missing_noreturn_dir = dir.join("missing-noreturn"); + let err = run_wavec_expect_failure([ + OsStr::new("build"), + missing_noreturn_src.as_os_str(), + OsStr::new("--emit=ir"), + OsStr::new("--out-dir"), + missing_noreturn_dir.as_os_str(), + ]); + assert!( + err.contains("clobber(\\\"noreturn\\\")") || err.contains("clobber(\"noreturn\")"), + "non-returning asm should require clobber(\"noreturn\"):\n{}", + err + ); + + let noreturn_src = write_wave( + &dir, + "noreturn.wave", + r#" +fun jump_out(addr: u64) { + asm { + "jmp rax" + in("rax") addr + clobber("noreturn") + } +} +"#, + ); + let noreturn_dir = dir.join("noreturn"); + run_wavec([ + OsStr::new("build"), + noreturn_src.as_os_str(), + OsStr::new("--emit=ir"), + OsStr::new("--out-dir"), + noreturn_dir.as_os_str(), + ]); + let ir = fs::read_to_string(noreturn_dir.join("noreturn.ll")).unwrap(); + assert!( + ir.contains("unreachable"), + "noreturn inline asm should terminate the current IR block:\n{}", + ir + ); +} + +#[test] +fn inline_asm_rejects_invalid_contracts_and_allows_local_jumps() { + let dir = temp_case_dir("asm-contract-extra"); + + let local_jump_src = write_wave( + &dir, + "local_jump.wave", + r#" +fun main() { + asm { + "jmp 1f" + "1:" + } +} +"#, + ); + let local_jump_dir = dir.join("local-jump"); + run_wavec([ + OsStr::new("build"), + local_jump_src.as_os_str(), + OsStr::new("--emit=ir"), + OsStr::new("--out-dir"), + local_jump_dir.as_os_str(), + ]); + + let conflict_src = write_wave( + &dir, + "conflicting_stack.wave", + r#" +fun main() { + asm { + "nop" + clobber("stack") + clobber("nostack") + } +} +"#, + ); + let conflict_dir = dir.join("conflict"); + let err = run_wavec_expect_failure([ + OsStr::new("build"), + conflict_src.as_os_str(), + OsStr::new("--emit=ir"), + OsStr::new("--out-dir"), + conflict_dir.as_os_str(), + ]); + assert!( + err.contains("cannot declare both"), + "stack/nostack conflict should be rejected:\n{}", + err + ); + + let expr_noreturn_src = write_wave( + &dir, + 
"expr_noreturn.wave", + r#" +fun main() -> i64 { + let x: i64 = asm { + "jmp rax" + in("rax") 0 + clobber("noreturn") + }; + return x; +} +"#, + ); + let expr_noreturn_dir = dir.join("expr-noreturn"); + let err = run_wavec_expect_failure([ + OsStr::new("build"), + expr_noreturn_src.as_os_str(), + OsStr::new("--emit=ir"), + OsStr::new("--out-dir"), + expr_noreturn_dir.as_os_str(), + ]); + assert!( + err.contains("asm expression cannot declare"), + "asm expressions must reject noreturn:\n{}", + err + ); + + let clobber_operand_conflict_src = write_wave( + &dir, + "clobber_operand_conflict.wave", + r#" +fun main() { + let x: i64 = 1; + asm { + "mov rax, rax" + in("rax") x + clobber("rax") + } +} +"#, + ); + let clobber_operand_conflict_dir = dir.join("clobber-operand-conflict"); + let err = run_wavec_expect_failure([ + OsStr::new("build"), + clobber_operand_conflict_src.as_os_str(), + OsStr::new("--emit=ir"), + OsStr::new("--out-dir"), + clobber_operand_conflict_dir.as_os_str(), + ]); + assert!( + err.contains("conflicts with an input/output operand register"), + "clobber/operand register conflict should be rejected:\n{}", + err + ); +} + +#[test] +fn waveos_boot_smoke_builds_windows_freestanding_coff_object() { + let dir = temp_case_dir("waveos-boot-smoke-coff"); + let source = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("test/test108.wave"); + let object = dir.join("waveos_boot_smoke.obj"); + + run_wavec([ + OsStr::new("build"), + source.as_os_str(), + OsStr::new("--target"), + OsStr::new("x86_64-pc-windows-gnu"), + OsStr::new("--freestanding"), + OsStr::new("--emit=obj"), + OsStr::new("-o"), + object.as_os_str(), + ]); + + let bytes = fs::read(&object).unwrap(); + assert!( + bytes_contains(&bytes, &[0xB0, 0x45, 0xE6, 0xE9]), + "COFF object must keep the embedded kernel byte array in .data" + ); + assert!( + bytes_contains(&bytes, b"embedded_kernel"), + "COFF object must keep a relocatable embedded_kernel symbol" + ); + assert!( + bytes_contains(&bytes, &[0x41, 
0xFF, 0xE3]), + "jump_to_kernel must lower to an indirect jmp through r11" + ); + assert!( + !bytes_contains(&bytes, &[0x49, 0xC7, 0xC3, 0x00, 0x00, 0x20, 0x00]), + "jump_to_kernel must not hard-code mov r11, 0x200000" + ); +} From 2e676bb2218dfbe6dbdadd2949e99d6ddc48fdc4 Mon Sep 17 00:00:00 2001 From: LunaStev Date: Tue, 19 May 2026 20:57:03 +0900 Subject: [PATCH 2/2] Refactor tests: add `--target` parameters, gate runtime linker tests, and enhance IR validation This update introduces `run_link_tests_enabled` for conditional tests, specifies explicit targets (`x86_64-unknown-linux-gnu`), and refines IR checks to ensure expected GEP/store operations are present without requiring a host linker. Signed-off-by: LunaStev --- .github/workflows/rust.yml | 9 ++++--- tests/codegen_regressions.rs | 51 +++++++++++++++++++++++++++++++++--- 2 files changed, 52 insertions(+), 8 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 7a927453..6bbc3487 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -22,12 +22,12 @@ jobs: wget https://apt.llvm.org/llvm.sh chmod +x llvm.sh sudo ./llvm.sh 21 + echo "LLVM_SYS_211_PREFIX=/usr/lib/llvm-21" >> "$GITHUB_ENV" + echo "LLVM_CONFIG_PATH=/usr/lib/llvm-21/bin/llvm-config" >> "$GITHUB_ENV" + echo "/usr/lib/llvm-21/bin" >> "$GITHUB_PATH" - name: Build - run: | - export LLVM_SYS_211_PREFIX=/usr/lib/llvm-21 - cargo clean - cargo build --verbose + run: cargo build --verbose - name: Run tests run: cargo test --verbose @@ -46,6 +46,7 @@ jobs: run: | echo "LLVM_SYS_211_PREFIX=$(brew --prefix llvm@21)" >> $GITHUB_ENV echo "LLVM_CONFIG_PATH=$(brew --prefix llvm@21)/bin/llvm-config" >> $GITHUB_ENV + echo "$(brew --prefix llvm@21)/bin" >> $GITHUB_PATH - name: Build run: cargo build --verbose diff --git a/tests/codegen_regressions.rs b/tests/codegen_regressions.rs index fa75aa42..3eb48e19 100644 --- a/tests/codegen_regressions.rs +++ b/tests/codegen_regressions.rs @@ -77,6 +77,10 @@ fn 
bytes_contains(haystack: &[u8], needle: &[u8]) -> bool { .any(|window| window == needle) } +fn run_link_tests_enabled() -> bool { + std::env::var_os("WAVE_RUN_LINK_TESTS").is_some() +} + #[test] fn lvalue_store_updates_deref_index_and_struct_fields() { let dir = temp_case_dir("lvalue-store"); @@ -139,14 +143,35 @@ fun main() -> i32 { "#, ); - let target_dir = dir.join("target"); + let ir_dir = dir.join("ir"); run_wavec([ OsStr::new("build"), src.as_os_str(), - OsStr::new("--run"), - OsStr::new("--target-dir"), - target_dir.as_os_str(), + OsStr::new("--target"), + OsStr::new("x86_64-unknown-none-elf"), + OsStr::new("--freestanding"), + OsStr::new("--emit=ir"), + OsStr::new("--out-dir"), + ir_dir.as_os_str(), ]); + + let ir = fs::read_to_string(ir_dir.join("lvalue_store.ll")).unwrap(); + assert!( + ir.contains("store i32") && ir.contains("getelementptr"), + "lvalue store regression should generate store/GEP operations without requiring a host linker:\n{}", + ir + ); + + if run_link_tests_enabled() { + let target_dir = dir.join("target"); + run_wavec([ + OsStr::new("build"), + src.as_os_str(), + OsStr::new("--run"), + OsStr::new("--target-dir"), + target_dir.as_os_str(), + ]); + } } #[test] @@ -230,6 +255,8 @@ fun main() { let err = run_wavec_expect_failure([ OsStr::new("build"), bad_src.as_os_str(), + OsStr::new("--target"), + OsStr::new("x86_64-unknown-linux-gnu"), OsStr::new("--emit=ir"), OsStr::new("--out-dir"), bad_dir.as_os_str(), @@ -257,6 +284,8 @@ fun main() { run_wavec([ OsStr::new("build"), good_src.as_os_str(), + OsStr::new("--target"), + OsStr::new("x86_64-unknown-linux-gnu"), OsStr::new("--emit=ir"), OsStr::new("--out-dir"), good_dir.as_os_str(), @@ -284,6 +313,8 @@ fun main() { let err = run_wavec_expect_failure([ OsStr::new("build"), unbalanced_src.as_os_str(), + OsStr::new("--target"), + OsStr::new("x86_64-unknown-linux-gnu"), OsStr::new("--emit=ir"), OsStr::new("--out-dir"), unbalanced_dir.as_os_str(), @@ -310,6 +341,8 @@ fun main() { let err = 
run_wavec_expect_failure([ OsStr::new("build"), missing_noreturn_src.as_os_str(), + OsStr::new("--target"), + OsStr::new("x86_64-unknown-linux-gnu"), OsStr::new("--emit=ir"), OsStr::new("--out-dir"), missing_noreturn_dir.as_os_str(), @@ -337,6 +370,8 @@ fun jump_out(addr: u64) { run_wavec([ OsStr::new("build"), noreturn_src.as_os_str(), + OsStr::new("--target"), + OsStr::new("x86_64-unknown-linux-gnu"), OsStr::new("--emit=ir"), OsStr::new("--out-dir"), noreturn_dir.as_os_str(), @@ -369,6 +404,8 @@ fun main() { run_wavec([ OsStr::new("build"), local_jump_src.as_os_str(), + OsStr::new("--target"), + OsStr::new("x86_64-unknown-linux-gnu"), OsStr::new("--emit=ir"), OsStr::new("--out-dir"), local_jump_dir.as_os_str(), @@ -391,6 +428,8 @@ fun main() { let err = run_wavec_expect_failure([ OsStr::new("build"), conflict_src.as_os_str(), + OsStr::new("--target"), + OsStr::new("x86_64-unknown-linux-gnu"), OsStr::new("--emit=ir"), OsStr::new("--out-dir"), conflict_dir.as_os_str(), @@ -419,6 +458,8 @@ fun main() -> i64 { let err = run_wavec_expect_failure([ OsStr::new("build"), expr_noreturn_src.as_os_str(), + OsStr::new("--target"), + OsStr::new("x86_64-unknown-linux-gnu"), OsStr::new("--emit=ir"), OsStr::new("--out-dir"), expr_noreturn_dir.as_os_str(), @@ -447,6 +488,8 @@ fun main() { let err = run_wavec_expect_failure([ OsStr::new("build"), clobber_operand_conflict_src.as_os_str(), + OsStr::new("--target"), + OsStr::new("x86_64-unknown-linux-gnu"), OsStr::new("--emit=ir"), OsStr::new("--out-dir"), clobber_operand_conflict_dir.as_os_str(),