diff --git a/kernel/src/ipc/sighand.rs b/kernel/src/ipc/sighand.rs index 7d1d645752..a61e476d59 100644 --- a/kernel/src/ipc/sighand.rs +++ b/kernel/src/ipc/sighand.rs @@ -291,6 +291,17 @@ impl SigHand { g.flags.remove(flag); } + pub fn flags_test_and_clear(&self, flag: SignalFlags, clear: bool) -> bool { + let mut g = self.inner_mut(); + if !g.flags.contains(flag) { + return false; + } + if clear { + g.flags.remove(flag); + } + true + } + pub fn stop_signal(&self) -> Signal { self.inner().stop_signal } @@ -450,11 +461,11 @@ fn default_sighandlers() -> Vec { let mut r = vec![Sigaction::default(); MAX_SIG_NUM]; let mut sig_ign = Sigaction::default(); // 收到忽略的信号,重启系统调用 - // Linux 对 SIGCHLD/SIGURG/SIGWINCH 默认忽略;这里显式设置 Ignore + // Linux ignores SIGURG/SIGWINCH by default; SIGCHLD is also ignored by default, + // but the handler must remain SIG_DFL to distinguish default ignore from explicit SIG_IGN. sig_ign.set_action(SigactionType::SaHandler(SaHandlerType::Ignore)); sig_ign.flags_mut().insert(SigFlags::SA_RESTART); - r[Signal::SIGCHLD as usize - 1] = sig_ign; r[Signal::SIGURG as usize - 1] = sig_ign; r[Signal::SIGWINCH as usize - 1] = sig_ign; diff --git a/kernel/src/ipc/syscall/sys_kill.rs b/kernel/src/ipc/syscall/sys_kill.rs index 3eb0b1804f..0be2e7d4de 100644 --- a/kernel/src/ipc/syscall/sys_kill.rs +++ b/kernel/src/ipc/syscall/sys_kill.rs @@ -31,7 +31,7 @@ pub enum PidConverter { } impl PidConverter { - /// ### 为 `wait` 和 `kill` 调用使用 + /// ### For `kill` syscall use pub fn from_id(id: i32) -> Option { if id < -1 { let pgid = ProcessManager::find_vpid(RawPid::from(-id as usize)); @@ -46,33 +46,6 @@ impl PidConverter { Some(PidConverter::Pid(pid)) } } - - /// ### 为 `waitid` 使用:which/upid 已在封装层基本校验 - /// 约定:which: 0=P_ALL, 1=P_PID(id>0), 2=P_PGID(id>=0; 0=当前组) - pub fn from_waitid(which: u32, upid: i32) -> Option { - match which { - 0 => Some(PidConverter::All), - 1 => { - if upid <= 0 { - return None; - } - Self::from_id(upid) - } - 2 => { - if upid < 0 { - return 
None; - } - // P_PGID: upid==0 -> 当前进程组;>0 -> 指定pgid - // from_id: id< -1 为 pgid,因此这里将正 pgid 映射为负数传入 - if upid == 0 { - Self::from_id(0) - } else { - Self::from_id(-upid) - } - } - _ => None, - } - } } /// Check if the current process has permission to send a signal to the target process. diff --git a/kernel/src/process/exit.rs b/kernel/src/process/exit.rs index 0f7219486d..cde5f0d4f2 100644 --- a/kernel/src/process/exit.rs +++ b/kernel/src/process/exit.rs @@ -1,13 +1,15 @@ -use alloc::sync::{Arc, Weak}; -use core::{intrinsics::likely, sync::atomic::Ordering}; +use alloc::{ + sync::{Arc, Weak}, + vec::Vec, +}; +use core::sync::atomic::Ordering; use system_error::SystemError; use crate::{ arch::ipc::signal::{SigChildCode, Signal}, driver::tty::tty_core::TtyCore, ipc::signal_types::SignalFlags, - ipc::syscall::sys_kill::PidConverter, - process::pid::PidType, + process::{pid::PidType, ptrace, wait::WaitSelector}, syscall::user_access::UserBufferWriter, }; @@ -15,7 +17,7 @@ use super::{ abi::WaitOption, dec_visible_thread_count, resource::{RUsage, RUsageWho}, - ProcessControlBlock, ProcessFlags, ProcessManager, ProcessState, RawPid, + ProcessControlBlock, ProcessFlags, ProcessManager, RawPid, }; /// 将内核中保存的 wstatus(已经按 wait4 语义左移过的编码值) @@ -25,6 +27,21 @@ fn wstatus_to_waitid_status(raw_wstatus: i32) -> i32 { (raw_wstatus >> 8) & 0xff } +#[inline(always)] +fn wstatus_to_waitid_exit_info(raw_wstatus: i32) -> (i32, i32) { + let signal = raw_wstatus & 0x7f; + if signal == 0 { + ( + wstatus_to_waitid_status(raw_wstatus), + SigChildCode::Exited.into(), + ) + } else if (raw_wstatus & 0x80) != 0 { + (signal, SigChildCode::Dumped.into()) + } else { + (signal, SigChildCode::Killed.into()) + } +} + /// mt-exec: de_thread 正在等待旧 leader 完成 PID/TID 交换时,禁止提前回收 fn reap_blocked_by_group_exec(child_pcb: &Arc) -> bool { if !child_pcb.is_thread_group_leader() { @@ -40,6 +57,19 @@ fn reap_blocked_by_group_exec(child_pcb: &Arc) -> bool { .unwrap_or(true) } +fn delay_group_leader(child_pcb: 
&Arc) -> bool { + if !child_pcb.is_thread_group_leader() { + return false; + } + + child_pcb + .threads_read_irqsave() + .group_tasks_clone() + .iter() + .filter_map(Weak::upgrade) + .any(|task| !task.is_exited() && !task.is_zombie() && !task.is_dead()) +} + /// mt-exec: 非执行线程的组长在退出时,延迟 PID/TGID/PGID/SID 的 unhash /// 以避免 de_thread 交换 TID/raw_pid 时出现 ESRCH。 fn should_defer_unhash_for_group_exec(pcb: &ProcessControlBlock, group_dead: bool) -> bool { @@ -64,7 +94,15 @@ fn should_defer_unhash_for_group_exec(pcb: &ProcessControlBlock, group_dead: boo /// - __WALL: 等待所有子进程,忽略 exit_signal /// - __WCLONE: 只等待"克隆"子进程(exit_signal != SIGCHLD) /// - 默认(无 __WCLONE): 只等待"正常"子进程(exit_signal == SIGCHLD) -fn child_matches_wait_options(child_pcb: &Arc, options: WaitOption) -> bool { +fn child_matches_wait_options( + child_pcb: &Arc, + options: WaitOption, + relation: WaitRelation, +) -> bool { + if relation == WaitRelation::Ptraced { + return true; + } + // __WALL 匹配所有子进程 if options.contains(WaitOption::WALL) { return true; @@ -78,6 +116,97 @@ fn child_matches_wait_options(child_pcb: &Arc, options: Wai is_clone_child == wants_clone } +#[derive(Clone, Copy, PartialEq, Eq)] +enum WaitRelation { + Natural, + Ptraced, +} + +#[derive(Clone, Copy)] +struct WaitRelations(u8); + +impl WaitRelations { + const NATURAL: u8 = 1 << 0; + const PTRACED: u8 = 1 << 1; + + fn empty() -> Self { + Self(0) + } + + fn insert(&mut self, relation: WaitRelation) { + self.0 |= match relation { + WaitRelation::Natural => Self::NATURAL, + WaitRelation::Ptraced => Self::PTRACED, + }; + } + + fn contains(self, relation: WaitRelation) -> bool { + let bit = match relation { + WaitRelation::Natural => Self::NATURAL, + WaitRelation::Ptraced => Self::PTRACED, + }; + self.0 & bit != 0 + } +} + +struct WaitCandidate { + child: Arc, + relations: WaitRelations, +} + +fn push_wait_candidate( + candidates: &mut Vec, + child: Arc, + relation: WaitRelation, +) { + let raw_pid = child.raw_pid(); + if let Some(candidate) = 
candidates.iter_mut().find(|p| p.child.raw_pid() == raw_pid) { + candidate.relations.insert(relation); + return; + } + + let mut relations = WaitRelations::empty(); + relations.insert(relation); + candidates.push(WaitCandidate { child, relations }); +} + +fn wait_candidate_children(options: WaitOption) -> Vec { + let current = ProcessManager::current_pcb(); + let leader = get_thread_group_leader(¤t); + + let natural_owners = if options.contains(WaitOption::WNOTHREAD) { + // DragonOS stores natural children on the thread-group leader. Linux's + // __WNOTHREAD eligibility is still enforced later through fork_parent. + vec![leader.clone()] + } else { + ProcessManager::thread_group_tasks_snapshot(leader.clone()) + }; + + let mut candidates = Vec::new(); + for waiter in natural_owners { + let parent_ns = waiter.active_pid_ns(); + for pid in waiter.children.read().iter().copied() { + if let Some(pcb) = ProcessManager::find_task_by_pid_ns(pid, &parent_ns) { + push_wait_candidate(&mut candidates, pcb, WaitRelation::Natural); + } + } + } + + let ptrace_waiters = if options.contains(WaitOption::WNOTHREAD) { + vec![current] + } else { + ProcessManager::thread_group_tasks_snapshot(leader) + }; + for waiter in ptrace_waiters { + for pid in ptrace::tracees_of(&waiter) { + if let Some(pcb) = ProcessManager::find(pid) { + push_wait_candidate(&mut candidates, pcb, WaitRelation::Ptraced); + } + } + } + candidates +} + fn fill_wait_rusage(child_pcb: &Arc, kwo: &mut KernelWaitOption) -> RUsage { let usage = child_pcb .get_rusage(RUsageWho::RUsageBoth) @@ -95,7 +224,7 @@ fn account_reaped_child_rusage(child_rusage: &RUsage) { /// 内核wait4时的参数 #[derive(Debug)] pub struct KernelWaitOption<'a> { - pub pid_converter: PidConverter, + pub selector: WaitSelector, pub options: WaitOption, pub ret_status: i32, pub ret_info: Option, @@ -112,9 +241,9 @@ pub struct WaitIdInfo { } impl KernelWaitOption<'_> { - pub fn new(pid_converter: PidConverter, options: WaitOption) -> Self { + pub fn 
new(selector: WaitSelector, options: WaitOption) -> Self { Self { - pid_converter, + selector, options, ret_status: 0, ret_info: None, @@ -126,17 +255,13 @@ impl KernelWaitOption<'_> { pub fn kernel_wait4( pid: i32, - wstatus_buf: Option>, options: WaitOption, rusage_buf: Option<&mut RUsage>, -) -> Result { - if pid == i32::MIN { - return Err(SystemError::ESRCH); - } - let converter = PidConverter::from_id(pid).ok_or(SystemError::ECHILD)?; +) -> Result<(usize, i32), SystemError> { + let selector = WaitSelector::from_wait4_pid(pid)?; // 构造参数 - let mut kwo = KernelWaitOption::new(converter, options); + let mut kwo = KernelWaitOption::new(selector, options); kwo.options.insert(WaitOption::WEXITED); kwo.ret_rusage = rusage_buf; @@ -144,23 +269,16 @@ pub fn kernel_wait4( // 调用do_wait,执行等待 let r = do_wait(&mut kwo)?; - // 如果有wstatus_buf,则将wstatus写入用户空间 - if let Some(mut wstatus_buf) = wstatus_buf { - // wait4 路径始终返回 wstatus(编码值),不能使用 ret_info - let wstatus = kwo.ret_status; - wstatus_buf.copy_one_to_user(&wstatus, 0)?; - } - - return Ok(r); + Ok((r, kwo.ret_status)) } /// waitid 的内核实现:基于 do_wait,返回 0,必要时写回 siginfo 与 rusage pub fn kernel_waitid( - pid_selector: PidConverter, + pid_selector: WaitSelector, mut infop: Option>, // PosixSigInfo options: WaitOption, rusage_buf: Option<&mut RUsage>, -) -> Result { +) -> Result { // 构造参数 let mut kwo = KernelWaitOption::new(pid_selector, options); kwo.ret_rusage = rusage_buf; @@ -218,7 +336,7 @@ pub fn kernel_waitid( // } } - Ok(0) + Ok(kwo.ret_info.is_some()) } /// 检查子进程是否可以被当前线程等待 @@ -289,61 +407,274 @@ fn wait_visible_pid(child_pcb: &Arc) -> RawPid { .unwrap_or(RawPid(0)) } +enum CandidateDecision { + Ready(Result), + Pending { can_change: bool }, + Ineligible, +} + +struct ScanDecision { + ready: Option>, + has_eligible: bool, + has_future_event: bool, +} + +impl ScanDecision { + fn new() -> Self { + Self { + ready: None, + has_eligible: false, + has_future_event: false, + } + } + + fn observe(&mut self, decision: 
CandidateDecision) { + match decision { + CandidateDecision::Ready(result) => { + self.ready = Some(result); + } + CandidateDecision::Pending { can_change } => { + self.has_eligible = true; + self.has_future_event |= can_change; + } + CandidateDecision::Ineligible => {} + } + } +} + +fn relation_is_eligible( + child_pcb: &Arc, + relation: WaitRelation, + options: WaitOption, +) -> bool { + match relation { + WaitRelation::Natural => is_eligible_child(child_pcb, options), + WaitRelation::Ptraced => { + let current = ProcessManager::current_pcb(); + ptrace::is_wait_tracee_of(child_pcb, ¤t, options) + } + } +} + +fn report_wait_event( + child_pcb: &Arc, + relation: WaitRelation, + kwo: &mut KernelWaitOption, +) -> CandidateDecision { + if !relation_is_eligible(child_pcb, relation, kwo.options) + || !child_matches_wait_options(child_pcb, kwo.options, relation) + { + return CandidateDecision::Ineligible; + } + + let state = child_pcb.sched_info().state(); + + // Linux wait_consider_task() checks zombie before stopped/continued. + // A zombie leader with live subthreads is still an eligible child even when + // the caller did not request WEXITED; otherwise waitid(WSTOPPED|WNOHANG) + // would incorrectly report ECHILD while the thread group can still change. 
+ let delayed_zombie = child_pcb.is_zombie() + && (delay_group_leader(child_pcb) || reap_blocked_by_group_exec(child_pcb)); + if child_pcb.is_zombie() && !delayed_zombie && kwo.options.contains(WaitOption::WEXITED) { + let Some(raw_wstatus) = state.raw_wstatus().map(|status| status as i32) else { + return CandidateDecision::Pending { can_change: false }; + }; + if !kwo.options.contains(WaitOption::WNOWAIT) && !child_pcb.try_mark_dead_from_zombie() { + return CandidateDecision::Ineligible; + } + + let pid = wait_visible_pid(child_pcb); + let (status, cause) = wstatus_to_waitid_exit_info(raw_wstatus); + let child_rusage = fill_wait_rusage(child_pcb, kwo); + kwo.no_task_error = None; + kwo.ret_status = raw_wstatus; + kwo.ret_info = Some(WaitIdInfo { pid, status, cause }); + + if !kwo.options.contains(WaitOption::WNOWAIT) { + account_reaped_child_rusage(&child_rusage); + unsafe { ProcessManager::release(child_pcb.raw_pid()) }; + } + + return CandidateDecision::Ready(Ok(pid.into())); + } + + let stop_requested = + relation == WaitRelation::Ptraced || kwo.options.contains(WaitOption::WSTOPPED); + if state.is_stopped() + && stop_requested + && child_pcb.sighand().flags_test_and_clear( + SignalFlags::CLD_STOPPED, + !kwo.options.contains(WaitOption::WNOWAIT), + ) + { + let stopsig = child_pcb.sighand().stop_signal() as i32; + let cause = if relation == WaitRelation::Ptraced { + SigChildCode::Trapped.into() + } else { + SigChildCode::Stopped.into() + }; + kwo.no_task_error = None; + kwo.ret_info = Some(WaitIdInfo { + pid: wait_visible_pid(child_pcb), + status: stopsig, + cause, + }); + kwo.ret_status = (stopsig << 8) | 0x7f; + fill_wait_rusage(child_pcb, kwo); + return CandidateDecision::Ready(Ok(wait_visible_pid(child_pcb).into())); + } + + if kwo.options.contains(WaitOption::WCONTINUED) + && child_pcb.sighand().flags_test_and_clear( + SignalFlags::CLD_CONTINUED, + !kwo.options.contains(WaitOption::WNOWAIT), + ) + { + kwo.no_task_error = None; + kwo.ret_info = 
Some(WaitIdInfo { + pid: wait_visible_pid(child_pcb), + status: Signal::SIGCONT as i32, + cause: SigChildCode::Continued.into(), + }); + kwo.ret_status = 0xffff; + fill_wait_rusage(child_pcb, kwo); + return CandidateDecision::Ready(Ok(wait_visible_pid(child_pcb).into())); + } + + let can_change = if child_pcb.is_zombie() { + delayed_zombie + && (relation == WaitRelation::Natural + || kwo.options.contains(WaitOption::WEXITED) + || kwo.options.contains(WaitOption::WCONTINUED)) + } else { + true + }; + CandidateDecision::Pending { can_change } +} + +fn report_candidate_relation( + child_pcb: &Arc, + relation: WaitRelation, + kwo: &mut KernelWaitOption, + scan: &mut ScanDecision, +) -> bool { + let decision = report_wait_event(child_pcb, relation, kwo); + let ready = matches!(decision, CandidateDecision::Ready(_)); + scan.observe(decision); + ready +} + +fn report_candidate( + candidate: &WaitCandidate, + kwo: &mut KernelWaitOption, + scan: &mut ScanDecision, +) -> bool { + // A tracee that is also a natural child should be observed through the + // ptrace relation first, matching Linux's wait_consider_task() switch to + // ptrace semantics for children traced by the caller's thread group. 
+ if candidate.relations.contains(WaitRelation::Ptraced) + && report_candidate_relation(&candidate.child, WaitRelation::Ptraced, kwo, scan) + { + return true; + } + if candidate.relations.contains(WaitRelation::Natural) + && report_candidate_relation(&candidate.child, WaitRelation::Natural, kwo, scan) + { + return true; + } + false +} + +fn scan_wait_candidates( + kwo: &mut KernelWaitOption, + candidates: &[WaitCandidate], + mut matches_selector: F, +) -> ScanDecision +where + F: FnMut(&Arc) -> bool, +{ + let mut scan = ScanDecision::new(); + for candidate in candidates { + if !matches_selector(&candidate.child) { + continue; + } + if report_candidate(candidate, kwo, &mut scan) { + break; + } + } + scan +} + +fn scan_result_or_wait(scan: ScanDecision) -> Result, SystemError> { + if let Some(result) = scan.ready { + return result.map(Some); + } + if !scan.has_eligible || !scan.has_future_event { + return Err(SystemError::ECHILD); + } + Ok(None) +} + /// 参考 https://code.dragonos.org.cn/xref/linux-6.1.9/kernel/exit.c#1573 fn do_wait(kwo: &mut KernelWaitOption) -> Result { - let mut tmp_child_pcb: Option> = None; // todo: 在signal struct里面增加等待队列,并在这里初始化子进程退出的回调,使得子进程退出时,能唤醒当前进程。 kwo.no_task_error = Some(SystemError::ECHILD); - let retval = match kwo.pid_converter.clone() { - PidConverter::Pid(pid) => { + let retval = match kwo.selector.clone() { + WaitSelector::Pid(pid) => { if pid.pid_vnr().data() == ProcessManager::current_pcb().raw_tgid().data() { return Err(SystemError::ECHILD); } let current = ProcessManager::current_pcb(); - // 获取用于等待的 PCB(线程组 leader 或当前线程,取决于 WNOTHREAD) - let parent = get_thread_group_leader(¤t); - let check_child = |kwo: &mut KernelWaitOption| -> Option> { - let child_pcb = match pid.pid_task(PidType::PID) { - Some(child_pcb) => child_pcb, - None => return Some(Err(SystemError::ECHILD)), - }; - // 检查子进程是否可以被当前线程等待 - // 根据 Linux 语义: - // - 默认情况下,线程组中的任何线程可以等待同一线程组中任何线程 fork 的子进程 - // - 如果指定了 __WNOTHREAD,则只能等待当前线程自己创建的子进程 - if 
!is_eligible_child(&child_pcb, kwo.options) { - return Some(Err(SystemError::ECHILD)); + let wait_queue_owner = get_thread_group_leader(¤t); + let check_child = |kwo: &mut KernelWaitOption| -> Result, SystemError> { + let natural_child = pid.thread_group_leader_task(); + let ptrace_child = pid.pid_task(PidType::PID); + + let mut candidates = Vec::new(); + if let Some(child_pcb) = natural_child { + push_wait_candidate(&mut candidates, child_pcb, WaitRelation::Natural); + } + if let Some(child_pcb) = ptrace_child { + if ptrace::is_wait_tracee_of( + &child_pcb, + &ProcessManager::current_pcb(), + kwo.options, + ) { + push_wait_candidate(&mut candidates, child_pcb, WaitRelation::Ptraced); + } } - // 检查子进程是否匹配等待选项(__WALL/__WCLONE) - if !child_matches_wait_options(&child_pcb, kwo.options) { - return Some(Err(SystemError::ECHILD)); + let mut scan = ScanDecision::new(); + for candidate in &candidates { + if report_candidate(candidate, kwo, &mut scan) { + break; + } } - do_waitpid(child_pcb, kwo) + scan_result_or_wait(scan) }; - // 等待指定子进程:睡眠在父进程自己的 wait_queue 上 - // 子进程退出时会发送信号并唤醒父进程的 wait_queue loop { - // Fast path: check without sleeping - if let Some(r) = check_child(kwo) { - break r; + if let Some(pid) = check_child(kwo)? 
{ + break Ok(pid); } if kwo.options.contains(WaitOption::WNOHANG) { break Ok(0); } - let mut ready: Option> = None; - let wait_res = parent.wait_queue.wait_event_interruptible( - || { - if let Some(r) = check_child(kwo) { - ready = Some(r); + let mut ready: Option, SystemError>> = None; + let wait_res = wait_queue_owner.wait_queue.wait_event_interruptible( + || match check_child(kwo) { + Ok(Some(pid)) => { + ready = Some(Ok(Some(pid))); + true + } + Ok(None) => false, + Err(err) => { + ready = Some(Err(err)); true - } else { - false } }, None::, @@ -352,7 +683,7 @@ fn do_wait(kwo: &mut KernelWaitOption) -> Result { match wait_res { Ok(()) => { if let Some(r) = ready.take() { - break r; + break r.map(|pid| pid.unwrap_or(0)); } if ProcessManager::current_pcb().has_pending_signal_fast() { break Err(SystemError::ERESTARTSYS); @@ -364,250 +695,41 @@ fn do_wait(kwo: &mut KernelWaitOption) -> Result { } } } - PidConverter::All => { - // 等待任意子进程:使用线程组 leader 的 wait_queue 和 children 列表 - // 这样线程组中的任何线程都可以等待同一线程组中任何线程 fork 的子进程 + WaitSelector::Any => { let current = ProcessManager::current_pcb(); - let parent = get_thread_group_leader(¤t); + let wait_queue_owner = get_thread_group_leader(¤t); loop { if kwo.options.contains(WaitOption::WNOHANG) { - let rd_children = parent.children.read(); - if rd_children.is_empty() { - break Err(SystemError::ECHILD); - } - let mut scan_result: Option> = None; - let mut has_waitable_child = false; - let mut all_waitable_children_exited = true; - let mut pid_to_release: Option = None; - - for pid in rd_children.iter() { - let pcb = match ProcessManager::find_task_by_vpid(*pid) { - Some(p) => p, - None => continue, - }; - if !is_eligible_child(&pcb, kwo.options) { - continue; - } - if !child_matches_wait_options(&pcb, kwo.options) { - continue; - } - has_waitable_child = true; - - let state = pcb.sched_info().state(); - if !pcb.is_zombie() { - all_waitable_children_exited = false; - } - - if state.is_stopped() - && 
kwo.options.contains(WaitOption::WSTOPPED) - && pcb.sighand().flags_contains(SignalFlags::CLD_STOPPED) - { - let stopsig = pcb.sighand().stop_signal() as i32; - kwo.no_task_error = None; - kwo.ret_info = Some(WaitIdInfo { - pid: wait_visible_pid(&pcb), - status: stopsig, - cause: SigChildCode::Stopped.into(), - }); - kwo.ret_status = (stopsig << 8) | 0x7f; - fill_wait_rusage(&pcb, kwo); - if !kwo.options.contains(WaitOption::WNOWAIT) { - pcb.sighand().flags_remove(SignalFlags::CLD_STOPPED); - } - scan_result = Some(Ok(wait_visible_pid(&pcb).into())); - - break; - } else if kwo.options.contains(WaitOption::WCONTINUED) - && pcb.sighand().flags_contains(SignalFlags::CLD_CONTINUED) - { - kwo.no_task_error = None; - kwo.ret_info = Some(WaitIdInfo { - pid: wait_visible_pid(&pcb), - status: Signal::SIGCONT as i32, - cause: SigChildCode::Continued.into(), - }); - kwo.ret_status = 0xffff; - fill_wait_rusage(&pcb, kwo); - if !kwo.options.contains(WaitOption::WNOWAIT) { - pcb.sighand().flags_remove(SignalFlags::CLD_CONTINUED); - } - scan_result = Some(Ok(wait_visible_pid(&pcb).into())); - - break; - } else if pcb.is_zombie() && kwo.options.contains(WaitOption::WEXITED) { - if reap_blocked_by_group_exec(&pcb) { - continue; - } - let Some(code) = state.exit_code() else { - continue; - }; - let raw = code as i32; - kwo.ret_status = raw; - let status8 = wstatus_to_waitid_status(raw); - kwo.no_task_error = None; - kwo.ret_info = Some(WaitIdInfo { - pid: wait_visible_pid(&pcb), - status: status8, - cause: SigChildCode::Exited.into(), - }); - tmp_child_pcb = Some(pcb.clone()); - let child_rusage = fill_wait_rusage(&pcb, kwo); - if !kwo.options.contains(WaitOption::WNOWAIT) { - if !pcb.try_mark_dead_from_zombie() { - continue; - } - account_reaped_child_rusage(&child_rusage); - pid_to_release = Some(pcb.raw_pid()); - } - scan_result = Some(Ok(wait_visible_pid(&pcb).into())); - - break; - } - } - drop(rd_children); - if let Some(pid) = pid_to_release { - unsafe { 
ProcessManager::release(pid) }; - } - if let Some(r) = scan_result.take() { - break r; - } - if !has_waitable_child { - break Err(SystemError::ECHILD); - } - if all_waitable_children_exited && !kwo.options.contains(WaitOption::WEXITED) { - break Err(SystemError::ECHILD); - } - break Ok(0); + let candidates = wait_candidate_children(kwo.options); + let scan = scan_wait_candidates(kwo, &candidates, |_| true); + break scan_result_or_wait(scan).map(|pid| pid.unwrap_or(0)); } - let mut scan_result: Option> = None; - let mut echild = false; + let mut ready: Option, SystemError>> = None; - let wait_res = parent.wait_queue.wait_event_interruptible( + let wait_res = wait_queue_owner.wait_queue.wait_event_interruptible( || { - let rd_childen = parent.children.read(); - if rd_childen.is_empty() { - echild = true; - return true; - } - let mut has_waitable_child = false; - let mut all_waitable_children_exited = true; - let mut pid_to_release: Option = None; - - for pid in rd_childen.iter() { - let pcb = match ProcessManager::find_task_by_vpid(*pid) { - Some(p) => p, - None => continue, - }; - - if !is_eligible_child(&pcb, kwo.options) { - continue; - } - if !child_matches_wait_options(&pcb, kwo.options) { - continue; + let candidates = wait_candidate_children(kwo.options); + let scan = scan_wait_candidates(kwo, &candidates, |_| true); + match scan_result_or_wait(scan) { + Ok(Some(pid)) => { + ready = Some(Ok(Some(pid))); + true } - has_waitable_child = true; - - let state = pcb.sched_info().state(); - if !pcb.is_zombie() { - all_waitable_children_exited = false; - } - - if state.is_stopped() - && kwo.options.contains(WaitOption::WSTOPPED) - && pcb.sighand().flags_contains(SignalFlags::CLD_STOPPED) - { - let stopsig = pcb.sighand().stop_signal() as i32; - kwo.no_task_error = None; - kwo.ret_info = Some(WaitIdInfo { - pid: wait_visible_pid(&pcb), - status: stopsig, - cause: SigChildCode::Stopped.into(), - }); - kwo.ret_status = (stopsig << 8) | 0x7f; - fill_wait_rusage(&pcb, 
kwo); - if !kwo.options.contains(WaitOption::WNOWAIT) { - pcb.sighand().flags_remove(SignalFlags::CLD_STOPPED); - } - scan_result = Some(Ok(wait_visible_pid(&pcb).into())); - - break; - } else if kwo.options.contains(WaitOption::WCONTINUED) - && pcb.sighand().flags_contains(SignalFlags::CLD_CONTINUED) - { - kwo.no_task_error = None; - kwo.ret_info = Some(WaitIdInfo { - pid: wait_visible_pid(&pcb), - status: Signal::SIGCONT as i32, - cause: SigChildCode::Continued.into(), - }); - kwo.ret_status = 0xffff; - fill_wait_rusage(&pcb, kwo); - if !kwo.options.contains(WaitOption::WNOWAIT) { - pcb.sighand().flags_remove(SignalFlags::CLD_CONTINUED); - } - scan_result = Some(Ok(wait_visible_pid(&pcb).into())); - - break; - } else if pcb.is_zombie() && kwo.options.contains(WaitOption::WEXITED) { - if reap_blocked_by_group_exec(&pcb) { - continue; - } - let Some(code) = state.exit_code() else { - continue; - }; - let raw = code as i32; - kwo.ret_status = raw; - let status8 = wstatus_to_waitid_status(raw); - kwo.no_task_error = None; - kwo.ret_info = Some(WaitIdInfo { - pid: wait_visible_pid(&pcb), - status: status8, - cause: SigChildCode::Exited.into(), - }); - tmp_child_pcb = Some(pcb.clone()); - let child_rusage = fill_wait_rusage(&pcb, kwo); - if !kwo.options.contains(WaitOption::WNOWAIT) { - if !pcb.try_mark_dead_from_zombie() { - continue; - } - account_reaped_child_rusage(&child_rusage); - pid_to_release = Some(pcb.raw_pid()); - } - scan_result = Some(Ok(wait_visible_pid(&pcb).into())); - - break; + Ok(None) => false, + Err(err) => { + ready = Some(Err(err)); + true } } - drop(rd_childen); - if let Some(pid) = pid_to_release { - unsafe { ProcessManager::release(pid) }; - } - if scan_result.is_some() { - return true; - } - if !has_waitable_child { - echild = true; - return true; - } - if all_waitable_children_exited - && !kwo.options.contains(WaitOption::WEXITED) - { - echild = true; - return true; - } - false }, None::, ); match wait_res { Ok(()) => { - if let Some(r) = 
scan_result.take() { - break r; - } - if echild { - break Err(SystemError::ECHILD); + if let Some(r) = ready.take() { + break r.map(|pid| pid.unwrap_or(0)); } if ProcessManager::current_pcb().has_pending_signal_fast() { break Err(SystemError::ERESTARTSYS); @@ -618,274 +740,52 @@ fn do_wait(kwo: &mut KernelWaitOption) -> Result { } } } - PidConverter::Pgid(Some(pgid)) => { - // 修复:根据 Linux waitpid 语义,waitpid(-pgid, ...) 只等待调用者的 - // **子进程**中属于指定进程组的进程,而不是进程组中的所有进程。 - // 因此,这里遍历线程组 leader 的 children 列表,检查每个子进程是否属于目标进程组。 + WaitSelector::Pgid(Some(pgid)) => { let current = ProcessManager::current_pcb(); - let parent = get_thread_group_leader(¤t); + let wait_queue_owner = get_thread_group_leader(¤t); loop { if kwo.options.contains(WaitOption::WNOHANG) { - let rd_children = parent.children.read(); - if rd_children.is_empty() { - break Err(SystemError::ECHILD); - } - - let mut has_matching_child = false; - let mut scan_result: Option> = None; - let mut all_matching_children_exited = true; - let mut pid_to_release: Option = None; - for child_pid in rd_children.iter() { - let pcb = match ProcessManager::find_task_by_vpid(*child_pid) { - Some(p) => p, - None => continue, - }; - - if !is_eligible_child(&pcb, kwo.options) { - continue; - } - + let candidates = wait_candidate_children(kwo.options); + let scan = scan_wait_candidates(kwo, &candidates, |pcb| { let child_pgrp = pcb.task_pgrp(); - let in_target_pgrp = match &child_pgrp { + match &child_pgrp { Some(cp) => Arc::ptr_eq(cp, &pgid), None => false, - }; - if !in_target_pgrp { - continue; - } - - if !child_matches_wait_options(&pcb, kwo.options) { - continue; - } - has_matching_child = true; - - let state = pcb.sched_info().state(); - if !pcb.is_zombie() { - all_matching_children_exited = false; } - - if state.is_stopped() - && kwo.options.contains(WaitOption::WSTOPPED) - && pcb.sighand().flags_contains(SignalFlags::CLD_STOPPED) - { - let stopsig = pcb.sighand().stop_signal() as i32; - kwo.no_task_error = None; - 
kwo.ret_info = Some(WaitIdInfo { - pid: wait_visible_pid(&pcb), - status: stopsig, - cause: SigChildCode::Stopped.into(), - }); - kwo.ret_status = (stopsig << 8) | 0x7f; - fill_wait_rusage(&pcb, kwo); - if !kwo.options.contains(WaitOption::WNOWAIT) { - pcb.sighand().flags_remove(SignalFlags::CLD_STOPPED); - } - - scan_result = Some(Ok(wait_visible_pid(&pcb).into())); - break; - } else if kwo.options.contains(WaitOption::WCONTINUED) - && pcb.sighand().flags_contains(SignalFlags::CLD_CONTINUED) - { - kwo.no_task_error = None; - kwo.ret_info = Some(WaitIdInfo { - pid: wait_visible_pid(&pcb), - status: Signal::SIGCONT as i32, - cause: SigChildCode::Continued.into(), - }); - kwo.ret_status = 0xffff; - fill_wait_rusage(&pcb, kwo); - if !kwo.options.contains(WaitOption::WNOWAIT) { - pcb.sighand().flags_remove(SignalFlags::CLD_CONTINUED); - } - - scan_result = Some(Ok(wait_visible_pid(&pcb).into())); - break; - } else if pcb.is_zombie() && kwo.options.contains(WaitOption::WEXITED) { - if reap_blocked_by_group_exec(&pcb) { - continue; - } - let Some(code) = state.exit_code() else { - continue; - }; - let raw = code as i32; - kwo.ret_status = raw; - let status8 = wstatus_to_waitid_status(raw); - kwo.no_task_error = None; - kwo.ret_info = Some(WaitIdInfo { - pid: wait_visible_pid(&pcb), - status: status8, - cause: SigChildCode::Exited.into(), - }); - tmp_child_pcb = Some(pcb.clone()); - let child_rusage = fill_wait_rusage(&pcb, kwo); - if !kwo.options.contains(WaitOption::WNOWAIT) { - if !pcb.try_mark_dead_from_zombie() { - continue; - } - account_reaped_child_rusage(&child_rusage); - pid_to_release = Some(pcb.raw_pid()); - } - - scan_result = Some(Ok(wait_visible_pid(&pcb).into())); - break; - } - } - drop(rd_children); - if let Some(pid) = pid_to_release { - unsafe { ProcessManager::release(pid) }; - } - - if let Some(r) = scan_result.take() { - break r; - } - if !has_matching_child { - break Err(SystemError::ECHILD); - } - if all_matching_children_exited && 
!kwo.options.contains(WaitOption::WEXITED) { - break Err(SystemError::ECHILD); - } - break Ok(0); + }); + break scan_result_or_wait(scan).map(|pid| pid.unwrap_or(0)); } - let mut scan_result: Option> = None; - let mut echild = false; - let wait_res = parent.wait_queue.wait_event_interruptible( + let mut ready: Option, SystemError>> = None; + let wait_res = wait_queue_owner.wait_queue.wait_event_interruptible( || { - let rd_children = parent.children.read(); - if rd_children.is_empty() { - echild = true; - return true; - } - - let mut has_matching_child = false; - let mut all_matching_children_exited = true; - let mut pid_to_release: Option = None; - - for child_pid in rd_children.iter() { - let pcb = match ProcessManager::find_task_by_vpid(*child_pid) { - Some(p) => p, - None => continue, - }; - - if !is_eligible_child(&pcb, kwo.options) { - continue; - } - + let candidates = wait_candidate_children(kwo.options); + let scan = scan_wait_candidates(kwo, &candidates, |pcb| { let child_pgrp = pcb.task_pgrp(); - let in_target_pgrp = match &child_pgrp { + match &child_pgrp { Some(cp) => Arc::ptr_eq(cp, &pgid), None => false, - }; - if !in_target_pgrp { - continue; } - - if !child_matches_wait_options(&pcb, kwo.options) { - continue; + }); + match scan_result_or_wait(scan) { + Ok(Some(pid)) => { + ready = Some(Ok(Some(pid))); + true } - has_matching_child = true; - - let state = pcb.sched_info().state(); - - if !pcb.is_zombie() { - all_matching_children_exited = false; + Ok(None) => false, + Err(err) => { + ready = Some(Err(err)); + true } - - if state.is_stopped() - && kwo.options.contains(WaitOption::WSTOPPED) - && pcb.sighand().flags_contains(SignalFlags::CLD_STOPPED) - { - let stopsig = pcb.sighand().stop_signal() as i32; - kwo.no_task_error = None; - kwo.ret_info = Some(WaitIdInfo { - pid: wait_visible_pid(&pcb), - status: stopsig, - cause: SigChildCode::Stopped.into(), - }); - kwo.ret_status = (stopsig << 8) | 0x7f; - fill_wait_rusage(&pcb, kwo); - if 
!kwo.options.contains(WaitOption::WNOWAIT) { - pcb.sighand().flags_remove(SignalFlags::CLD_STOPPED); - } - scan_result = Some(Ok(wait_visible_pid(&pcb).into())); - - break; - } else if kwo.options.contains(WaitOption::WCONTINUED) - && pcb.sighand().flags_contains(SignalFlags::CLD_CONTINUED) - { - kwo.no_task_error = None; - kwo.ret_info = Some(WaitIdInfo { - pid: wait_visible_pid(&pcb), - status: Signal::SIGCONT as i32, - cause: SigChildCode::Continued.into(), - }); - kwo.ret_status = 0xffff; - fill_wait_rusage(&pcb, kwo); - if !kwo.options.contains(WaitOption::WNOWAIT) { - pcb.sighand().flags_remove(SignalFlags::CLD_CONTINUED); - } - scan_result = Some(Ok(wait_visible_pid(&pcb).into())); - - break; - } else if pcb.is_zombie() && kwo.options.contains(WaitOption::WEXITED) { - if reap_blocked_by_group_exec(&pcb) { - continue; - } - let Some(code) = state.exit_code() else { - continue; - }; - let raw = code as i32; - kwo.ret_status = raw; - let status8 = wstatus_to_waitid_status(raw); - kwo.no_task_error = None; - kwo.ret_info = Some(WaitIdInfo { - pid: wait_visible_pid(&pcb), - status: status8, - cause: SigChildCode::Exited.into(), - }); - tmp_child_pcb = Some(pcb.clone()); - let child_rusage = fill_wait_rusage(&pcb, kwo); - if !kwo.options.contains(WaitOption::WNOWAIT) { - if !pcb.try_mark_dead_from_zombie() { - continue; - } - account_reaped_child_rusage(&child_rusage); - pid_to_release = Some(pcb.raw_pid()); - } - scan_result = Some(Ok(wait_visible_pid(&pcb).into())); - - break; - } - } - drop(rd_children); - if let Some(pid) = pid_to_release { - unsafe { ProcessManager::release(pid) }; - } - if scan_result.is_some() { - return true; - } - if !has_matching_child { - echild = true; - return true; } - if all_matching_children_exited - && !kwo.options.contains(WaitOption::WEXITED) - { - echild = true; - return true; - } - false }, None::, ); match wait_res { Ok(()) => { - if let Some(r) = scan_result.take() { - break r; - } - if echild { - break 
Err(SystemError::ECHILD); + if let Some(r) = ready.take() { + break r.map(|pid| pid.unwrap_or(0)); } if ProcessManager::current_pcb().has_pending_signal_fast() { break Err(SystemError::ERESTARTSYS); @@ -897,145 +797,12 @@ fn do_wait(kwo: &mut KernelWaitOption) -> Result { } } - PidConverter::Pgid(None) => { - // 进程组不存在,直接返回 ECHILD - // 这种情况发生在:进程组中的所有进程都已退出并被回收 - Err(SystemError::ECHILD) - } + WaitSelector::Pgid(None) => Err(SystemError::ECHILD), }; - drop(tmp_child_pcb); - - // log::debug!( - // "do_wait, kwo.pid: {}, retval = {:?}, kwo: {:?}", - // kwo.pid, - // retval, - // kwo.no_task_error - // ); - return retval; } -fn do_waitpid( - child_pcb: Arc, - kwo: &mut KernelWaitOption, -) -> Option> { - // 优先处理继续事件:与 Linux 语义一致,只要标志存在即可报告 - if kwo.options.contains(WaitOption::WCONTINUED) - && child_pcb - .sighand() - .flags_contains(SignalFlags::CLD_CONTINUED) - { - // log::debug!( - // "do_waitpid: report CLD_CONTINUED for pid={:?}", - // child_pcb.raw_pid() - // ); - kwo.ret_info = Some(WaitIdInfo { - pid: wait_visible_pid(&child_pcb), - status: Signal::SIGCONT as i32, - cause: SigChildCode::Continued.into(), - }); - - // 设置 ret_status 供 wait4 使用 - // Linux wait(2) 语义:continued 进程的 wstatus = 0xffff - kwo.ret_status = 0xffff; - fill_wait_rusage(&child_pcb, kwo); - - if !kwo.options.contains(WaitOption::WNOWAIT) { - child_pcb.sighand().flags_remove(SignalFlags::CLD_CONTINUED); - } - return Some(Ok(wait_visible_pid(&child_pcb).into())); - } - - let state = child_pcb.sched_info().state(); - // 获取退出码 - match state { - ProcessState::Runnable => { - if kwo.options.contains(WaitOption::WNOHANG) { - return Some(Ok(0)); - } - } - ProcessState::Blocked(_) => { - // 对于被阻塞的子进程(如正在sleep),waitpid应该继续等待 - // 而不是立即返回0。只有当子进程真正退出时才应该返回。 - return None; - } - ProcessState::Stopped => { - // 非 ptrace 停止:报告 stopsig=SIGSTOP - let stopsig = child_pcb.sighand().stop_signal() as i32; - // 由于目前不支持ptrace,因此这个值为false - let ptrace = false; - - if (!ptrace) && 
(!kwo.options.contains(WaitOption::WSTOPPED)) { - // 调用方未请求 WSTOPPED,按照 Linux 语义应当继续等待其它事件 - // 而不是返回 0 并写回空的 siginfo。 - return None; - } - - // 填充 waitid 信息 - // log::debug!("do_waitpid: report CLD_STOPPED for pid={:?}", child_pcb.raw_pid()); - kwo.ret_info = Some(WaitIdInfo { - pid: wait_visible_pid(&child_pcb), - status: stopsig, - cause: SigChildCode::Stopped.into(), - }); - - // 设置 ret_status 供 wait4 使用 - // Linux wait(2) 语义:stopped 进程的 wstatus = (stopsig << 8) | 0x7f - kwo.ret_status = (stopsig << 8) | 0x7f; - fill_wait_rusage(&child_pcb, kwo); - - if !kwo.options.contains(WaitOption::WNOWAIT) { - // 消费一次停止事件标志(若存在) - child_pcb.sighand().flags_remove(SignalFlags::CLD_STOPPED); - } - - return Some(Ok(wait_visible_pid(&child_pcb).into())); - } - ProcessState::Exited(status) => { - if !child_pcb.is_zombie() { - return None; - } - if reap_blocked_by_group_exec(&child_pcb) { - return None; - } - let pid = wait_visible_pid(&child_pcb); - // Linux 语义:若等待集合未包含 WEXITED,则不报告退出事件 - if likely(!kwo.options.contains(WaitOption::WEXITED)) { - return None; - } - - // 始终填充 waitid 信息 - // log::debug!("do_waitpid: report CLD_EXITED for pid={:?}", child_pcb.raw_pid()); - kwo.ret_info = Some(WaitIdInfo { - pid, - status: wstatus_to_waitid_status(status as i32), - cause: SigChildCode::Exited.into(), - }); - - kwo.ret_status = status as i32; - let child_rusage = fill_wait_rusage(&child_pcb, kwo); - - // 若指定 WNOWAIT,则只观测不回收 - if !kwo.options.contains(WaitOption::WNOWAIT) { - if !child_pcb.try_mark_dead_from_zombie() { - drop(child_pcb); - return Some(Err(SystemError::ECHILD)); - } - account_reaped_child_rusage(&child_rusage); - unsafe { ProcessManager::release(child_pcb.raw_pid()) }; - drop(child_pcb); - } else { - // 观测模式下不回收,保持任务可再次被 wait 系列看到 - drop(child_pcb); - } - return Some(Ok(pid.into())); - } - }; - - return None; -} - impl ProcessControlBlock { fn dec_visible_thread_count_if_accounted(&self) { if self.take_visible_thread_accounted() { diff --git 
a/kernel/src/process/mod.rs b/kernel/src/process/mod.rs index 9b2d9a9e3f..11405ca4a7 100644 --- a/kernel/src/process/mod.rs +++ b/kernel/src/process/mod.rs @@ -27,7 +27,7 @@ use system_error::SystemError; use crate::{ arch::{ cpu::current_cpu_id, - ipc::signal::{AtomicSignal, SigSet, Signal}, + ipc::signal::{AtomicSignal, SigFlags, SigSet, Signal}, process::ArchPCBInfo, CurrentIrqArch, SigStackArch, }, @@ -96,6 +96,7 @@ pub mod pid; pub mod posix_timer; pub mod preempt; pub mod process_group; +pub mod ptrace; pub mod resource; pub mod rseq; pub mod seccomp; @@ -106,12 +107,14 @@ pub mod stdio; pub mod syscall; pub mod timer; pub mod utils; +pub mod wait; pub use cputime::ProcessCpuTime; /// 系统中所有进程的pcb static ALL_PROCESS: SpinLock>>> = SpinLock::new(None); +static PTRACE_RELATION_LOCK: SpinLock<()> = SpinLock::new(()); static NR_VISIBLE_THREADS: AtomicUsize = AtomicUsize::new(0); static TOTAL_FORKS: AtomicU64 = AtomicU64::new(0); static TOTAL_CONTEXT_SWITCHES: AtomicU64 = AtomicU64::new(0); @@ -855,6 +858,7 @@ impl ProcessManager { .adopt_childen() .unwrap_or_else(|e| panic!("adopte_childen failed: error: {e:?}")) }; + ProcessManager::exit_ptrace(current); // 在通知父进程之前,先标记为 Zombie,保证 wait 可见 current.set_exit_state_zombie(); let r = current.parent_pcb.read_irqsave().upgrade(); @@ -866,7 +870,24 @@ impl ProcessManager { // 检查子进程的 exit_signal,只有在正信号编号时才发送信号。 // Linux 语义中 exit_signal=0 表示不发信号但仍可 wait,-1 表示非 leader 线程。 let exit_signal = current.exit_signal.load(Ordering::SeqCst); - if exit_signal > 0 { + let sigchld_disposition = parent_pcb.sighand().handler(Signal::SIGCHLD); + let sigchld_ignored = sigchld_disposition + .as_ref() + .map(|sa| sa.is_ignore()) + .unwrap_or(false); + let sigchld_no_cldwait = sigchld_disposition + .as_ref() + .map(|sa| sa.flags().contains(SigFlags::SA_NOCLDWAIT)) + .unwrap_or(false); + let autoreap = !current.is_ptraced() + && exit_signal == Signal::SIGCHLD as i32 + && (sigchld_ignored || sigchld_no_cldwait); + let is_kthread = 
current.is_kthread(); + if autoreap && current.try_mark_dead_from_zombie() { + unsafe { ProcessManager::release(current.raw_pid()) }; + } + + if exit_signal > 0 && !(autoreap && sigchld_ignored) { let r = crate::ipc::kill::send_signal_to_pcb( parent_pcb.clone(), Signal::from(exit_signal), @@ -889,7 +910,7 @@ impl ProcessManager { .wakeup_all(Some(ProcessState::Blocked(true))); // kthread 退出时显式唤醒 kthreadd,使其回收 zombie - if current.is_kthread() { + if is_kthread { let _ = ProcessManager::wakeup(&parent_pcb); } @@ -907,6 +928,7 @@ impl ProcessManager { .wakeup_all(Some(ProcessState::Blocked(true))); } } + // todo: 这里还需要根据线程组的信息,决定信号的发送 } } @@ -1196,6 +1218,9 @@ impl ProcessManager { pub(super) unsafe fn release(pid: RawPid) { let pcb = ProcessManager::find(pid); if let Some(ref pcb) = pcb { + ProcessManager::exit_ptrace(pcb); + ProcessManager::ptrace_unlink_tracee(pcb); + let parent_child_vpid = pcb.real_parent_pcb().and_then(|parent| { let parent = ProcessManager::thread_group_leader_of(&parent); let parent_ns = parent.active_pid_ns(); @@ -1218,6 +1243,14 @@ impl ProcessManager { } } + pub fn ptrace_unlink_tracee(tracee: &Arc) { + ptrace::unlink_tracee(tracee) + } + + pub fn exit_ptrace(tracer: &Arc) { + ptrace::exit_ptrace(tracer) + } + /// 上下文切换完成后的钩子函数 unsafe fn switch_finish_hook() { // debug!("switch_finish_hook"); @@ -1350,7 +1383,10 @@ pub enum ProcessState { Blocked(bool), /// 进程被信号终止 Stopped, - /// 进程已经退出,usize表示进程的退出码 + /// The process has exited; usize holds the raw wait status used by Linux wait(2) family. + /// + /// Normal exit: `(exit_code & 0xff) << 8`; signal termination: signal number in the low 7 bits. + /// wait4/waitpid return this value as-is; only waitid `si_status` needs decoding from it. Exited(usize), } @@ -1412,15 +1448,25 @@ impl ProcessState { matches!(self, ProcessState::Stopped) } - /// Returns exit code if the process state is [`Exited`]. + /// Returns raw wait status if the process state is [`Exited`]. 
#[inline(always)] - pub fn exit_code(&self) -> Option { + pub fn raw_wstatus(&self) -> Option { match self { ProcessState::Exited(code) => Some(*code), _ => None, } } + /// Returns raw wait status if the process state is [`Exited`]. + /// + /// Kept for existing call sites; new wait code should prefer + /// [`ProcessState::raw_wstatus`] to avoid confusing raw wait status with + /// the user-visible exit code. + #[inline(always)] + pub fn exit_code(&self) -> Option { + self.raw_wstatus() + } + #[inline] pub fn to_u32(self) -> u32 { match self { @@ -1492,6 +1538,8 @@ bitflags! { const DEFER_UNHASH = 1 << 14; /// PID links and visible-thread accounting have already been released. const PID_UNHASHED = 1 << 15; + /// Task is currently traced by another task. + const PTRACED = 1 << 16; } } @@ -1620,6 +1668,10 @@ pub struct ProcessControlBlock { /// 子进程链表 children: RwLock>, + /// Tasks currently traced by this process. Entries are global raw pids. + ptraced: RwLock>, + /// Current tracer if this process is ptraced. + ptracer_pcb: RwLock>, /// 等待队列 wait_queue: WaitQueue, @@ -1791,6 +1843,8 @@ impl ProcessControlBlock { real_parent_pcb: RwLock::new(ppcb.clone()), fork_parent_pcb: RwLock::new(ppcb), children: RwLock::new(Vec::new()), + ptraced: RwLock::new(Vec::new()), + ptracer_pcb: RwLock::new(Weak::new()), wait_queue: WaitQueue::default(), cputime_wait_queue: WaitQueue::default(), thread: RwLock::new(ThreadInfo::new()), @@ -2265,6 +2319,21 @@ impl ProcessControlBlock { return self.real_parent_pcb.read_irqsave().upgrade(); } + pub fn ptracer_pcb(&self) -> Option> { + ptrace::ptracer_of(&self.self_ref.upgrade()?) 
+ } + + pub fn is_ptraced(&self) -> bool { + ptrace::is_ptraced(self) + } + + pub fn ptraced_pids(&self) -> Vec { + let Some(this) = self.self_ref.upgrade() else { + return Vec::new(); + }; + ptrace::tracees_of(&this) + } + pub fn fork_parent_pcb(&self) -> Option> { self.fork_parent_pcb.read_irqsave().upgrade() } diff --git a/kernel/src/process/pid.rs b/kernel/src/process/pid.rs index 81897f639d..77aa104270 100644 --- a/kernel/src/process/pid.rs +++ b/kernel/src/process/pid.rs @@ -136,6 +136,11 @@ impl Pid { } } + pub fn thread_group_leader_task(&self) -> Option> { + self.tasks_iter(PidType::TGID) + .find(|task| task.is_thread_group_leader()) + } + pub fn pid_vnr(&self) -> RawPid { let active_pid_ns = ProcessManager::current_pcb().active_pid_ns(); self.pid_nr_ns(&active_pid_ns) diff --git a/kernel/src/process/ptrace.rs b/kernel/src/process/ptrace.rs new file mode 100644 index 0000000000..4cf10d662f --- /dev/null +++ b/kernel/src/process/ptrace.rs @@ -0,0 +1,161 @@ +use alloc::{ + sync::{Arc, Weak}, + vec::Vec, +}; +use system_error::SystemError; + +use super::{ + abi::WaitOption, ProcessControlBlock, ProcessFlags, ProcessManager, RawPid, + PTRACE_RELATION_LOCK, +}; + +fn traceme_allowed( + parent: &Arc, + child: &Arc, +) -> Result<(), SystemError> { + if is_ptraced_locked(child) { + return Err(SystemError::EPERM); + } + if parent.flags().contains(ProcessFlags::EXITING) { + return Err(SystemError::EPERM); + } + + // Linux also calls security_ptrace_traceme() here. DragonOS does not yet + // have the equivalent LSM/dumpable/credential/capability hooks wired into + // ptrace, so keep this as the single future extension point instead of + // spreading partial checks across syscall and wait code. 
+ Ok(()) +} + +fn traceme_parent_for( + child: &Arc, +) -> Result, SystemError> { + let real_parent = child.real_parent_pcb().ok_or(SystemError::EPERM)?; + let Some(fork_parent) = child.fork_parent_pcb() else { + return Ok(real_parent); + }; + + if fork_parent.tgid == real_parent.tgid { + Ok(fork_parent) + } else { + Ok(real_parent) + } +} + +pub fn traceme_current() -> Result<(), SystemError> { + let _relation_guard = PTRACE_RELATION_LOCK.lock_irqsave(); + let current = ProcessManager::current_pcb(); + let tracer = traceme_parent_for(¤t)?; + traceme_allowed(&tracer, ¤t)?; + + let raw_pid = current.raw_pid(); + { + let mut ptracer = current.ptracer_pcb.write_irqsave(); + if ptracer.upgrade().is_some() { + return Err(SystemError::EPERM); + } + *ptracer = Arc::downgrade(&tracer); + current.flags().insert(ProcessFlags::PTRACED); + } + + let mut ptraced = tracer.ptraced.write_irqsave(); + if !ptraced.contains(&raw_pid) { + ptraced.push(raw_pid); + } + + Ok(()) +} + +pub fn unlink_tracee(tracee: &Arc) { + let _relation_guard = PTRACE_RELATION_LOCK.lock_irqsave(); + let tracer = { + let mut ptracer = tracee.ptracer_pcb.write_irqsave(); + let tracer = ptracer.upgrade(); + *ptracer = Weak::new(); + tracee.flags().remove(ProcessFlags::PTRACED); + tracer + }; + + if let Some(tracer) = tracer { + let raw_pid = tracee.raw_pid(); + tracer.ptraced.write_irqsave().retain(|pid| *pid != raw_pid); + } +} + +pub fn exit_ptrace(tracer: &Arc) { + let _relation_guard = PTRACE_RELATION_LOCK.lock_irqsave(); + let traced_pids: Vec = { + let mut ptraced = tracer.ptraced.write_irqsave(); + core::mem::take(&mut *ptraced) + }; + + for pid in traced_pids { + let Some(tracee) = ProcessManager::find(pid) else { + continue; + }; + { + let mut ptracer = tracee.ptracer_pcb.write_irqsave(); + if ptracer + .upgrade() + .as_ref() + .map(|t| Arc::ptr_eq(t, tracer)) + .unwrap_or(false) + { + *ptracer = Weak::new(); + tracee.flags().remove(ProcessFlags::PTRACED); + } + } + if let Some(real_parent) = 
tracee.real_parent_pcb() { + real_parent + .wait_queue + .wakeup_all(Some(super::ProcessState::Blocked(true))); + } + } +} + +pub fn tracees_of(tracer: &Arc) -> Vec { + let _relation_guard = PTRACE_RELATION_LOCK.lock_irqsave(); + tracees_of_locked(tracer) +} + +fn tracees_of_locked(tracer: &Arc) -> Vec { + tracer.ptraced.read_irqsave().clone() +} + +pub fn ptracer_of(tracee: &Arc) -> Option> { + let _relation_guard = PTRACE_RELATION_LOCK.lock_irqsave(); + ptracer_of_locked(tracee) +} + +fn ptracer_of_locked(tracee: &Arc) -> Option> { + tracee.ptracer_pcb.read_irqsave().upgrade() +} + +pub fn is_ptraced(tracee: &ProcessControlBlock) -> bool { + let _relation_guard = PTRACE_RELATION_LOCK.lock_irqsave(); + is_ptraced_locked(tracee) +} + +fn is_ptraced_locked(tracee: &ProcessControlBlock) -> bool { + tracee.flags().contains(ProcessFlags::PTRACED) + && tracee.ptracer_pcb.read_irqsave().upgrade().is_some() +} + +pub fn is_wait_tracee_of( + tracee: &Arc, + waiter: &Arc, + options: WaitOption, +) -> bool { + let _relation_guard = PTRACE_RELATION_LOCK.lock_irqsave(); + let Some(tracer) = ptracer_of_locked(tracee) else { + return false; + }; + + let same_waiter = Arc::ptr_eq(&tracer, waiter); + let same_thread_group = !options.contains(WaitOption::WNOTHREAD) && tracer.tgid == waiter.tgid; + if !same_waiter && !same_thread_group { + return false; + } + + tracees_of_locked(&tracer).contains(&tracee.raw_pid()) +} diff --git a/kernel/src/process/syscall/mod.rs b/kernel/src/process/syscall/mod.rs index c275dfd649..baa5d7c399 100644 --- a/kernel/src/process/syscall/mod.rs +++ b/kernel/src/process/syscall/mod.rs @@ -24,6 +24,7 @@ mod sys_init_module; mod sys_pidfdopen; mod sys_prctl; pub mod sys_prlimit64; +mod sys_ptrace; mod sys_rseq; mod sys_seccomp; mod sys_set_tid_address; diff --git a/kernel/src/process/syscall/sys_ptrace.rs b/kernel/src/process/syscall/sys_ptrace.rs new file mode 100644 index 0000000000..e0961506ae --- /dev/null +++ b/kernel/src/process/syscall/sys_ptrace.rs 
@@ -0,0 +1,65 @@ +use alloc::vec::Vec; + +use crate::{ + arch::{interrupt::TrapFrame, syscall::nr::SYS_PTRACE}, + process::{ptrace, ProcessManager, RawPid}, + syscall::table::{FormattedSyscallParam, Syscall}, +}; +use system_error::SystemError; + +const PTRACE_TRACEME: usize = 0; + +pub struct SysPtrace; + +impl SysPtrace { + fn request(args: &[usize]) -> usize { + args[0] + } + + fn pid(args: &[usize]) -> i32 { + args[1] as i32 + } + + fn addr(args: &[usize]) -> usize { + args[2] + } + + fn data(args: &[usize]) -> usize { + args[3] + } +} + +impl Syscall for SysPtrace { + fn num_args(&self) -> usize { + 4 + } + + fn handle(&self, args: &[usize], _frame: &mut TrapFrame) -> Result { + match Self::request(args) { + PTRACE_TRACEME => { + ptrace::traceme_current()?; + Ok(0) + } + _ => { + let pid = Self::pid(args); + if pid <= 0 + || ProcessManager::find_task_by_vpid(RawPid::new(pid as usize)).is_none() + { + return Err(SystemError::ESRCH); + } + Err(SystemError::EIO) + } + } + } + + fn entry_format(&self, args: &[usize]) -> Vec { + vec![ + FormattedSyscallParam::new("request", format!("{:#x}", Self::request(args))), + FormattedSyscallParam::new("pid", format!("{:#x}", Self::pid(args))), + FormattedSyscallParam::new("addr", format!("{:#x}", Self::addr(args))), + FormattedSyscallParam::new("data", format!("{:#x}", Self::data(args))), + ] + } +} + +syscall_table_macros::declare_syscall!(SYS_PTRACE, SysPtrace); diff --git a/kernel/src/process/syscall/sys_wait4.rs b/kernel/src/process/syscall/sys_wait4.rs index 705910ea75..3f7fc35bc5 100644 --- a/kernel/src/process/syscall/sys_wait4.rs +++ b/kernel/src/process/syscall/sys_wait4.rs @@ -44,17 +44,18 @@ impl Syscall for SysWait4 { // 权限校验 // todo: 引入rusage之后,更正以下权限校验代码中,rusage的大小 - let options = WaitOption::from_bits(options as u32).ok_or(SystemError::EINVAL)?; - - let wstatus_buf = if wstatus.is_null() { - None - } else { - Some(UserBufferWriter::new( - wstatus, - core::mem::size_of::(), - true, - )?) 
- }; + let options_bits = options as u32; + let valid_options = (WaitOption::WNOHANG + | WaitOption::WUNTRACED + | WaitOption::WCONTINUED + | WaitOption::WNOTHREAD + | WaitOption::WCLONE + | WaitOption::WALL) + .bits(); + if options_bits & !valid_options != 0 { + return Err(SystemError::EINVAL); + } + let options = WaitOption::from_bits(options_bits).ok_or(SystemError::EINVAL)?; let mut tmp_rusage = if rusage.is_null() { None @@ -62,15 +63,23 @@ impl Syscall for SysWait4 { Some(RUsage::default()) }; - let r = kernel_wait4(pid, wstatus_buf, options, tmp_rusage.as_mut())?; + let (r, status) = kernel_wait4(pid, options, tmp_rusage.as_mut())?; + + if r > 0 { + if !wstatus.is_null() { + let mut wstatus_buf = + UserBufferWriter::new(wstatus, core::mem::size_of::(), true)?; + wstatus_buf.copy_one_to_user(&status, 0)?; + } - if !rusage.is_null() { - let mut rusage_buf = UserBufferWriter::new::( - rusage as *mut RUsage, - core::mem::size_of::(), - true, - )?; - rusage_buf.copy_one_to_user(&tmp_rusage.unwrap(), 0)?; + if !rusage.is_null() { + let mut rusage_buf = UserBufferWriter::new::( + rusage as *mut RUsage, + core::mem::size_of::(), + true, + )?; + rusage_buf.copy_one_to_user(&tmp_rusage.unwrap(), 0)?; + } } return Ok(r); } diff --git a/kernel/src/process/syscall/sys_waitid.rs b/kernel/src/process/syscall/sys_waitid.rs index ff7393e80c..8184753585 100644 --- a/kernel/src/process/syscall/sys_waitid.rs +++ b/kernel/src/process/syscall/sys_waitid.rs @@ -1,13 +1,14 @@ use core::mem::size_of; use crate::arch::interrupt::TrapFrame; +use crate::arch::syscall::nr::SYS_WAITID; use crate::ipc::signal_types::PosixSigInfo; use crate::process::abi::WaitOption; use crate::process::exit::kernel_waitid; use crate::process::resource::RUsage; +use crate::process::wait::WaitSelector; use crate::syscall::table::{FormattedSyscallParam, Syscall}; use crate::syscall::user_access::UserBufferWriter; -use crate::{arch::syscall::nr::SYS_WAITID, ipc::syscall::sys_kill::PidConverter}; use 
alloc::vec::Vec; use system_error::SystemError; @@ -65,7 +66,10 @@ impl Syscall for SysWaitId { return Err(SystemError::EINVAL); } - // 构造 infop writer(可选) + // which/upid → WaitSelector (P_ALL=0, P_PID=1, P_PGID=2, P_PIDFD=3) + let pid_selector = WaitSelector::from_waitid(which, upid)?; + + // Build optional infop writer. Linux validates selector/options before touching user pointers. let infop_writer = if infop_ptr.is_null() { None } else { @@ -83,30 +87,11 @@ impl Syscall for SysWaitId { Some(RUsage::default()) }; - // which/upid → PidConverter(约定:P_ALL=0, P_PID=1, P_PGID=2, P_PIDFD=3) - let pid_selector = match which { - 0..=2 => { - match PidConverter::from_waitid(which, upid) { - Some(converter) => converter, - None => { - // 根据POSIX标准,当进程不存在或已被回收时,应该返回ECHILD - // 而不是ESRCH。这确保了与Linux行为的一致性。 - return Err(SystemError::ECHILD); - } - } - } - 3 => { - // P_PIDFD - return Err(SystemError::ENOSYS); - } - _ => return Err(SystemError::EINVAL), - }; - // 调用内核实现 - let _ = kernel_waitid(pid_selector, infop_writer, options, tmp_rusage.as_mut())?; + let has_event = kernel_waitid(pid_selector, infop_writer, options, tmp_rusage.as_mut())?; // log::debug!("sys_waitid: kernel_waitid returned OK"); - if !rusage_ptr.is_null() { + if has_event && !rusage_ptr.is_null() { let mut rusage_writer = UserBufferWriter::new::(rusage_ptr, size_of::(), true)?; rusage_writer.copy_one_to_user(&tmp_rusage.unwrap(), 0)?; diff --git a/kernel/src/process/wait.rs b/kernel/src/process/wait.rs new file mode 100644 index 0000000000..43ada62753 --- /dev/null +++ b/kernel/src/process/wait.rs @@ -0,0 +1,81 @@ +use alloc::sync::Arc; +use system_error::SystemError; + +use super::{pid::Pid, ProcessManager, RawPid}; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum WaitSelector { + Any, + Pid(Arc), + Pgid(Option>), +} + +impl WaitSelector { + pub fn from_wait4_pid(pid: i32) -> Result { + if pid == i32::MIN { + return Err(SystemError::ESRCH); + } + + if pid < -1 { + 
Ok(Self::Pgid(ProcessManager::find_vpid(RawPid::from( + -pid as usize, + )))) + } else if pid == -1 { + Ok(Self::Any) + } else if pid == 0 { + Ok(Self::Pgid(Some( + ProcessManager::current_pcb() + .task_pgrp() + .ok_or(SystemError::ECHILD)?, + ))) + } else { + let pid = + ProcessManager::find_vpid(RawPid::from(pid as usize)).ok_or(SystemError::ECHILD)?; + Ok(Self::Pid(pid)) + } + } + + pub fn from_waitid(which: u32, upid: i32) -> Result { + match which { + // P_ALL + 0 => Ok(Self::Any), + // P_PID + 1 => { + if upid <= 0 { + return Err(SystemError::EINVAL); + } + let pid = ProcessManager::find_vpid(RawPid::from(upid as usize)) + .ok_or(SystemError::ECHILD)?; + Ok(Self::Pid(pid)) + } + // P_PGID + 2 => { + if upid < 0 { + return Err(SystemError::EINVAL); + } + if upid == 0 { + Ok(Self::Pgid(Some( + ProcessManager::current_pcb() + .task_pgrp() + .ok_or(SystemError::ECHILD)?, + ))) + } else { + Ok(Self::Pgid(ProcessManager::find_vpid(RawPid::new( + upid as usize, + )))) + } + } + // P_PIDFD is a waitid-specific selector. DragonOS has pidfd basics, + // but pidfd wait still needs fd validation and O_NONBLOCK/EAGAIN + // semantics, so keep the unsupported boundary explicit here after + // preserving Linux's invalid negative-fd boundary. 
+ 3 => { + if upid < 0 { + return Err(SystemError::EINVAL); + } + Err(SystemError::ENOSYS) + } + _ => Err(SystemError::EINVAL), + } + } +} diff --git a/user/apps/tests/dunitest/suites/normal/wait_rusage.cc b/user/apps/tests/dunitest/suites/normal/wait_rusage.cc index 0996563636..bd3ff4aba4 100644 --- a/user/apps/tests/dunitest/suites/normal/wait_rusage.cc +++ b/user/apps/tests/dunitest/suites/normal/wait_rusage.cc @@ -13,6 +13,26 @@ #include "gtest/gtest.h" +#ifndef PTRACE_TRACEME +#define PTRACE_TRACEME 0 +#endif + +#ifndef __WCLONE +#define __WCLONE 0x80000000 +#endif + +#ifndef __WALL +#define __WALL 0x40000000 +#endif + +#ifndef __WNOTHREAD +#define __WNOTHREAD 0x20000000 +#endif + +#ifndef P_PIDFD +#define P_PIDFD 3 +#endif + namespace { uint64_t RusageCpuUsec(const struct rusage& ru) { @@ -52,8 +72,521 @@ void* ThreadBurn(void* arg) { return nullptr; } +void ExpectEncodedExitStatus(int status, int code) { + ASSERT_TRUE(WIFEXITED(status)) << status; + EXPECT_EQ(code, WEXITSTATUS(status)); + EXPECT_NE(code, status); +} + +struct ThreadForkArgs { + int ready_fd = -1; + int release_fd = -1; + pid_t child = -1; + int fork_errno = 0; + pid_t wait_result = -1; + int wait_errno = 0; + int wait_status = 0; +}; + +void* ForkChildFromThread(void* arg) { + auto* args = reinterpret_cast(arg); + pid_t child = fork(); + if (child == 0) { + _exit(17); + } + if (child < 0) { + args->fork_errno = errno; + } else { + args->child = child; + } + + char byte = child < 0 ? 
'e' : 'x'; + if (write(args->ready_fd, &byte, 1) != 1) { + args->fork_errno = errno; + } + + if (child >= 0) { + char release = 0; + if (read(args->release_fd, &release, 1) != 1) { + args->fork_errno = errno; + return nullptr; + } + args->wait_result = wait4(child, &args->wait_status, __WNOTHREAD, nullptr); + if (args->wait_result < 0) { + args->wait_errno = errno; + } + } + return nullptr; +} + +struct ThreadTidArgs { + int ready_fd = -1; + int release_fd = -1; + pid_t tid = -1; +}; + +void* ReportTidAndWait(void* arg) { + auto* args = reinterpret_cast(arg); + args->tid = static_cast(syscall(SYS_gettid)); + char byte = 't'; + if (write(args->ready_fd, &byte, 1) != 1) { + return nullptr; + } + char release = 0; + if (read(args->release_fd, &release, 1) != 1) { + return nullptr; + } + return nullptr; +} + +struct BlockingThreadExitArgs { + int ready_fd = -1; + int release_fd = -1; +}; + +void* BlockThenExitThread(void* arg) { + auto* args = reinterpret_cast(arg); + char byte = 'r'; + if (write(args->ready_fd, &byte, 1) != 1) { + syscall(SYS_exit, 4); + } + char release = 0; + if (read(args->release_fd, &release, 1) != 1) { + syscall(SYS_exit, 5); + } + syscall(SYS_exit, 0); + return nullptr; +} + +struct ThreadPtraceForkArgs { + int result = -1; + int err = 0; + int status = 0; +}; + +void* ForkTracemeAndWaitFromThread(void* arg) { + auto* args = reinterpret_cast(arg); + int fds[2] = {}; + if (pipe(fds) != 0) { + args->err = errno; + return nullptr; + } + + pid_t child = fork(); + if (child == 0) { + close(fds[0]); + if (syscall(SYS_ptrace, PTRACE_TRACEME, 0, 0, 0) != 0) { + _exit(2); + } + if (write(fds[1], "x", 1) != 1) { + _exit(3); + } + close(fds[1]); + _exit(0); + } + if (child < 0) { + args->err = errno; + close(fds[0]); + close(fds[1]); + return nullptr; + } + + close(fds[1]); + char byte = 0; + if (read(fds[0], &byte, 1) != 1) { + args->err = errno; + close(fds[0]); + return nullptr; + } + close(fds[0]); + + args->result = wait4(child, &args->status, 
__WNOTHREAD | __WCLONE, nullptr); + if (args->result < 0) { + args->err = errno; + } + return nullptr; +} + } // namespace +TEST(WaitRusage, PtraceTracemeChildIsWaitableWithWclone) { + int fds[2] = {}; + ASSERT_EQ(0, pipe(fds)) << strerror(errno); + + pid_t child = fork(); + ASSERT_GE(child, 0) << strerror(errno); + if (child == 0) { + close(fds[0]); + if (syscall(SYS_ptrace, PTRACE_TRACEME, 0, 0, 0) != 0) { + _exit(2); + } + if (write(fds[1], "x", 1) != 1) { + _exit(3); + } + close(fds[1]); + _exit(0); + } + + close(fds[1]); + char byte = 0; + ASSERT_EQ(1, read(fds[0], &byte, 1)) << strerror(errno); + close(fds[0]); + + int status = 0; + ASSERT_EQ(child, wait4(child, &status, __WCLONE, nullptr)) << strerror(errno); + ASSERT_TRUE(WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); + + errno = 0; + EXPECT_EQ(-1, wait4(child, nullptr, WNOHANG, nullptr)); + EXPECT_EQ(ECHILD, errno); +} + +TEST(WaitRusage, RepeatedPtraceTracemeFailsWithEperm) { + pid_t child = fork(); + ASSERT_GE(child, 0) << strerror(errno); + if (child == 0) { + if (syscall(SYS_ptrace, PTRACE_TRACEME, 0, 0, 0) != 0) { + _exit(2); + } + errno = 0; + if (syscall(SYS_ptrace, PTRACE_TRACEME, 0, 0, 0) != -1 || + errno != EPERM) { + _exit(3); + } + _exit(0); + } + + int status = 0; + ASSERT_EQ(child, wait4(child, &status, __WALL, nullptr)) << strerror(errno); + ASSERT_TRUE(WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); +} + +TEST(WaitRusage, WcloneDoesNotReapOrdinaryForkChild) { + pid_t child = fork(); + ASSERT_GE(child, 0) << strerror(errno); + if (child == 0) { + _exit(0); + } + + int status = 0; + errno = 0; + EXPECT_EQ(-1, wait4(child, &status, WNOHANG | __WCLONE, nullptr)); + EXPECT_EQ(ECHILD, errno); + + ASSERT_EQ(child, wait4(child, &status, 0, nullptr)) << strerror(errno); + ASSERT_TRUE(WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); +} + +TEST(WaitRusage, Wait4AndWaitpidReturnEncodedExitStatus) { + pid_t child = fork(); + ASSERT_GE(child, 0) << strerror(errno); + if (child 
== 0) { + _exit(42); + } + + int status = 0; + ASSERT_EQ(child, waitpid(child, &status, 0)) << strerror(errno); + ExpectEncodedExitStatus(status, 42); + + child = fork(); + ASSERT_GE(child, 0) << strerror(errno); + if (child == 0) { + _exit(21); + } + + status = 0; + ASSERT_EQ(child, wait4(child, &status, 0, nullptr)) << strerror(errno); + ExpectEncodedExitStatus(status, 21); + + child = fork(); + ASSERT_GE(child, 0) << strerror(errno); + if (child == 0) { + _exit(33); + } + + status = 0; + ASSERT_EQ(child, wait4(-1, &status, 0, nullptr)) << strerror(errno); + ExpectEncodedExitStatus(status, 33); + + child = fork(); + ASSERT_GE(child, 0) << strerror(errno); + if (child == 0) { + _exit(34); + } + + status = 0; + ASSERT_EQ(child, wait4(0, &status, 0, nullptr)) << strerror(errno); + ExpectEncodedExitStatus(status, 34); + + int fds[2] = {}; + ASSERT_EQ(0, pipe(fds)) << strerror(errno); + child = fork(); + ASSERT_GE(child, 0) << strerror(errno); + if (child == 0) { + close(fds[1]); + char byte = 0; + if (read(fds[0], &byte, 1) < 0) { + _exit(2); + } + close(fds[0]); + _exit(35); + } + + close(fds[0]); + ASSERT_EQ(0, setpgid(child, child)) << strerror(errno); + ASSERT_EQ(1, write(fds[1], "x", 1)) << strerror(errno); + close(fds[1]); + + status = 0; + ASSERT_EQ(child, wait4(-child, &status, 0, nullptr)) << strerror(errno); + ExpectEncodedExitStatus(status, 35); +} + +TEST(WaitRusage, WaitidPidExitedChildWithoutWexitedReturnsEchild) { + pid_t child = fork(); + ASSERT_GE(child, 0) << strerror(errno); + if (child == 0) { + _exit(7); + } + + siginfo_t si {}; + bool observed_exit = false; + for (int i = 0; i < 1000; ++i) { + memset(&si, 0, sizeof(si)); + ASSERT_EQ(0, syscall(SYS_waitid, P_PID, child, &si, + WEXITED | WNOWAIT | WNOHANG, nullptr)) + << strerror(errno); + if (si.si_pid == child) { + observed_exit = true; + break; + } + usleep(1000); + } + ASSERT_TRUE(observed_exit); + EXPECT_EQ(CLD_EXITED, si.si_code); + EXPECT_EQ(7, si.si_status); + + memset(&si, 0x5a, 
sizeof(si)); + errno = 0; + EXPECT_EQ(-1, + syscall(SYS_waitid, P_PID, child, &si, WSTOPPED | WNOHANG, nullptr)); + EXPECT_EQ(ECHILD, errno); + + int status = 0; + ASSERT_EQ(child, waitpid(child, &status, 0)) << strerror(errno); + ExpectEncodedExitStatus(status, 7); +} + +TEST(WaitRusage, WnothreadWaitsForChildForkedByCurrentThread) { + int ready_pipe[2] = {}; + int release_pipe[2] = {}; + ASSERT_EQ(0, pipe(ready_pipe)) << strerror(errno); + ASSERT_EQ(0, pipe(release_pipe)) << strerror(errno); + + ThreadForkArgs args; + args.ready_fd = ready_pipe[1]; + args.release_fd = release_pipe[0]; + pthread_t thread {}; + ASSERT_EQ(0, pthread_create(&thread, nullptr, ForkChildFromThread, &args)) + << strerror(errno); + + char byte = 0; + ASSERT_EQ(1, read(ready_pipe[0], &byte, 1)) << strerror(errno); + close(ready_pipe[0]); + ASSERT_EQ(0, args.fork_errno) << strerror(args.fork_errno); + ASSERT_GT(args.child, 0); + + int status = 0; + errno = 0; + EXPECT_EQ(-1, wait4(args.child, &status, WNOHANG | __WNOTHREAD, nullptr)); + EXPECT_EQ(ECHILD, errno); + + ASSERT_EQ(1, write(release_pipe[1], "x", 1)) << strerror(errno); + close(release_pipe[1]); + ASSERT_EQ(0, pthread_join(thread, nullptr)) << strerror(errno); + close(ready_pipe[1]); + close(release_pipe[0]); + EXPECT_EQ(args.child, args.wait_result) << strerror(args.wait_errno); + ExpectEncodedExitStatus(args.wait_status, 17); +} + +TEST(WaitRusage, ThreadGroupLeaderWaitDelayedUntilSubthreadsExit) { + int ready_pipe[2] = {}; + int release_pipe[2] = {}; + ASSERT_EQ(0, pipe(ready_pipe)) << strerror(errno); + ASSERT_EQ(0, pipe(release_pipe)) << strerror(errno); + + pid_t child = fork(); + ASSERT_GE(child, 0) << strerror(errno); + if (child == 0) { + close(ready_pipe[0]); + close(release_pipe[1]); + BlockingThreadExitArgs args; + args.ready_fd = ready_pipe[1]; + args.release_fd = release_pipe[0]; + pthread_t thread {}; + if (pthread_create(&thread, nullptr, BlockThenExitThread, &args) != 0) { + syscall(SYS_exit, 2); + } + 
syscall(SYS_exit, 0);
+    }
+
+    close(ready_pipe[1]);
+    close(release_pipe[0]);
+    char byte = 0;
+    ASSERT_EQ(1, read(ready_pipe[0], &byte, 1)) << strerror(errno);
+    close(ready_pipe[0]);
+
+    usleep(50000);
+    siginfo_t si {};
+    ASSERT_EQ(0, syscall(SYS_waitid, P_PID, child, &si, WSTOPPED | WNOHANG,
+                         nullptr))
+        << strerror(errno);
+    EXPECT_EQ(0, si.si_pid);
+
+    int status = 0;
+    ASSERT_EQ(0, wait4(child, &status, WNOHANG, nullptr)) << strerror(errno);
+
+    ASSERT_EQ(1, write(release_pipe[1], "x", 1)) << strerror(errno);
+    close(release_pipe[1]);
+
+    ASSERT_EQ(child, wait4(child, &status, 0, nullptr)) << strerror(errno);
+    ASSERT_TRUE(WIFEXITED(status));
+    EXPECT_EQ(0, WEXITSTATUS(status));
+}
+// Runs ForkTracemeAndWaitFromThread on a helper thread; it must report a positive result and exit status 0.
+TEST(WaitRusage, ThreadForkedTracemeIsWaitableByForkingThreadWithWclone) {
+    ThreadPtraceForkArgs args;
+    pthread_t thread {};
+    ASSERT_EQ(0, pthread_create(&thread, nullptr, ForkTracemeAndWaitFromThread, &args))
+        << strerror(errno);
+    ASSERT_EQ(0, pthread_join(thread, nullptr)) << strerror(errno);
+
+    EXPECT_GT(args.result, 0) << strerror(args.err);
+    ASSERT_TRUE(WIFEXITED(args.status));
+    EXPECT_EQ(0, WEXITSTATUS(args.status));
+}
+// wait4() on the TID reported by a still-running helper thread must fail with ECHILD (__WCLONE and __WALL).
+TEST(WaitRusage, NaturalWaitCannotReapThreadTid) {
+    int ready_pipe[2] = {};
+    int release_pipe[2] = {};
+    ASSERT_EQ(0, pipe(ready_pipe)) << strerror(errno);
+    ASSERT_EQ(0, pipe(release_pipe)) << strerror(errno);
+
+    ThreadTidArgs args;
+    args.ready_fd = ready_pipe[1];
+    args.release_fd = release_pipe[0];
+    pthread_t thread {};
+    ASSERT_EQ(0, pthread_create(&thread, nullptr, ReportTidAndWait, &args))
+        << strerror(errno);
+
+    char byte = 0;
+    ASSERT_EQ(1, read(ready_pipe[0], &byte, 1)) << strerror(errno);
+    close(ready_pipe[0]);
+    ASSERT_GT(args.tid, 0);
+
+    int status = 0;
+    errno = 0;
+    EXPECT_EQ(-1, wait4(args.tid, &status, WNOHANG | __WCLONE, nullptr));
+    EXPECT_EQ(ECHILD, errno);
+
+    errno = 0;
+    EXPECT_EQ(-1, wait4(args.tid, &status, WNOHANG | __WALL, nullptr));
+    EXPECT_EQ(ECHILD, errno);
+
+    ASSERT_EQ(1, write(release_pipe[1], "x", 1)) << strerror(errno);
+    close(release_pipe[1]);
+    ASSERT_EQ(0, pthread_join(thread, nullptr)) << strerror(errno);
+    close(ready_pipe[1]);
+    close(release_pipe[0]);
+}
+// waitid(P_PIDFD) with a negative fd must fail with EINVAL.
+TEST(WaitRusage, WaitidPidfdNegativeFdIsEinval) {
+    siginfo_t si {};
+    errno = 0;
+    EXPECT_EQ(-1,
+              syscall(SYS_waitid, P_PIDFD, -1, &si, WEXITED | WNOHANG, nullptr));
+    EXPECT_EQ(EINVAL, errno);
+}
+// wait4() with WNOWAIT must fail with EINVAL, and the child must still be reapable by a plain wait4().
+TEST(WaitRusage, Wait4RejectsWnowaitWithoutReapingChild) {
+    pid_t child = fork();
+    ASSERT_GE(child, 0) << strerror(errno);
+    if (child == 0) {
+        _exit(0);
+    }
+
+    int status = 0;
+    errno = 0;
+    EXPECT_EQ(-1, wait4(child, &status, WNOWAIT, nullptr));
+    EXPECT_EQ(EINVAL, errno);
+
+    ASSERT_EQ(child, wait4(child, &status, 0, nullptr)) << strerror(errno);
+    ASSERT_TRUE(WIFEXITED(status));
+    EXPECT_EQ(0, WEXITSTATUS(status));
+}
+// WNOHANG with the child still blocked on the pipe must return 0 even when status/rusage are the bogus address 1.
+TEST(WaitRusage, WnohangNoEventDoesNotTouchUserPointers) {
+    int fds[2] = {};
+    ASSERT_EQ(0, pipe(fds)) << strerror(errno);
+
+    pid_t child = fork();
+    ASSERT_GE(child, 0) << strerror(errno);
+    if (child == 0) {
+        close(fds[1]);
+        char byte = 0;
+        if (read(fds[0], &byte, 1) < 0) {
+            _exit(2);
+        }
+        close(fds[0]);
+        _exit(0);
+    }
+    close(fds[0]);
+
+    errno = 0;
+    EXPECT_EQ(0, wait4(child, reinterpret_cast<int *>(1), WNOHANG,
+                       reinterpret_cast<struct rusage *>(1)))
+        << strerror(errno);
+
+    ASSERT_EQ(1, write(fds[1], "x", 1)) << strerror(errno);
+    close(fds[1]);
+
+    int status = 0;
+    ASSERT_EQ(child, wait4(child, &status, 0, nullptr)) << strerror(errno);
+    ASSERT_TRUE(WIFEXITED(status));
+    EXPECT_EQ(0, WEXITSTATUS(status));
+}
+// With SIGCHLD set to SIG_IGN, wait4() on the child must fail with ECHILD and RUSAGE_CHILDREN CPU time must not change.
+TEST(WaitRusage, ExplicitSigignSigchldAutoreapsWithoutChildRusage) {
+    struct sigaction old_action {};
+    struct sigaction ignore_action {};
+    ignore_action.sa_handler = SIG_IGN;
+    sigemptyset(&ignore_action.sa_mask);
+    ASSERT_EQ(0, sigaction(SIGCHLD, &ignore_action, &old_action)) << strerror(errno);
+
+    struct rusage before {};
+    ASSERT_EQ(0, getrusage(RUSAGE_CHILDREN, &before)) << strerror(errno);
+
+    pid_t child = fork();
+    ASSERT_GE(child, 0) << strerror(errno);
+    if (child == 0) {
+        BusyForUsec(300000);  // NOTE(review): presumably never returns to the test body; confirm, else the child needs _exit().
+    }
+
+    errno = 0;
+    EXPECT_EQ(-1, wait4(child, nullptr, 0, nullptr));
+    EXPECT_EQ(ECHILD, errno);
+
+    struct rusage after {};
+    ASSERT_EQ(0, getrusage(RUSAGE_CHILDREN, &after)) << strerror(errno);
+    EXPECT_EQ(RusageCpuUsec(before), RusageCpuUsec(after));
+
+    ASSERT_EQ(0, sigaction(SIGCHLD, &old_action, nullptr)) << strerror(errno);
+}
+
 TEST(WaitRusage, WNowaitDoesNotReapAndWait4AccountsChildUsage) {
     struct rusage before {};
     ASSERT_EQ(0, getrusage(RUSAGE_CHILDREN, &before)) << strerror(errno);
diff --git a/user/apps/tests/syscall/gvisor/blocklists/wait_test b/user/apps/tests/syscall/gvisor/blocklists/wait_test
index 4bcfca438a..cc4627048c 100644
--- a/user/apps/tests/syscall/gvisor/blocklists/wait_test
+++ b/user/apps/tests/syscall/gvisor/blocklists/wait_test
@@ -1,7 +1,2 @@
 # 缺少 SYS_PTRACE
 WaitTest.TraceeWALL
-# 卡死
-Waiters/WaitAnyChildTest.IgnoredChildRusage/*
-
-# 缺少 /bin/true
-Waiters/WaitSpecificChildTest.AfterChildExecve/*