using System;
using System.Diagnostics;
using System.IO;
using System.Threading;
using System.Threading.Tasks;

namespace SHH.ProcessLaunchers
{
    // =========================================================
    // Internal core class: a single managed process
    // (encapsulates all of the lifecycle logic)
    // =========================================================

    /// <summary>
    /// Managed process instance (internal worker).
    /// Owns the lifecycle of exactly one OS process: start, stop, automatic
    /// restart after an unexpected exit, circuit breaking after repeated
    /// failures, resource-guard monitoring and scheduled restarts.
    /// </summary>
    /// <remarks>
    /// NOTE(review): state is mutated from caller threads, from the
    /// <see cref="Process.Exited"/> callback and from thread-pool continuations
    /// without a lock. The guards below close the concrete races that lead to
    /// duplicate launches, but the class is not fully thread-safe.
    /// </remarks>
    internal class ManagedProcess
    {
        #region --- Fields ---

        private readonly ProcessConfig _config;
        private readonly ProcessManager _manager;
        private readonly ILauncherLogger _logger;

        /// <summary>
        /// The operating-system process currently owned by this instance
        /// (null before the first launch and after a failed launch).
        /// </summary>
        private Process _process;

        /// <summary>
        /// True  : the stop was requested explicitly (no self-healing).
        /// False : the process is supposed to be running (an exit triggers self-healing).
        /// Volatile because it is written by callers and read on the Exited callback thread.
        /// </summary>
        private volatile bool _isIntentionalStop = true;

        // --- Cancellation sources for the background work ---
        private CancellationTokenSource _delayCts;      // cancels the restart / circuit-breaker countdown
        private CancellationTokenSource _monitorCts;    // cancels the resource-monitoring loop
        private CancellationTokenSource _schedulerCts;  // cancels the scheduled-restart timer

        // --- Runtime statistics ---
        private int _consecutiveFailures = 0;   // consecutive failure count (circuit-breaker counter)
        private DateTime? _lastStartTime;       // last successful start (used to compute the stable run time)
        private DateTime? _lastExitTime;        // last detected exit
        private DateTime? _nextRetryTime;       // when the next automatic retry is due

        /// <summary>
        /// Current lifecycle status (read-only for callers).
        /// </summary>
        public ProcessStatus Status { get; private set; } = ProcessStatus.Stopped;

        /// <summary>
        /// The configuration this instance was created with.
        /// </summary>
        public ProcessConfig Config => _config;

        #endregion

        #region --- Constructor ---

        /// <summary>
        /// Creates a worker for one configured process.
        /// </summary>
        /// <param name="config">Launch configuration of the process.</param>
        /// <param name="manager">Owner that receives state-change and output notifications.</param>
        /// <param name="logger">Sink for lifecycle and console log entries.</param>
        public ManagedProcess(ProcessConfig config, ProcessManager manager, ILauncherLogger logger)
        {
            _config = config;
            _manager = manager;
            _logger = logger;
        }

        #endregion

        #region --- External Commands ---

        /// <summary>
        /// Entry point for starting the process. Ignored while the process is
        /// already running or starting.
        /// </summary>
        /// <param name="trigger">What initiated the start (forwarded to the log).</param>
        /// <param name="reason">Human-readable reason (forwarded to the log).</param>
        public void ExecuteStart(LogTrigger trigger, string reason)
        {
            if (Status == ProcessStatus.Running || Status == ProcessStatus.Starting)
            {
                return;
            }

            // 1. Reset every negative state: cancel a pending retry / cool-down
            //    and clear the failure counter.
            _delayCts?.Cancel();
            _isIntentionalStop = false; // "not an intentional stop" -> guarding is enabled
            _consecutiveFailures = 0;
            _nextRetryTime = null;

            // 2. Log
            _logger.LogLifecycle(_config.Id, LogAction.Start, trigger, reason);

            // 3. Actually launch
            LaunchProcess();
        }

        /// <summary>
        /// Entry point for stopping the process. Cancels all background work,
        /// kills the process and sets the status to Stopped.
        /// </summary>
        /// <param name="trigger">What initiated the stop (forwarded to the log).</param>
        /// <param name="reason">Human-readable reason (forwarded to the log).</param>
        public void ExecuteStop(LogTrigger trigger, string reason)
        {
            // 1. Mark as intentional so HandleExitLogic does not restart the process.
            _isIntentionalStop = true;

            // 2. Cancel all background work.
            _delayCts?.Cancel();
            _monitorCts?.Cancel();
            _schedulerCts?.Cancel();
            _nextRetryTime = null;

            // 3. Log (only when not already stopped).
            if (Status != ProcessStatus.Stopped)
            {
                _logger.LogLifecycle(_config.Id, LogAction.Stop, trigger, reason);
            }

            // 4. Kill the process.
            KillProcess();
            UpdateStatus(ProcessStatus.Stopped);
        }

        /// <summary>
        /// Calls Reset() on every configured resource guard.
        /// </summary>
        public void ResetGuards()
        {
            if (_config.Guards == null)
            {
                return;
            }

            foreach (var guard in _config.Guards)
            {
                guard.Reset();
            }
        }

        #endregion

        #region --- Core Launch Logic ---

        /// <summary>
        /// Performs one launch attempt. Any failure (missing executable,
        /// exception while starting) is routed to <see cref="HandleExitLogic"/>
        /// as a failed launch so the retry / circuit-breaker logic applies.
        /// </summary>
        private void LaunchProcess()
        {
            Process proc = null;
            bool started = false;

            try
            {
                UpdateStatus(ProcessStatus.Starting);

                // 1. Path check
                string path = Path.GetFullPath(_config.ExePath);
                if (!File.Exists(path))
                {
                    _logger.LogLifecycle(_config.Id, LogAction.Error, LogTrigger.System, "可执行文件未找到", path);

                    // A missing file goes straight to the exit decision logic
                    // (which may trip the circuit breaker).
                    HandleExitLogic(exitCode: -1, launchFailed: true);
                    return;
                }

                // 2-4. Build the process object with its event handlers attached.
                proc = CreateProcess(path);

                // Publish the new instance and release the handle of the previous one.
                Process previous = _process;
                _process = proc;
                DisposeQuietly(previous);

                // 5. Ask the operating system to start the process.
                if (!proc.Start())
                {
                    throw new InvalidOperationException("Process.Start() 返回 false,启动失败");
                }

                started = true;

                // 6. Begin asynchronous stream reading (must happen after Start).
                if (_config.EnableLogRedirect)
                {
                    proc.BeginOutputReadLine();
                    proc.BeginErrorReadLine();
                }

                // 7. Update state.
                _lastStartTime = DateTime.Now;
                UpdateStatus(ProcessStatus.Running);
                _logger.LogLifecycle(_config.Id, LogAction.Start, LogTrigger.System, "进程启动成功", new { PID = proc.Id });

                // 8. Resource monitoring loop.
                StartMonitoring();

                // 9. Scheduled restart, if configured.
                if (_config.AutoRestartIntervalMinutes > 0)
                {
                    // Computed in 64-bit and clamped: the 32-bit product overflows for
                    // intervals above ~35,791 minutes and Task.Delay rejects negative delays.
                    long intervalMs = (long)_config.AutoRestartIntervalMinutes * 60L * 1000L;
                    ScheduleScheduledRestart((int)Math.Min(intervalMs, int.MaxValue));
                }
            }
            catch (Exception ex)
            {
                _logger.LogLifecycle(_config.Id, LogAction.Error, LogTrigger.System, $"启动过程异常: {ex.Message}");

                if (proc != null)
                {
                    // Detach first so the Exited callback of this instance is ignored,
                    // then make sure a half-started process does not survive as an orphan
                    // next to the retry that is scheduled below.
                    if (ReferenceEquals(_process, proc))
                    {
                        _process = null;
                    }

                    if (started)
                    {
                        try
                        {
                            if (!proc.HasExited)
                            {
                                proc.Kill();
                            }
                        }
                        catch
                        {
                            // Best effort: the process may already be gone or inaccessible.
                        }
                    }

                    DisposeQuietly(proc);
                }

                HandleExitLogic(exitCode: -1, launchFailed: true);
            }
        }

        /// <summary>
        /// Builds a not-yet-started <see cref="Process"/> for <paramref name="path"/>
        /// with output redirection and the exit hook wired up.
        /// </summary>
        private Process CreateProcess(string path)
        {
            var psi = new ProcessStartInfo
            {
                FileName = path,
                Arguments = _config.Arguments,
                // Default to the executable's directory when no working directory is configured.
                WorkingDirectory = string.IsNullOrEmpty(_config.WorkingDirectory)
                    ? Path.GetDirectoryName(path)
                    : _config.WorkingDirectory,
                // Window visibility
                CreateNoWindow = !_config.Visible,
                // Must be false for stream redirection to work.
                UseShellExecute = false,
                // Stream redirection switches
                RedirectStandardOutput = _config.EnableLogRedirect,
                RedirectStandardError = _config.EnableLogRedirect
            };

            var proc = new Process { StartInfo = psi, EnableRaisingEvents = true };

            // Redirected output is both logged and dispatched to the manager.
            if (_config.EnableLogRedirect)
            {
                proc.OutputDataReceived += (s, e) => ForwardOutput(e.Data, false);
                proc.ErrorDataReceived += (s, e) => ForwardOutput(e.Data, true);
            }

            // Exit hook. The handler captures this specific instance instead of
            // reading the mutable _process field.
            proc.Exited += (s, e) => OnProcessExited(proc);

            return proc;
        }

        /// <summary>
        /// Forwards one non-empty line of redirected output to the logger and the manager.
        /// </summary>
        private void ForwardOutput(string data, bool isError)
        {
            if (string.IsNullOrEmpty(data))
            {
                return;
            }

            _logger.LogConsole(_config.Id, data, isError);
            _manager.DispatchOutput(_config.Id, data, isError);
        }

        /// <summary>
        /// Exited callback (raised on a background thread). Exit notifications of
        /// an instance that is no longer the current one are ignored so a late
        /// exit of an old process cannot trigger a restart of the new one.
        /// </summary>
        private void OnProcessExited(Process exited)
        {
            if (!ReferenceEquals(exited, _process))
            {
                return;
            }

            int code = -1;
            try
            {
                code = exited.ExitCode;
            }
            catch
            {
                // Exit code unavailable; keep -1.
            }

            HandleExitLogic(code);
        }

        #endregion

        #region --- Guard & Monitor ---

        /// <summary>
        /// Starts the background loop that polls the configured resource guards
        /// every 3 seconds. Does nothing when no guards are configured.
        /// </summary>
        private void StartMonitoring()
        {
            // 1. Cancel the previous loop.
            _monitorCts?.Cancel();
            _monitorCts = new CancellationTokenSource();

            if (_config.Guards == null || _config.Guards.Count == 0)
            {
                return;
            }

            CancellationToken token = _monitorCts.Token;
            Process monitored = _process;

            // 2. Polling loop
            Task.Run(async () =>
            {
                while (!token.IsCancellationRequested)
                {
                    try
                    {
                        // Polling interval: 3 seconds.
                        await Task.Delay(3000, token);

                        // Stop when the monitored instance is gone, replaced or has exited.
                        if (monitored == null || !ReferenceEquals(monitored, _process) || monitored.HasExited)
                        {
                            break;
                        }

                        foreach (var guard in _config.Guards)
                        {
                            var result = guard.Check(monitored, out string reason);

                            if (result == GuardResult.Warning)
                            {
                                // Warning: log only, do not touch the process.
                                _logger.LogLifecycle(_config.Id, LogAction.ResourceCheck, LogTrigger.ResourceGuard, $"[警告] {reason}");
                            }
                            else if (result == GuardResult.Critical)
                            {
                                // Critical: log and restart.
                                _logger.LogLifecycle(_config.Id, LogAction.Restart, LogTrigger.ResourceGuard, $"[严重] {reason} -> 执行管控重启");

                                // Kill -> Exited -> HandleExitLogic -> automatic restart.
                                KillProcess();
                                return; // leave the monitoring loop
                            }
                        }
                    }
                    catch (OperationCanceledException)
                    {
                        break; // normal cancellation
                    }
                    catch (Exception ex)
                    {
                        _logger.LogConsole(_config.Id, $"监控线程异常: {ex.Message}", true);
                    }
                }
            }, token);
        }

        /// <summary>
        /// Arms the scheduled-restart timer, replacing any previous one.
        /// </summary>
        /// <param name="delayMs">Delay in milliseconds before the restart is triggered.</param>
        private void ScheduleScheduledRestart(int delayMs)
        {
            _schedulerCts?.Cancel();
            _schedulerCts = new CancellationTokenSource();

            Task.Delay(delayMs, _schedulerCts.Token).ContinueWith(t =>
            {
                // Only when not cancelled and the process is still running.
                if (!t.IsCanceled && Status == ProcessStatus.Running)
                {
                    _logger.LogLifecycle(_config.Id, LogAction.Restart, LogTrigger.Scheduler, "执行计划性重启 (AutoRestart)");
                    KillProcess(); // the exit handler performs the restart
                }
            }, TaskScheduler.Default);
        }

        #endregion

        #region --- Decision Logic ---

        /// <summary>
        /// Decides what happens after the process exited or failed to launch
        /// (self-healing + circuit breaker).
        /// </summary>
        /// <param name="exitCode">Exit code of the process (-1 when unknown).</param>
        /// <param name="launchFailed">
        /// True when the process never ran in this attempt. The attempt then always
        /// counts as a failure instead of being measured against the start time of
        /// an earlier run.
        /// </param>
        private void HandleExitLogic(int exitCode, bool launchFailed = false)
        {
            // 1. Stop the companion tasks.
            _monitorCts?.Cancel();
            _schedulerCts?.Cancel();

            // 2. Intent check. An explicit stop sets _isIntentionalStop and ends here.
            //    Kills issued by the resource guard or the scheduler leave the flag
            //    false, so they fall through to the restart logic.
            if (_isIntentionalStop)
            {
                return;
            }

            DateTime now = DateTime.Now;
            _lastExitTime = now;
            _logger.LogLifecycle(_config.Id, LogAction.Crash, LogTrigger.System, "侦测到进程退出", new { ExitCode = exitCode });

            // 3. Stability check: a run longer than the threshold resets the failure
            //    counter; a shorter run (or a failed launch) increments it.
            double runDurationMs = (!launchFailed && _lastStartTime.HasValue)
                ? (now - _lastStartTime.Value).TotalMilliseconds
                : 0;

            if (runDurationMs > _config.StabilityThresholdMs)
            {
                if (_consecutiveFailures > 0)
                {
                    _logger.LogConsole(_config.Id, $"运行稳定({runDurationMs / 1000:F0}s),重置失败计数", false);
                }

                _consecutiveFailures = 0;
            }
            else
            {
                _consecutiveFailures++;
            }

            // 4. Circuit breaker: too many consecutive failures -> long cool-down
            //    instead of a short retry.
            if (_consecutiveFailures >= _config.MaxConsecutiveFailures)
            {
                EnterCoolingDown();
            }
            else
            {
                EnterShortRetry();
            }
        }

        /// <summary>
        /// Schedules a relaunch after the configured restart delay.
        /// </summary>
        private void EnterShortRetry()
        {
            int delay = _config.RestartDelayMs;
            UpdateStatus(ProcessStatus.PendingRestart);
            _nextRetryTime = DateTime.Now.AddMilliseconds(delay);

            _logger.LogLifecycle(_config.Id, LogAction.Restart, LogTrigger.System,
                $"准备自动重启 ({_consecutiveFailures}/{_config.MaxConsecutiveFailures})", new { DelayMs = delay });

            WaitAndExec(delay, () => LaunchProcess());
        }

        /// <summary>
        /// Enters the circuit-breaker cool-down and schedules a recovery attempt
        /// after the configured cool-down delay.
        /// </summary>
        private void EnterCoolingDown()
        {
            int delay = _config.CircuitBreakerDelayMs;
            UpdateStatus(ProcessStatus.CoolingDown);
            _nextRetryTime = DateTime.Now.AddMilliseconds(delay);

            _logger.LogLifecycle(_config.Id, LogAction.CircuitBreak, LogTrigger.System,
                "触发熔断保护", new { Minutes = delay / 1000 / 60 });

            WaitAndExec(delay, () =>
            {
                _logger.LogLifecycle(_config.Id, LogAction.Restart, LogTrigger.System, "熔断冷却结束,尝试恢复");
                LaunchProcess();
            });
        }

        /// <summary>
        /// Runs <paramref name="action"/> after <paramref name="delayMs"/> milliseconds
        /// unless the delay is cancelled through <c>_delayCts</c>.
        /// </summary>
        private void WaitAndExec(int delayMs, Action action)
        {
            // Cancel a still-pending countdown so two delayed launches can never coexist.
            _delayCts?.Cancel();
            _delayCts = new CancellationTokenSource();

            Task.Delay(delayMs, _delayCts.Token).ContinueWith(t =>
            {
                if (!t.IsCanceled)
                {
                    action();
                }
            }, TaskScheduler.Default);
        }

        #endregion

        #region --- Helpers ---

        /// <summary>
        /// Kills the current process (best effort) and waits up to 500 ms for it to exit.
        /// Never throws.
        /// </summary>
        private void KillProcess()
        {
            Process proc = _process;
            if (proc == null)
            {
                return;
            }

            try
            {
                // HasExited throws on a Process object that was never started,
                // so it has to be inside the try block as well.
                if (proc.HasExited)
                {
                    return;
                }

                // Kill() terminates only this process. NOTE(review): taking down child
                // processes too would need Kill(true) (.NET Core 3.0+); not used here
                // because the target framework is not visible from this file.
                proc.Kill();
                proc.WaitForExit(500); // give the OS a moment to release resources
            }
            catch
            {
                // Ignore access-denied and race-condition failures.
            }
        }

        /// <summary>
        /// Disposes a process object, ignoring any failure. Accepts null.
        /// </summary>
        private static void DisposeQuietly(Process process)
        {
            if (process == null)
            {
                return;
            }

            try
            {
                process.Dispose();
            }
            catch
            {
                // Releasing the handle is best effort.
            }
        }

        /// <summary>
        /// Sets the status and notifies the manager when it actually changed.
        /// </summary>
        private void UpdateStatus(ProcessStatus status)
        {
            if (Status == status)
            {
                return;
            }

            Status = status;
            _manager.DispatchStateChange(_config.Id, status);
        }

        /// <summary>
        /// Builds a snapshot DTO of the current state.
        /// </summary>
        /// <returns>A new <see cref="ProcessInfoSnapshot"/> filled from the current fields.</returns>
        public ProcessInfoSnapshot GetSnapshot()
        {
            int? pid = null;
            try
            {
                if (Status == ProcessStatus.Running)
                {
                    pid = _process?.Id;
                }
            }
            catch
            {
                // The process may have gone away between the status check and the read.
            }

            string msg = "";

            // Countdown text while the circuit breaker is cooling down.
            DateTime? retryAt = _nextRetryTime;
            if (Status == ProcessStatus.CoolingDown && retryAt.HasValue)
            {
                TimeSpan remaining = retryAt.Value - DateTime.Now;
                if (remaining < TimeSpan.Zero)
                {
                    remaining = TimeSpan.Zero;
                }

                // Total minutes, not the 0-59 minutes component, so cool-downs of an
                // hour or more are displayed correctly.
                msg = $"熔断中 (剩余 {(int)remaining.TotalMinutes}:{remaining.Seconds:D2})";
            }

            return new ProcessInfoSnapshot
            {
                Id = _config.Id,
                DisplayName = _config.DisplayName,
                Pid = pid,
                Status = Status,
                LastStartTime = _lastStartTime,
                LastExitTime = _lastExitTime,
                ConsecutiveFailures = _consecutiveFailures,
                NextRetryTime = retryAt,
                Message = msg
            };
        }

        #endregion
    }
}