Files
Ayay/SHH.ProcessLaunchers/ManagedProcess.cs

486 lines
18 KiB
C#
Raw Permalink Normal View History

2026-01-03 08:44:38 +08:00
using System;
using System.Diagnostics;
using System.IO;
using System.Threading;
using System.Threading.Tasks;
namespace SHH.ProcessLaunchers
{
// =========================================================
// 内部核心类:单个受管进程 (封装了所有复杂逻辑)
// =========================================================
/// <summary>
/// 受管进程实例 (Internal Worker)
/// <para>职责:管理【单个】进程的生命周期。</para>
/// <para>功能:包含 启动/停止/自愈/熔断/监控 的核心状态机逻辑。</para>
/// </summary>
internal class ManagedProcess
{
#region --- (Fields) ---
// Collaborators supplied through the constructor: the process configuration,
// the owning manager (receives output/state-change dispatches) and the logger.
private readonly ProcessConfig _config;
private readonly ProcessManager _manager;
private readonly ILauncherLogger _logger;
/// <summary>
/// The underlying operating-system process object.
/// </summary>
private Process _process;
/// <summary>
/// Flag: whether the current stop is intentional.
/// <para>True: stopped manually by the user (no self-healing).</para>
/// <para>False: running (an exit triggers self-healing).</para>
/// </summary>
private bool _isIntentionalStop = true;
// --- Cancellation sources controlling the asynchronous background tasks ---
private CancellationTokenSource _delayCts; // cancels the pending restart / circuit-breaker countdown
private CancellationTokenSource _monitorCts; // cancels the resource-monitoring loop
private CancellationTokenSource _schedulerCts; // cancels the scheduled-restart timer
// --- Runtime statistics ---
private int _consecutiveFailures = 0; // consecutive failure count (circuit-breaker counter)
private DateTime? _lastStartTime; // time of the last start (used to compute the stable-run duration)
private DateTime? _lastExitTime; // time of the last exit
private DateTime? _nextRetryTime; // point in time of the next automatic retry
/// <summary>
/// Current lifecycle status (read-only to callers).
/// </summary>
public ProcessStatus Status { get; private set; } = ProcessStatus.Stopped;
/// <summary>
/// Exposes the configuration this instance was created with.
/// </summary>
public ProcessConfig Config => _config;
#endregion
#region --- (Constructor) ---
/// <summary>
/// Creates the worker that manages a single configured process.
/// </summary>
/// <param name="config">Configuration of the process to manage.</param>
/// <param name="manager">Owning manager; receives output and state-change dispatches.</param>
/// <param name="logger">Logger used for lifecycle and console messages.</param>
/// <exception cref="ArgumentNullException">Any argument is <c>null</c>.</exception>
public ManagedProcess(ProcessConfig config, ProcessManager manager, ILauncherLogger logger)
{
    // Every member dereferences these three collaborators, so reject null here
    // instead of failing later with a NullReferenceException on a background thread.
    _config = config ?? throw new ArgumentNullException(nameof(config));
    _manager = manager ?? throw new ArgumentNullException(nameof(manager));
    _logger = logger ?? throw new ArgumentNullException(nameof(logger));
}
#endregion
#region --- (External Commands) ---
/// <summary>
/// Entry point for a start request.
/// <para>Ignored while the process is already running or starting. Otherwise any pending
/// retry countdown is cancelled, the failure statistics are cleared, self-healing is
/// re-armed, the request is logged and the process is launched.</para>
/// </summary>
/// <param name="trigger">Origin of the request; forwarded to the lifecycle log.</param>
/// <param name="reason">Reason text; forwarded to the lifecycle log.</param>
public void ExecuteStart(LogTrigger trigger, string reason)
{
    bool alreadyActive = Status == ProcessStatus.Running || Status == ProcessStatus.Starting;
    if (alreadyActive)
    {
        return;
    }

    // A manual start usually means the operator fixed the problem, so wipe the
    // negative state accumulated by earlier failures.
    CancellationTokenSource pendingDelay = _delayCts;
    if (pendingDelay != null)
    {
        pendingDelay.Cancel();
    }

    // false => an exit from now on is unexpected and triggers self-healing.
    _isIntentionalStop = false;
    _consecutiveFailures = 0;
    _nextRetryTime = null;

    _logger.LogLifecycle(_config.Id, LogAction.Start, trigger, reason);

    LaunchProcess();
}
/// <summary>
/// Entry point for a stop request.
/// <para>Marks the stop as intentional, cancels every background task, logs the request
/// (unless the status is already Stopped), kills the process and sets the status to Stopped.</para>
/// </summary>
/// <param name="trigger">Origin of the request; forwarded to the lifecycle log.</param>
/// <param name="reason">Reason text; forwarded to the lifecycle log.</param>
public void ExecuteStop(LogTrigger trigger, string reason)
{
    // Must be set first: HandleExitLogic returns early when this flag is true,
    // so the kill below does not schedule a restart.
    _isIntentionalStop = true;

    // Cancel the retry countdown, the monitor loop and the scheduled restart.
    var backgroundTasks = new[] { _delayCts, _monitorCts, _schedulerCts };
    foreach (CancellationTokenSource cts in backgroundTasks)
    {
        cts?.Cancel();
    }
    _nextRetryTime = null;

    // Skip the log entry when there is nothing to stop.
    bool wasActive = Status != ProcessStatus.Stopped;
    if (wasActive)
    {
        _logger.LogLifecycle(_config.Id, LogAction.Stop, trigger, reason);
    }

    KillProcess();
    UpdateStatus(ProcessStatus.Stopped);
}
/// <summary>
/// Calls <c>Reset()</c> on every resource guard configured for this process.
/// Does nothing when no guard collection is configured.
/// </summary>
public void ResetGuards()
{
    var guards = _config.Guards;
    if (guards == null)
    {
        return;
    }

    foreach (var guard in guards)
    {
        guard.Reset();
    }
}
#endregion
#region --- (Core Launch Logic) ---
/// <summary>
/// Performs a single launch attempt.
/// <para>Sets the status to Starting, verifies the executable exists, builds the start info,
/// wires the output/exit handlers, starts the process, then sets the status to Running and
/// attaches the resource monitor and the optional scheduled restart.</para>
/// <para>A missing executable or any exception during the attempt is logged and routed to
/// <c>HandleExitLogic(-1)</c>, so failed launches take part in the retry / circuit-breaker decision.</para>
/// </summary>
private void LaunchProcess()
{
    try
    {
        UpdateStatus(ProcessStatus.Starting);

        // 1. Path check.
        string path = Path.GetFullPath(_config.ExePath);
        if (!File.Exists(path))
        {
            _logger.LogLifecycle(_config.Id, LogAction.Error, LogTrigger.System, "可执行文件未找到", path);
            // A missing file is a hard failure: go straight to the exit decision logic
            // (which may trip the circuit breaker).
            HandleExitLogic(exitCode: -1);
            return;
        }

        // 2. Build the ProcessStartInfo.
        var psi = new ProcessStartInfo
        {
            FileName = path,
            Arguments = _config.Arguments,
            // Default to the executable's own directory when no working directory is configured.
            WorkingDirectory = string.IsNullOrEmpty(_config.WorkingDirectory) ? Path.GetDirectoryName(path) : _config.WorkingDirectory,
            // Window visibility.
            CreateNoWindow = !_config.Visible,
            // Must be false for stream redirection to work.
            UseShellExecute = false,
            // Redirection switches.
            RedirectStandardOutput = _config.EnableLogRedirect,
            RedirectStandardError = _config.EnableLogRedirect
        };

        // Keep the new instance in a local so that every handler below is bound to THIS
        // process rather than to whatever the _process field points at when it fires.
        var proc = new Process { StartInfo = psi, EnableRaisingEvents = true };

        // 3. Forward redirected output (read asynchronously).
        if (_config.EnableLogRedirect)
        {
            proc.OutputDataReceived += (s, e) =>
            {
                if (!string.IsNullOrEmpty(e.Data))
                {
                    // A. Write to the log system.
                    _logger.LogConsole(_config.Id, e.Data, false);
                    // B. Raise the external event (e.g. for live UI refresh).
                    _manager.DispatchOutput(_config.Id, e.Data, false);
                }
            };
            proc.ErrorDataReceived += (s, e) =>
            {
                if (!string.IsNullOrEmpty(e.Data))
                {
                    _logger.LogConsole(_config.Id, e.Data, true);
                    _manager.DispatchOutput(_config.Id, e.Data, true);
                }
            };
        }

        // 4. Exit hook (core lifecycle callback). Exited is raised on a background thread.
        proc.Exited += (s, e) =>
        {
            // Ignore a late exit notification from a process that has already been replaced;
            // otherwise it would be treated as a crash of the current process and could
            // schedule a duplicate launch.
            if (!ReferenceEquals(proc, _process)) return;

            int code = -1;
            try { code = proc.ExitCode; } catch { }
            HandleExitLogic(code);
        };

        // Publish the new instance before releasing the old one so that _process never
        // refers to a disposed object, then free the previous instance's OS handle.
        Process previous = _process;
        _process = proc;
        if (previous != null)
        {
            try { previous.Dispose(); } catch { /* best effort: the old instance is no longer used */ }
        }

        // 5. Ask the operating system to start the process.
        if (!proc.Start())
        {
            throw new InvalidOperationException("Process.Start() 返回 false启动失败");
        }

        // 6. Begin asynchronous reads (only valid after Start).
        if (_config.EnableLogRedirect)
        {
            proc.BeginOutputReadLine();
            proc.BeginErrorReadLine();
        }

        // 7. Update state.
        _lastStartTime = DateTime.Now;
        UpdateStatus(ProcessStatus.Running);
        _logger.LogLifecycle(_config.Id, LogAction.Start, LogTrigger.System, "进程启动成功", new { PID = proc.Id });

        // 8. Attach the resource-monitoring loop.
        StartMonitoring();

        // 9. Attach the scheduled restart, if configured.
        if (_config.AutoRestartIntervalMinutes > 0)
        {
            // Compute in 64-bit and clamp: minutes * 60000 overflows Int32 for intervals
            // above ~35,791 minutes, and Task.Delay rejects negative delays.
            long intervalMs = (long)_config.AutoRestartIntervalMinutes * 60 * 1000;
            ScheduleScheduledRestart((int)Math.Min(intervalMs, int.MaxValue));
        }
    }
    catch (Exception ex)
    {
        _logger.LogLifecycle(_config.Id, LogAction.Error, LogTrigger.System, $"启动过程异常: {ex.Message}");
        HandleExitLogic(-1);
    }
}
#endregion
#region --- (Guard & Monitor) ---
/// <summary>
/// Starts the background resource-monitoring task for the current process.
/// <para>Any previous monitor is cancelled first. When no guards are configured a fresh
/// cancellation source is still installed, but no task is started.</para>
/// </summary>
private void StartMonitoring()
{
    // Retire the previous loop and install a fresh cancellation source.
    _monitorCts?.Cancel();
    var monitorCts = new CancellationTokenSource();
    _monitorCts = monitorCts;

    var configuredGuards = _config.Guards;
    bool hasGuards = configuredGuards != null && configuredGuards.Count != 0;
    if (!hasGuards)
    {
        return;
    }

    CancellationToken token = monitorCts.Token;
    Task.Run(() => RunGuardLoopAsync(token), token);
}

/// <summary>
/// Polling loop behind <see cref="StartMonitoring"/>: every 3 seconds runs all configured
/// guards against the current process until cancelled, the process is gone, or a guard
/// reports a critical result.
/// </summary>
/// <param name="token">Cancels the loop, including the wait between polls.</param>
private async Task RunGuardLoopAsync(CancellationToken token)
{
    while (!token.IsCancellationRequested)
    {
        try
        {
            // Fixed polling interval of 3 seconds.
            await Task.Delay(3000, token);

            // Stop once there is no live process left to inspect.
            bool processAlive = _process != null && !_process.HasExited;
            if (!processAlive)
            {
                return;
            }

            foreach (var guard in _config.Guards)
            {
                var verdict = guard.Check(_process, out string detail);

                if (verdict == GuardResult.Critical)
                {
                    // Critical: log, then kill. The kill raises Exited, which goes through
                    // HandleExitLogic and restarts the process.
                    _logger.LogLifecycle(_config.Id, LogAction.Restart, LogTrigger.ResourceGuard, $"[严重] {detail} -> 执行管控重启");
                    KillProcess();
                    return;
                }

                if (verdict == GuardResult.Warning)
                {
                    // Warning: log only; the process is left untouched.
                    _logger.LogLifecycle(_config.Id, LogAction.ResourceCheck, LogTrigger.ResourceGuard, $"[警告] {detail}");
                }
            }
        }
        catch (TaskCanceledException)
        {
            // Normal cancellation of the delay.
            return;
        }
        catch (Exception ex)
        {
            // Any other failure is reported and the loop keeps polling.
            _logger.LogConsole(_config.Id, $"监控线程异常: {ex.Message}", true);
        }
    }
}
/// <summary>
/// Arms the scheduled-restart timer, replacing (and cancelling) any previously armed one.
/// <para>When the delay elapses without cancellation and the status is still Running, the
/// process is killed; its exit then flows through the normal restart logic.</para>
/// </summary>
/// <param name="delayMs">Delay in milliseconds before the restart is triggered.</param>
private void ScheduleScheduledRestart(int delayMs)
{
    _schedulerCts?.Cancel();

    // Bind the delay to a local source so it cannot pick up a different source
    // if the field is replaced concurrently.
    var schedulerCts = new CancellationTokenSource();
    _schedulerCts = schedulerCts;

    Task.Delay(delayMs, schedulerCts.Token).ContinueWith(t =>
    {
        // Restart only when the timer was not cancelled and the process is still running.
        if (!t.IsCanceled && Status == ProcessStatus.Running)
        {
            _logger.LogLifecycle(_config.Id, LogAction.Restart, LogTrigger.Scheduler, "执行计划性重启 (AutoRestart)");
            KillProcess(); // the resulting exit triggers the automatic restart
        }
    },
    // Always run on the thread pool. Without an explicit scheduler the continuation uses
    // TaskScheduler.Current, so the blocking kill could end up on the caller's scheduler.
    TaskScheduler.Default);
}
#endregion
#region --- (Decision Logic) ---
/// <summary>
/// Core decision logic after the process has exited or a launch attempt has failed
/// (self-healing + circuit breaker).
/// <para>Cancels the monitor and scheduler tasks. For an intentional stop nothing else happens.
/// Otherwise the exit is logged, the consecutive-failure counter is reset (run longer than the
/// stability threshold) or incremented, and either a short retry or a circuit-breaker
/// cool-down is scheduled.</para>
/// </summary>
/// <param name="exitCode">Exit code of the process; -1 when unknown or when the launch itself failed.</param>
private void HandleExitLogic(int exitCode)
{
    // 1. Stop the companion tasks of the process that just went away.
    _monitorCts?.Cancel();
    _schedulerCts?.Cancel();

    // 2. Intent check. A user stop sets _isIntentionalStop and must not restart.
    //    Kills issued by the resource guard or the scheduler leave it false and fall through.
    if (_isIntentionalStop) return;

    DateTime now = DateTime.Now;
    DateTime? previousExit = _lastExitTime;
    DateTime? lastStart = _lastStartTime;
    _lastExitTime = now;
    _logger.LogLifecycle(_config.Id, LogAction.Crash, LogTrigger.System, "侦测到进程退出", new { ExitCode = exitCode });

    // 3. Stabilization check: a process that outlived the threshold gets its failure count reset.
    //    _lastStartTime is only refreshed by a successful start. If it is not newer than the
    //    previously recorded exit, this call is a failed launch, so the run duration is 0.
    //    Measuring from the stale start time would make every failed launch look "stable",
    //    reset the counter each time and keep the circuit breaker from ever tripping.
    bool startedSinceLastExit = lastStart.HasValue
        && (!previousExit.HasValue || lastStart.Value > previousExit.Value);
    double runDurationMs = startedSinceLastExit ? (now - lastStart.Value).TotalMilliseconds : 0;

    if (runDurationMs > _config.StabilityThresholdMs)
    {
        if (_consecutiveFailures > 0)
            _logger.LogConsole(_config.Id, $"运行稳定({runDurationMs / 1000:F0}s),重置失败计数", false);
        _consecutiveFailures = 0;
    }
    else
    {
        _consecutiveFailures++;
    }

    // 4. Circuit breaker: once the failure count reaches the limit, enter the long cool-down
    //    instead of the short retry.
    if (_consecutiveFailures >= _config.MaxConsecutiveFailures)
    {
        EnterCoolingDown();
    }
    else
    {
        EnterShortRetry();
    }
}
/// <summary>
/// Schedules an ordinary restart: sets the status to PendingRestart, records the retry time,
/// logs the attempt counter and relaunches after the configured restart delay.
/// </summary>
private void EnterShortRetry()
{
    int retryDelayMs = _config.RestartDelayMs;

    UpdateStatus(ProcessStatus.PendingRestart);
    _nextRetryTime = DateTime.Now.AddMilliseconds(retryDelayMs);

    string progress = $"准备自动重启 ({_consecutiveFailures}/{_config.MaxConsecutiveFailures})";
    _logger.LogLifecycle(_config.Id, LogAction.Restart, LogTrigger.System, progress, new { DelayMs = retryDelayMs });

    // Relaunch once the delay has elapsed, unless it is cancelled first.
    WaitAndExec(retryDelayMs, LaunchProcess);
}
/// <summary>
/// Enters the circuit-breaker cool-down: sets the status to CoolingDown, records the retry
/// time, logs the event and attempts a relaunch after the configured cool-down delay.
/// </summary>
private void EnterCoolingDown()
{
    int coolDownMs = _config.CircuitBreakerDelayMs;

    UpdateStatus(ProcessStatus.CoolingDown);
    _nextRetryTime = DateTime.Now.AddMilliseconds(coolDownMs);

    // Whole minutes (integer division), reported as log payload.
    int wholeMinutes = coolDownMs / 1000 / 60;
    _logger.LogLifecycle(_config.Id, LogAction.CircuitBreak, LogTrigger.System,
        "触发熔断保护", new { Minutes = wholeMinutes });

    // When the cool-down ends, log it and try to recover.
    Action recover = () =>
    {
        _logger.LogLifecycle(_config.Id, LogAction.Restart, LogTrigger.System, "熔断冷却结束,尝试恢复");
        LaunchProcess();
    };
    WaitAndExec(coolDownMs, recover);
}
/// <summary>
/// Runs <paramref name="action"/> on the thread pool after <paramref name="delayMs"/>
/// milliseconds, unless the countdown is cancelled through <c>_delayCts</c> first.
/// <para>Only one countdown is kept alive: arming a new one cancels the previous one.</para>
/// </summary>
/// <param name="delayMs">Delay in milliseconds.</param>
/// <param name="action">Work to execute when the delay elapses without cancellation.</param>
private void WaitAndExec(int delayMs, Action action)
{
    // Swap in the new source first, then cancel the one it replaces. Overwriting the field
    // without cancelling would leave the earlier countdown running with no way to cancel it,
    // so it could still fire and launch the process a second time.
    // Cancelling a source whose delay has already completed is a no-op.
    CancellationTokenSource previous = _delayCts;
    var current = new CancellationTokenSource();
    _delayCts = current;
    previous?.Cancel();

    Task.Delay(delayMs, current.Token).ContinueWith(t =>
    {
        // Execute only when the countdown was not cancelled.
        if (!t.IsCanceled) action();
    }, TaskScheduler.Default);
}
#endregion
#region --- (Helpers) ---
/// <summary>
/// Forcibly terminates the managed process, if there is one and it is still running,
/// then waits up to 500 ms for it to exit. Best effort: failures are swallowed.
/// </summary>
/// <remarks>
/// The parameterless <c>Process.Kill()</c> terminates only the root process. Killing the whole
/// process tree requires <c>Kill(true)</c>, which exists on .NET Core 3.0 and later.
/// NOTE(review): switch to <c>Kill(true)</c> if child processes must die too and the target
/// framework allows it.
/// </remarks>
private void KillProcess()
{
    // Work on a snapshot so every call below targets the same instance even if the
    // field is replaced by a concurrent relaunch.
    Process proc = _process;
    if (proc == null)
    {
        return;
    }

    try
    {
        // HasExited itself throws InvalidOperationException when the Process object was never
        // started (e.g. Start failed), so it must be evaluated inside the try block.
        if (proc.HasExited)
        {
            return;
        }

        proc.Kill();
        proc.WaitForExit(500); // brief wait for resources to be released
    }
    catch
    {
        // Deliberately ignored: insufficient permissions, a never-started process,
        // or the process exiting between the check and the kill.
    }
}
/// <summary>
/// Stores the new status and, only when it differs from the current one, notifies the
/// manager so it can dispatch the state-change event.
/// </summary>
/// <param name="status">Status to switch to.</param>
private void UpdateStatus(ProcessStatus status)
{
    bool unchanged = Status == status;
    if (unchanged)
    {
        return;
    }

    Status = status;
    // Hand the change to the manager, which raises the external event.
    _manager.DispatchStateChange(_config.Id, status);
}
/// <summary>
/// Builds a snapshot DTO of the current state.
/// <para>The PID is reported only while the status is Running. While cooling down, the message
/// contains the remaining time as <c>total-minutes:seconds</c>.</para>
/// </summary>
/// <returns>A new <c>ProcessInfoSnapshot</c> populated from the current fields.</returns>
public ProcessInfoSnapshot GetSnapshot()
{
    // Read the mutable state once so the snapshot is consistent with itself and cannot hit a
    // null between a HasValue check and a Value access when another thread clears the field.
    var status = Status;
    DateTime? nextRetry = _nextRetryTime;

    int? pid = null;
    try { if (status == ProcessStatus.Running) pid = _process?.Id; } catch { }

    string msg = "";
    // Countdown text for the circuit-breaker cool-down.
    if (status == ProcessStatus.CoolingDown && nextRetry.HasValue)
    {
        TimeSpan remaining = nextRetry.Value - DateTime.Now;
        // An overdue retry shows 0:00 instead of negative numbers.
        if (remaining < TimeSpan.Zero) remaining = TimeSpan.Zero;

        // TotalMinutes, not Minutes: Minutes is only the 0-59 component and would drop
        // whole hours for cool-downs of an hour or more.
        int minutes = (int)remaining.TotalMinutes;
        int seconds = remaining.Seconds;
        msg = $"熔断中 (剩余 {minutes}:{seconds:D2})";
    }

    return new ProcessInfoSnapshot
    {
        Id = _config.Id,
        DisplayName = _config.DisplayName,
        Pid = pid,
        Status = status,
        LastStartTime = _lastStartTime,
        LastExitTime = _lastExitTime,
        ConsecutiveFailures = _consecutiveFailures,
        NextRetryTime = nextRetry,
        Message = msg
    };
}
#endregion
}
}