完成进程启动器接口的设计
This commit is contained in:
486
SHH.ProcessLaunchers/ManagedProcess.cs
Normal file
486
SHH.ProcessLaunchers/ManagedProcess.cs
Normal file
@@ -0,0 +1,486 @@
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
using System.IO;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace SHH.ProcessLaunchers
|
||||
{
|
||||
// =========================================================
|
||||
// 内部核心类:单个受管进程 (封装了所有复杂逻辑)
|
||||
// =========================================================
|
||||
/// <summary>
|
||||
/// 受管进程实例 (Internal Worker)
|
||||
/// <para>职责:管理【单个】进程的生命周期。</para>
|
||||
/// <para>功能:包含 启动/停止/自愈/熔断/监控 的核心状态机逻辑。</para>
|
||||
/// </summary>
|
||||
internal class ManagedProcess
|
||||
{
|
||||
#region --- Fields ---

// Collaborators injected through the constructor; never reassigned afterwards.
private readonly ProcessConfig _config;
private readonly ProcessManager _manager;
private readonly ILauncherLogger _logger;

/// <summary>
/// The operating-system process object currently tracked by this instance
/// (stays null until a launch attempt creates one).
/// </summary>
private Process _process;

/// <summary>
/// Intent flag describing whether the process is supposed to be down.
/// <para>true: stopped on purpose (an exit must not trigger self-healing).</para>
/// <para>false: expected to be running (an exit triggers the self-healing logic).</para>
/// </summary>
private bool _isIntentionalStop = true;

// --- Cancellation sources for the background tasks ---

/// <summary>Cancels the pending restart / circuit-breaker countdown.</summary>
private CancellationTokenSource _delayCts;

/// <summary>Cancels the resource-monitoring loop.</summary>
private CancellationTokenSource _monitorCts;

/// <summary>Cancels the scheduled (periodic) restart.</summary>
private CancellationTokenSource _schedulerCts;

// --- Runtime statistics ---

/// <summary>Number of consecutive failures (the circuit-breaker counter).</summary>
private int _consecutiveFailures;

/// <summary>Time of the last successful start (used to compute the stable-run duration).</summary>
private DateTime? _lastStartTime;

/// <summary>Time of the last detected exit.</summary>
private DateTime? _lastExitTime;

/// <summary>Point in time at which the next automatic retry is due.</summary>
private DateTime? _nextRetryTime;

/// <summary>
/// Current lifecycle status (read-only for outside callers).
/// </summary>
public ProcessStatus Status { get; private set; } = ProcessStatus.Stopped;

/// <summary>
/// The configuration this instance was created with.
/// </summary>
public ProcessConfig Config
{
    get { return _config; }
}

#endregion
|
||||
|
||||
#region --- 构造函数 ---
|
||||
|
||||
/// <summary>
/// Creates the wrapper for a single managed process. Only stores the supplied
/// collaborators; nothing is launched here.
/// </summary>
/// <param name="config">Configuration of the process to manage.</param>
/// <param name="manager">Owning manager that receives output and state-change dispatches.</param>
/// <param name="logger">Logger used for lifecycle and console records.</param>
public ManagedProcess(ProcessConfig config, ProcessManager manager, ILauncherLogger logger)
{
    this._logger = logger;
    this._manager = manager;
    this._config = config;
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region --- 外部指令 (External Commands) ---
|
||||
|
||||
/// <summary>
/// Entry point of the start command.
/// <para>Does nothing when the process is already running or starting. Otherwise it cancels a
/// pending delayed restart, switches to guarded mode, clears the failure statistics,
/// writes a lifecycle log entry and launches the process.</para>
/// </summary>
/// <param name="trigger">Who or what requested the start (forwarded to the log).</param>
/// <param name="reason">Free-text reason (forwarded to the log).</param>
public void ExecuteStart(LogTrigger trigger, string reason)
{
    // Already up, or on its way up: nothing to do.
    bool alreadyActive = Status == ProcessStatus.Running || Status == ProcessStatus.Starting;
    if (alreadyActive)
    {
        return;
    }

    // 1. Clear every "negative" state; a manual start usually means the problem was fixed.
    _delayCts?.Cancel();

    // Not an intentional stop any more, so from now on an exit triggers self-healing.
    _isIntentionalStop = false;
    _consecutiveFailures = 0;
    _nextRetryTime = null;

    // 2. Record the command.
    _logger.LogLifecycle(_config.Id, LogAction.Start, trigger, reason);

    // 3. Perform the actual launch.
    LaunchProcess();
}
|
||||
|
||||
/// <summary>
/// Entry point of the stop command.
/// <para>Marks the stop as intentional (so the exit handler will not restart the process),
/// cancels all background tasks, logs the command unless the process is already stopped,
/// kills the process and sets the status to Stopped.</para>
/// </summary>
/// <param name="trigger">Who or what requested the stop (forwarded to the log).</param>
/// <param name="reason">Free-text reason (forwarded to the log).</param>
public void ExecuteStop(LogTrigger trigger, string reason)
{
    // 1. Flag the stop as deliberate so HandleExitLogic does not schedule a restart.
    _isIntentionalStop = true;

    // 2. Cancel the delayed restart, the monitor loop and the scheduled restart, in that order.
    foreach (var source in new[] { _delayCts, _monitorCts, _schedulerCts })
    {
        source?.Cancel();
    }
    _nextRetryTime = null;

    // 3. Only log when there is actually something to stop.
    bool wasAlreadyStopped = Status == ProcessStatus.Stopped;
    if (!wasAlreadyStopped)
    {
        _logger.LogLifecycle(_config.Id, LogAction.Stop, trigger, reason);
    }

    // 4. Force-kill the process and publish the final state.
    KillProcess();
    UpdateStatus(ProcessStatus.Stopped);
}
|
||||
|
||||
/// <summary>
/// Calls Reset() on every configured resource guard.
/// Does nothing when no guard collection is configured.
/// </summary>
public void ResetGuards()
{
    var guards = _config.Guards;
    if (guards == null)
    {
        return;
    }

    foreach (var item in guards)
    {
        item.Reset();
    }
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region --- 核心启动逻辑 (Core Launch Logic) ---
|
||||
|
||||
/// <summary>
/// Launches the configured executable and wires up everything that depends on it:
/// optional stdout/stderr redirection, the exit hook, resource monitoring and the
/// optional scheduled restart.
/// <para>Every failure (missing executable, Start() returning false, any exception) is logged
/// and handed to <c>HandleExitLogic(-1)</c>, which decides between retry and circuit break.</para>
/// </summary>
private void LaunchProcess()
{
    try
    {
        UpdateStatus(ProcessStatus.Starting);

        // 1. Path check.
        string path = Path.GetFullPath(_config.ExePath);
        if (!File.Exists(path))
        {
            _logger.LogLifecycle(_config.Id, LogAction.Error, LogTrigger.System, "可执行文件未找到", path);
            // A missing file is a hard failure: go straight to the exit decision logic
            // (which may end in a circuit break).
            HandleExitLogic(exitCode: -1);
            return;
        }

        // 2. Build the ProcessStartInfo.
        var psi = new ProcessStartInfo
        {
            FileName = path,
            Arguments = _config.Arguments,
            // Default the working directory to the folder of the executable.
            WorkingDirectory = string.IsNullOrEmpty(_config.WorkingDirectory) ? Path.GetDirectoryName(path) : _config.WorkingDirectory,

            // Window visibility.
            CreateNoWindow = !_config.Visible,

            // Must be false, otherwise the IO streams cannot be redirected.
            UseShellExecute = false,

            // IO redirection switches.
            RedirectStandardOutput = _config.EnableLogRedirect,
            RedirectStandardError = _config.EnableLogRedirect
        };

        // Keep the new instance in a local: the event handlers below must refer to THIS
        // process, not to whatever the _process field points at when they eventually run.
        var process = new Process { StartInfo = psi, EnableRaisingEvents = true };

        // Publish the new instance and release the handle of the one it replaces.
        var previous = _process;
        _process = process;
        previous?.Dispose();

        // 3. Hook the redirected streams (read asynchronously).
        if (_config.EnableLogRedirect)
        {
            process.OutputDataReceived += (s, e) =>
            {
                if (!string.IsNullOrEmpty(e.Data))
                {
                    // A. Write to the log system.
                    _logger.LogConsole(_config.Id, e.Data, false);
                    // B. Raise the outward event (e.g. for live UI refresh).
                    _manager.DispatchOutput(_config.Id, e.Data, false);
                }
            };
            process.ErrorDataReceived += (s, e) =>
            {
                if (!string.IsNullOrEmpty(e.Data))
                {
                    _logger.LogConsole(_config.Id, e.Data, true);
                    _manager.DispatchOutput(_config.Id, e.Data, true);
                }
            };
        }

        // 4. Hook the exit event (the core lifecycle hook; raised on a background thread).
        process.Exited += (s, e) =>
        {
            // A late notification from an instance that has already been replaced must not be
            // mistaken for a crash of the current process (that would schedule a second launch).
            if (!ReferenceEquals(process, _process))
            {
                return;
            }

            int code = -1;
            try
            {
                code = process.ExitCode;
            }
            catch (Exception)
            {
                // Exit code not available; keep the -1 placeholder.
            }

            HandleExitLogic(code);
        };

        // 5. Ask the operating system to start the process.
        if (!process.Start())
        {
            throw new InvalidOperationException("Process.Start() 返回 false,启动失败");
        }

        // 6. Begin the asynchronous stream reads (only valid after Start).
        if (_config.EnableLogRedirect)
        {
            process.BeginOutputReadLine();
            process.BeginErrorReadLine();
        }

        // 7. Update the state.
        _lastStartTime = DateTime.Now;
        UpdateStatus(ProcessStatus.Running);
        _logger.LogLifecycle(_config.Id, LogAction.Start, LogTrigger.System, "进程启动成功", new { PID = process.Id });

        // 8. Post-start: resource monitoring loop.
        StartMonitoring();

        // 9. Post-start: scheduled restart, if configured.
        if (_config.AutoRestartIntervalMinutes > 0)
        {
            // Compute in 64 bits and clamp: minutes * 60000 overflows int for intervals above
            // ~35,791 minutes, and a negative delay would make Task.Delay throw.
            long intervalMs = (long)_config.AutoRestartIntervalMinutes * 60 * 1000;
            ScheduleScheduledRestart((int)Math.Min(intervalMs, int.MaxValue));
        }
    }
    catch (Exception ex)
    {
        // NOTE(review): if the exception is raised after Start() succeeded, the process may
        // still be alive while the exit logic schedules a restart — confirm whether it should
        // be killed here first.
        _logger.LogLifecycle(_config.Id, LogAction.Error, LogTrigger.System, $"启动过程异常: {ex.Message}");
        HandleExitLogic(-1);
    }
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region --- 守护与监控逻辑 (Guard & Monitor) ---
|
||||
|
||||
/// <summary>
/// Starts the background loop that polls the configured resource guards.
/// <para>Any previous loop is cancelled first. Without guards no loop is started.
/// A Warning result is only logged; a Critical result is logged and the process is killed,
/// which ends the loop.</para>
/// </summary>
private void StartMonitoring()
{
    const int pollIntervalMs = 3000; // polling interval: 3 seconds

    // 1. Retire the previous loop and create a fresh cancellation source.
    _monitorCts?.Cancel();
    _monitorCts = new CancellationTokenSource();

    // No guards configured: nothing to monitor.
    bool hasGuards = _config.Guards != null && _config.Guards.Count != 0;
    if (!hasGuards)
    {
        return;
    }

    var ct = _monitorCts.Token;

    // 2. Run the polling loop on the thread pool.
    Task.Run(async () =>
    {
        while (!ct.IsCancellationRequested)
        {
            try
            {
                await Task.Delay(pollIntervalMs, ct);

                // Make sure the process is still alive before each round of checks.
                if (_process == null || _process.HasExited)
                {
                    break;
                }

                foreach (var sentinel in _config.Guards)
                {
                    var verdict = sentinel.Check(_process, out string detail);

                    if (verdict == GuardResult.Critical)
                    {
                        // Critical: log and enforce a restart.
                        _logger.LogLifecycle(_config.Id, LogAction.Restart, LogTrigger.ResourceGuard, $"[严重] {detail} -> 执行管控重启");

                        // Kill -> Exited fires -> HandleExitLogic -> automatic restart.
                        KillProcess();
                        return; // leave the monitoring loop
                    }

                    if (verdict == GuardResult.Warning)
                    {
                        // Warning: log only, do not touch the process.
                        _logger.LogLifecycle(_config.Id, LogAction.ResourceCheck, LogTrigger.ResourceGuard, $"[警告] {detail}");
                    }
                }
            }
            catch (TaskCanceledException)
            {
                // Regular cancellation.
                break;
            }
            catch (Exception ex)
            {
                _logger.LogConsole(_config.Id, $"监控线程异常: {ex.Message}", true);
            }
        }
    }, ct);
}
|
||||
|
||||
/// <summary>
/// Arms the scheduled restart: after <paramref name="delayMs"/> the process is killed
/// (the exit logic then restarts it), provided the schedule was not cancelled and the
/// process is still in the Running state. A previously armed schedule is cancelled.
/// </summary>
/// <param name="delayMs">Delay in milliseconds before the restart fires.</param>
private void ScheduleScheduledRestart(int delayMs)
{
    _schedulerCts?.Cancel();
    _schedulerCts = new CancellationTokenSource();

    // Capture the token so the continuation checks ITS schedule, not a later replacement.
    var token = _schedulerCts.Token;

    Task.Delay(delayMs, token).ContinueWith(t =>
    {
        // Re-check the token as well: cancellation may arrive after the timer fired but
        // before this continuation got to run.
        if (t.IsCanceled || token.IsCancellationRequested || Status != ProcessStatus.Running)
        {
            return;
        }

        _logger.LogLifecycle(_config.Id, LogAction.Restart, LogTrigger.Scheduler, "执行计划性重启 (AutoRestart)");
        KillProcess(); // triggers the automatic restart
    }, TaskScheduler.Default); // explicit scheduler, same as WaitAndExec (avoids TaskScheduler.Current)
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region --- 决策大脑 (Decision Logic) ---
|
||||
|
||||
/// <summary>
/// Core decision logic that runs after the process exited or a launch attempt failed
/// (self-healing plus circuit breaker).
/// <para>Cancels the monitor and the scheduled restart, returns immediately for an intentional
/// stop, otherwise updates the failure counter and enters either the short retry or the
/// circuit-breaker cool-down.</para>
/// </summary>
/// <param name="exitCode">Exit code of the process (-1 when unknown or when the launch failed).</param>
private void HandleExitLogic(int exitCode)
{
    // 1. Clean up the companion tasks.
    _monitorCts?.Cancel();
    _schedulerCts?.Cancel();

    // 2. Intent check. After a user stop _isIntentionalStop is true: do not restart.
    //    Kills issued by the resource guard or the scheduler leave the flag false, so they
    //    fall through to the restart logic below.
    if (_isIntentionalStop)
    {
        return;
    }

    DateTime now = DateTime.Now;
    DateTime? previousExitTime = _lastExitTime;
    _lastExitTime = now;
    _logger.LogLifecycle(_config.Id, LogAction.Crash, LogTrigger.System, "侦测到进程退出", new { ExitCode = exitCode });

    // 3. Stability check: a process that survived longer than the threshold gets its failure
    //    counter reset, because the exit was probably not a crash-on-start.
    //    _lastStartTime is only written by a successful start. If it is not newer than the
    //    previous exit, this call reports a failed launch attempt; measuring from the stale
    //    start time would look like a long stable run and keep the breaker from ever tripping.
    bool ranSincePreviousExit = _lastStartTime.HasValue
        && (!previousExitTime.HasValue || _lastStartTime.Value > previousExitTime.Value);
    double runDurationMs = ranSincePreviousExit ? (now - _lastStartTime.Value).TotalMilliseconds : 0;

    if (runDurationMs > _config.StabilityThresholdMs)
    {
        if (_consecutiveFailures > 0)
        {
            _logger.LogConsole(_config.Id, $"运行稳定({runDurationMs / 1000:F0}s),重置失败计数", false);
        }
        _consecutiveFailures = 0;
    }
    else
    {
        _consecutiveFailures++;
    }

    // 4. Circuit breaker: too many consecutive failures means a long cool-down instead of
    //    another quick retry.
    if (_consecutiveFailures >= _config.MaxConsecutiveFailures)
    {
        EnterCoolingDown();
    }
    else
    {
        EnterShortRetry();
    }
}
|
||||
|
||||
/// <summary>
/// Enters the short-retry path: sets the status to PendingRestart, records when the retry
/// is due, logs the attempt counter and relaunches after the configured restart delay.
/// </summary>
private void EnterShortRetry()
{
    int waitMs = _config.RestartDelayMs;

    UpdateStatus(ProcessStatus.PendingRestart);
    _nextRetryTime = DateTime.Now.AddMilliseconds(waitMs);

    _logger.LogLifecycle(
        _config.Id,
        LogAction.Restart,
        LogTrigger.System,
        $"准备自动重启 ({_consecutiveFailures}/{_config.MaxConsecutiveFailures})",
        new { DelayMs = waitMs });

    // Relaunch once the delay has elapsed (unless it gets cancelled).
    WaitAndExec(waitMs, LaunchProcess);
}
|
||||
|
||||
/// <summary>
/// Enters the circuit-breaker cool-down: sets the status to CoolingDown, records when the
/// cool-down ends, logs the circuit break and schedules a recovery attempt afterwards.
/// </summary>
private void EnterCoolingDown()
{
    int coolDownMs = _config.CircuitBreakerDelayMs;

    UpdateStatus(ProcessStatus.CoolingDown);
    _nextRetryTime = DateTime.Now.AddMilliseconds(coolDownMs);

    _logger.LogLifecycle(
        _config.Id,
        LogAction.CircuitBreak,
        LogTrigger.System,
        "触发熔断保护",
        new { Minutes = coolDownMs / 1000 / 60 });

    // Recovery attempt that runs once the cool-down is over.
    Action recover = () =>
    {
        _logger.LogLifecycle(_config.Id, LogAction.Restart, LogTrigger.System, "熔断冷却结束,尝试恢复");
        LaunchProcess();
    };

    WaitAndExec(coolDownMs, recover);
}
|
||||
|
||||
/// <summary>
/// Runs <paramref name="action"/> on the thread pool after <paramref name="delayMs"/>,
/// unless the wait is cancelled through <c>_delayCts</c> first.
/// <para>Only one delayed action is kept pending: a wait armed by an earlier call is
/// cancelled when a new one is armed.</para>
/// </summary>
/// <param name="delayMs">Delay in milliseconds.</param>
/// <param name="action">Work to execute when the delay elapses.</param>
private void WaitAndExec(int delayMs, Action action)
{
    // Swap in the new source, then cancel the old one. Overwriting the field without
    // cancelling would orphan the earlier timer: it could still fire, and neither
    // ExecuteStop nor ExecuteStart could cancel it any more.
    // The old source is deliberately not disposed, so a concurrent Cancel() on a stale
    // reference cannot hit a disposed object.
    var previous = _delayCts;
    var current = new CancellationTokenSource();
    _delayCts = current;
    previous?.Cancel();

    var token = current.Token;
    Task.Delay(delayMs, token).ContinueWith(t =>
    {
        // Execute only if not cancelled; the token is re-checked because cancellation can
        // arrive after the timer fired but before this continuation runs.
        if (t.IsCanceled || token.IsCancellationRequested)
        {
            return;
        }

        action();
    }, TaskScheduler.Default);
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region --- 工具方法 (Helpers) ---
|
||||
|
||||
/// <summary>
/// Forcefully terminates the tracked process (best effort) and waits briefly for it to exit.
/// <para>Never throws: a missing, never-started or already-exited process is ignored, any
/// other failure is written to the console log.</para>
/// </summary>
private void KillProcess()
{
    var process = _process;
    if (process == null)
    {
        return;
    }

    try
    {
        // HasExited itself throws InvalidOperationException when the Process object was never
        // successfully started (e.g. Start() failed), so it has to be evaluated inside the try.
        if (process.HasExited)
        {
            return;
        }

#if NETCOREAPP3_0_OR_GREATER
        // .NET Core 3.0+ can terminate the whole process tree (child processes included).
        process.Kill(entireProcessTree: true);
#else
        process.Kill();
#endif
        process.WaitForExit(500); // give the OS a moment to release resources
    }
    catch (InvalidOperationException)
    {
        // No process associated, or it exited in the meantime: nothing left to kill.
    }
    catch (Exception ex)
    {
        // E.g. insufficient privileges: stay best-effort, but leave a trace.
        _logger.LogConsole(_config.Id, $"终止进程失败: {ex.Message}", true);
    }
}
|
||||
|
||||
/// <summary>
/// Stores the new status and asks the manager to dispatch a state-change notification.
/// Does nothing when the status is unchanged.
/// </summary>
/// <param name="status">The status to switch to.</param>
private void UpdateStatus(ProcessStatus status)
{
    bool unchanged = Status == status;
    if (unchanged)
    {
        return;
    }

    Status = status;

    // Call back into the manager so it can raise the external event.
    _manager.DispatchStateChange(_config.Id, status);
}
|
||||
|
||||
/// <summary>
/// Builds a snapshot DTO of the current state.
/// <para>The PID is only filled in while the status is Running. While cooling down, the
/// message carries the remaining time as total-minutes:seconds, never below 0:00.</para>
/// </summary>
/// <returns>A new <c>ProcessInfoSnapshot</c> populated from the current fields.</returns>
public ProcessInfoSnapshot GetSnapshot()
{
    int? pid = null;
    try
    {
        if (Status == ProcessStatus.Running)
        {
            pid = _process?.Id;
        }
    }
    catch (Exception)
    {
        // The process object may have no live process behind it; report no PID then.
    }

    // Read once: another thread may clear the field between the check and the use.
    DateTime? nextRetry = _nextRetryTime;

    string msg = "";
    // Countdown text for the circuit-breaker cool-down.
    if (Status == ProcessStatus.CoolingDown && nextRetry.HasValue)
    {
        TimeSpan remaining = nextRetry.Value - DateTime.Now;
        if (remaining < TimeSpan.Zero)
        {
            // Retry time already passed: show 0:00 instead of negative numbers.
            remaining = TimeSpan.Zero;
        }

        // TotalMinutes rather than Minutes: the Minutes component wraps at 60 and would
        // silently drop whole hours for long cool-downs.
        msg = $"熔断中 (剩余 {(int)remaining.TotalMinutes}:{remaining.Seconds:D2})";
    }

    return new ProcessInfoSnapshot
    {
        Id = _config.Id,
        DisplayName = _config.DisplayName,
        Pid = pid,
        Status = Status,
        LastStartTime = _lastStartTime,
        LastExitTime = _lastExitTime,
        ConsecutiveFailures = _consecutiveFailures,
        NextRetryTime = nextRetry,
        Message = msg
    };
}
|
||||
|
||||
#endregion
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user