完成进程启动器接口的设计

This commit is contained in:
2026-01-03 08:44:38 +08:00
parent dcf424a86e
commit 78061db9ef
11 changed files with 1369 additions and 2 deletions

View File

@@ -1,7 +1,7 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.14.36623.8 d17.14
# Visual Studio Version 18
VisualStudioVersion = 18.1.11312.151 d18.0
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SHH.CameraSdk", "SHH.CameraSdk\SHH.CameraSdk.csproj", "{21B70A94-43FC-4D17-AB83-9E4B5178397E}"
EndProject
@@ -13,6 +13,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SHH.NetMQ", "SHH.NetMQ\SHH.
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SHH.CameraDashboard", "SHH.CameraDashboard\SHH.CameraDashboard.csproj", "{03C249D7-BCF1-404D-AD09-7AB39BA263AD}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SHH.ProcessLaunchers", "SHH.ProcessLaunchers\SHH.ProcessLaunchers.csproj", "{E12F2D41-B7BB-4303-AD01-5DCD02D7FF3C}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -39,6 +41,10 @@ Global
{03C249D7-BCF1-404D-AD09-7AB39BA263AD}.Debug|Any CPU.Build.0 = Debug|Any CPU
{03C249D7-BCF1-404D-AD09-7AB39BA263AD}.Release|Any CPU.ActiveCfg = Release|Any CPU
{03C249D7-BCF1-404D-AD09-7AB39BA263AD}.Release|Any CPU.Build.0 = Release|Any CPU
{E12F2D41-B7BB-4303-AD01-5DCD02D7FF3C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{E12F2D41-B7BB-4303-AD01-5DCD02D7FF3C}.Debug|Any CPU.Build.0 = Debug|Any CPU
{E12F2D41-B7BB-4303-AD01-5DCD02D7FF3C}.Release|Any CPU.ActiveCfg = Release|Any CPU
{E12F2D41-B7BB-4303-AD01-5DCD02D7FF3C}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE

View File

@@ -0,0 +1,72 @@
using SHH.ProcessLaunchers;
namespace SHH.ProcessLauncher
{
/// <summary>
/// Dashboard logging adapter.
/// <para>Implements <see cref="ILauncherLogger"/>. The intent is to forward launcher logs to the
/// business-layer LogHelper and a UI notification service, but every forwarding call is currently
/// commented out, so both methods only build local strings and have no observable effect.</para>
/// </summary>
public class DashboardLogger : ILauncherLogger
{
#region --- ---
/// <summary>
/// Handles one line of console output (StdOut/StdErr) from a managed process.
/// </summary>
/// <param name="processId">Identifier of the process that produced the line.</param>
/// <param name="message">The text of the line.</param>
/// <param name="isError">True selects the "[ERR]" tag, false selects "[INF]".</param>
public void LogConsole(string processId, string message, bool isError)
{
// 1. Severity tag for the line. NOTE: currently unused because the sink call below is commented out.
string prefix = isError ? "[ERR]" : "[INF]";
//// The line could be written out directly here, or raised as an event for the UI to bind to a TextBox.
//// For demonstration purposes the LogHelper.Debug method was used.
//LogHelper.Debug($"[Console] {prefix} <{processId}>: {message}");
}
/// <summary>
/// Handles a key lifecycle event of a managed process.
/// </summary>
/// <param name="processId">Identifier of the affected process.</param>
/// <param name="action">What happened; rendered upper-cased at the start of the log text.</param>
/// <param name="trigger">Who or what caused the action; meant to select the response policy.</param>
/// <param name="reason">Free-text reason included in the log text.</param>
/// <param name="payload">Optional context object; appended via its string form when not null.</param>
public void LogLifecycle(string processId, LogAction action, LogTrigger trigger, string reason, object payload = null)
{
// 1. Build a readable, structured log line.
string logText = $"[{action.ToString().ToUpper()}] 触发源:{trigger} | 原因:{reason}";
if (payload != null) logText += $" | 数据:{payload}";
// 2. Choose the business response by trigger.
//    TODO: every case is commented out, so this switch is empty and logText is discarded.
switch (trigger)
{
//case LogTrigger.User:
// // Scenario: the user clicked a button.
// // Policy: expected operation; record an Info log only, no popup.
// LogHelper.Info(processId, logText);
// break;
//case LogTrigger.System:
// // Scenario: automatic restart after a crash, circuit-breaker recovery, etc.
// // Policy: record a Warn log so operators know the system healed itself.
// LogHelper.Warn(processId, logText);
// break;
//case LogTrigger.ResourceGuard:
// // Scenario: killed for exceeding the memory limit, CPU alarm.
// // Policy: serious problem; record an Error log and raise a strong UI alert (toast/popup).
// LogHelper.Error(processId, logText);
// // Call the notification service (simulated).
// NotificationService.ShowWarning($"进程异常: {processId}", $"触发资源管控: {reason}");
// break;
//case LogTrigger.Scheduler:
// // Scenario: scheduled restart.
// LogHelper.Info(processId, $"[计划任务] {logText}");
// break;
//default:
// LogHelper.Info(processId, logText);
// break;
}
}
#endregion
}
}

View File

@@ -0,0 +1,53 @@
namespace SHH.ProcessLaunchers
{
/// <summary>
/// Logging interface dedicated to the process launcher.
/// <para>Decouples log production from log consumption and carries structured, semantic records.</para>
/// <para>Implementations may forward logs to a UI console, a local file or a remote log centre.</para>
/// </summary>
public interface ILauncherLogger
{
/// <summary>
/// Records an ordinary console line (streaming log).
/// <para>Used to take over a child process's StdOut and StdErr.</para>
/// </summary>
/// <param name="processId">Unique process identifier (ProcessConfig.Id).</param>
/// <param name="message">Log content.</param>
/// <param name="isError">Whether the line came from the error stream (true = StdErr, false = StdOut).</param>
void LogConsole(string processId, string message, bool isError);
/// <summary>
/// Records a key lifecycle event (structured log).
/// <para>Used for start/stop, crash, circuit-break and similar milestones, for operational analysis.</para>
/// </summary>
/// <param name="processId">Unique process identifier (ProcessConfig.Id).</param>
/// <param name="action">Action type (Start/Stop/Crash...).</param>
/// <param name="trigger">Origin of the action (User/System...).</param>
/// <param name="reason">Reason or remark for the operation (required, used for tracing).</param>
/// <param name="payload">Optional context object, e.g. { PID=123, ExitCode=-1 }.</param>
void LogLifecycle(string processId, LogAction action, LogTrigger trigger, string reason, object payload = null);
}
/// <summary>
/// No-op <see cref="ILauncherLogger"/> (Null Object pattern).
/// <para>Stands in when no logger is injected so callers never have to null-check before logging.</para>
/// </summary>
public class NullLogger : ILauncherLogger
{
    /// <summary>
    /// Discards the console line.
    /// </summary>
    public void LogConsole(string processId, string message, bool isError) { }

    /// <summary>
    /// Discards the lifecycle event.
    /// </summary>
    public void LogLifecycle(string processId, LogAction action, LogTrigger trigger, string reason, object payload = null) { }
}
}

View File

@@ -0,0 +1,176 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Threading.Tasks;
namespace SHH.ProcessLaunchers
{
/// <summary>
/// Core interface of the process manager.
/// </summary>
public interface IProcessManager
{
/// <summary>
/// Registers the configuration of a process to be managed.
/// </summary>
/// <param name="config">Process configuration object.</param>
void Register(ProcessConfig config);
/// <summary>
/// Starts one registered process.
/// </summary>
/// <param name="name">Key of the process. NOTE: the ProcessManager implementation in this project looks this value up by ProcessConfig.Id.</param>
void Start(string name);
/// <summary>
/// Starts every registered process.
/// </summary>
Task StartAllAsync();
/// <summary>
/// Stops one process.
/// </summary>
/// <param name="name">Key of the process (matched against ProcessConfig.Id by ProcessManager).</param>
void Stop(string name);
/// <summary>
/// Stops every process.
/// </summary>
void StopAll();
/// <summary>
/// Resets the resource guards of one process, clearing their latched alarm state.
/// </summary>
/// <param name="processName">Key of the process (matched against ProcessConfig.Id by ProcessManager).</param>
void ResetGuard(string processName);
/// <summary>
/// Returns a point-in-time status snapshot of every managed process.
/// </summary>
List<ProcessInfoSnapshot> GetSnapshot();
// --- Events ---
/// <summary>
/// Raised when a line is received from a child process's standard output or error stream.
/// </summary>
event EventHandler<ProcessOutputEventArgs> OnOutputReceived;
/// <summary>
/// Raised when the lifecycle status of a process changes.
/// </summary>
event EventHandler<ProcessStateEventArgs> OnStateChanged;
}
/// <summary>
/// Resource guard that watches a process's working set.
/// <para>Critical threshold: reported immediately on every check, with no debounce and no latch.</para>
/// <para>Warning threshold: reported once, only after usage has stayed above the threshold for the
/// configured duration; the alarm then stays latched until <see cref="Reset"/> is called.</para>
/// </summary>
public class MemoryGuard : IResourceGuard
{
    /// <summary>Name of this guard.</summary>
    public string Name => "MemoryGuard";

    private readonly long _warningBytes;
    private readonly long _criticalBytes;
    private readonly TimeSpan _alertDuration; // how long usage must stay over the warning line

    // Check() is called from the monitoring task while Reset() is called from a user action,
    // so the mutable debounce/latch state below is only touched while holding this lock.
    private readonly object _stateLock = new object();

    private DateTime? _firstOverLimitUtc; // UTC time at which usage was first seen over the warning line
    private bool _isAlertLatched;         // true once a warning has been emitted (until Reset)

    /// <summary>
    /// Creates a memory guard.
    /// </summary>
    /// <param name="warningMb">Warning threshold in megabytes.</param>
    /// <param name="criticalMb">Critical (circuit-break) threshold in megabytes.</param>
    /// <param name="durationMinutes">Minutes usage must continuously exceed the warning threshold before a warning is raised.</param>
    public MemoryGuard(int warningMb, int criticalMb, int durationMinutes = 3)
    {
        _warningBytes = (long)warningMb * 1024 * 1024;
        _criticalBytes = (long)criticalMb * 1024 * 1024;
        _alertDuration = TimeSpan.FromMinutes(durationMinutes);
    }

    /// <summary>
    /// Samples the process's working set and classifies it.
    /// </summary>
    /// <param name="process">Process to inspect.</param>
    /// <param name="reason">Set to a description when the result is Warning or Critical; otherwise null.</param>
    /// <returns>
    /// Critical when usage exceeds the critical threshold; Warning the first time usage has stayed above
    /// the warning threshold for the configured duration; Normal in every other case, including when the
    /// process cannot be queried.
    /// </returns>
    public GuardResult Check(Process process, out string reason)
    {
        reason = null;

        long currentUsage;
        try
        {
            process.Refresh();
            currentUsage = process.WorkingSet64;
        }
        catch (Exception)
        {
            // Best effort: a process that cannot be queried (null, exited, access denied)
            // is reported as Normal rather than failing the caller's monitoring loop.
            return GuardResult.Normal;
        }

        // 1. Critical line first: no debounce, and not suppressed by the latch.
        if (currentUsage > _criticalBytes)
        {
            reason = $"[严重] 内存 {FormatSize(currentUsage)} > 熔断线 {FormatSize(_criticalBytes)} (立即执行管控)";
            return GuardResult.Critical;
        }

        lock (_stateLock)
        {
            // 2. Back under the warning line: restart the observation window.
            //    The latch is deliberately left untouched; only Reset() clears it.
            if (currentUsage <= _warningBytes)
            {
                _firstOverLimitUtc = null;
                return GuardResult.Normal;
            }

            // 3. Over the warning line but already reported: stay silent.
            if (_isAlertLatched)
            {
                return GuardResult.Normal;
            }

            // UTC is used so that daylight-saving or time-zone changes cannot
            // stretch or shrink the observation window.
            DateTime nowUtc = DateTime.UtcNow;

            // 4. First sighting: start the observation window, do not report yet.
            if (_firstOverLimitUtc == null)
            {
                _firstOverLimitUtc = nowUtc;
                return GuardResult.Normal;
            }

            // 5. Still inside the observation window.
            if (nowUtc - _firstOverLimitUtc.Value < _alertDuration)
            {
                return GuardResult.Normal;
            }

            // 6. Sustained overuse: report once and latch.
            _isAlertLatched = true;
            _firstOverLimitUtc = null;
            reason = $"[报警] 内存 {FormatSize(currentUsage)} > 阈值 {FormatSize(_warningBytes)} 且持续超过 {_alertDuration.TotalMinutes}分钟";
            return GuardResult.Warning;
        }
    }

    /// <summary>
    /// Clears the latched alarm and the observation window so a new warning can be raised.
    /// </summary>
    public void Reset()
    {
        lock (_stateLock)
        {
            _isAlertLatched = false;
            _firstOverLimitUtc = null;
        }
    }

    // Formats a byte count as whole megabytes.
    private string FormatSize(long bytes) => $"{bytes / 1024 / 1024}MB";
}
}

View File

@@ -0,0 +1,29 @@
using System.Diagnostics;
namespace SHH.ProcessLaunchers
{
/// <summary>
/// Resource guard interface (strategy pattern).
/// </summary>
public interface IResourceGuard
{
/// <summary>
/// Name of the guard (e.g. MemoryGuard).
/// </summary>
string Name { get; }
/// <summary>
/// Performs a health check.
/// </summary>
/// <param name="process">The running process object.</param>
/// <param name="reason">[out] Detailed description when the result is abnormal.</param>
/// <returns>Check result (Normal/Warning/Critical).</returns>
GuardResult Check(Process process, out string reason);
/// <summary>
/// Manual reset / mark as handled.
/// <para>Called after the user clicks "handled" in the UI, to clear the guard's internal alarm latch.</para>
/// </summary>
void Reset();
}
}

View File

@@ -0,0 +1,486 @@
using System;
using System.Diagnostics;
using System.IO;
using System.Threading;
using System.Threading.Tasks;
namespace SHH.ProcessLaunchers
{
// =========================================================
// 内部核心类:单个受管进程 (封装了所有复杂逻辑)
// =========================================================
/// <summary>
/// 受管进程实例 (Internal Worker)
/// <para>职责:管理【单个】进程的生命周期。</para>
/// <para>功能:包含 启动/停止/自愈/熔断/监控 的核心状态机逻辑。</para>
/// </summary>
internal class ManagedProcess
{
#region --- (Fields) ---
// Configuration, owning manager (used for event dispatch) and logger supplied at construction.
private readonly ProcessConfig _config;
private readonly ProcessManager _manager;
private readonly ILauncherLogger _logger;
/// <summary>
/// The actual operating-system process object.
/// </summary>
private Process _process;
/// <summary>
/// Flag: whether the current stop is intentional.
/// <para>True: stopped on request (initial value, and set by ExecuteStop); exit handling will not restart.</para>
/// <para>False: set by ExecuteStart; an exit is treated as unexpected and triggers restart handling.</para>
/// </summary>
private bool _isIntentionalStop = true;
// --- Cancellation sources for background work ---
private CancellationTokenSource _delayCts; // cancels the pending restart / cool-down delay
private CancellationTokenSource _monitorCts; // cancels the resource-monitoring loop
private CancellationTokenSource _schedulerCts; // cancels the scheduled-restart timer
// --- Runtime statistics ---
private int _consecutiveFailures = 0; // consecutive failure count (circuit-breaker counter)
private DateTime? _lastStartTime; // last successful start time (used to compute how long the process ran)
private DateTime? _lastExitTime; // last recorded unexpected-exit time
private DateTime? _nextRetryTime; // time of the next automatic retry
/// <summary>
/// Current lifecycle status (read-only to callers).
/// </summary>
public ProcessStatus Status { get; private set; } = ProcessStatus.Stopped;
/// <summary>
/// Exposes the configuration this instance was created with.
/// </summary>
public ProcessConfig Config => _config;
#endregion
#region --- ---
/// <summary>
/// Creates a managed-process wrapper; nothing is started until ExecuteStart is called.
/// </summary>
/// <param name="config">Configuration of the process to manage.</param>
/// <param name="manager">Owning manager, called back to dispatch output and state-change events.</param>
/// <param name="logger">Logger receiving console and lifecycle records.</param>
public ManagedProcess(ProcessConfig config, ProcessManager manager, ILauncherLogger logger)
{
_config = config;
_manager = manager;
_logger = logger;
}
#endregion
#region --- (External Commands) ---
/// <summary>
/// Entry point for starting the process.
/// <para>No-op when the process is already Running or Starting. Otherwise cancels any pending
/// retry delay, clears the failure counter and retry time, switches to guarded mode
/// (unexpected exits will be handled), logs the start request and launches.</para>
/// </summary>
/// <param name="trigger">Who requested the start (for the lifecycle log).</param>
/// <param name="reason">Why the start was requested (for the lifecycle log).</param>
public void ExecuteStart(LogTrigger trigger, string reason)
{
    bool alreadyActive = Status == ProcessStatus.Running || Status == ProcessStatus.Starting;
    if (alreadyActive)
    {
        return;
    }

    // A fresh start request wipes the slate: pending retry, failure count, retry time.
    _delayCts?.Cancel();
    _isIntentionalStop = false; // from now on an exit counts as unexpected
    _nextRetryTime = null;
    _consecutiveFailures = 0;

    _logger.LogLifecycle(_config.Id, LogAction.Start, trigger, reason);

    LaunchProcess();
}
/// <summary>
/// Entry point for stopping the process.
/// <para>Marks the stop as intentional (so exit handling does not restart), cancels the retry
/// delay, the monitor loop and the scheduled restart, logs the stop unless already Stopped,
/// kills the process and sets the status to Stopped.</para>
/// </summary>
/// <param name="trigger">Who requested the stop (for the lifecycle log).</param>
/// <param name="reason">Why the stop was requested (for the lifecycle log).</param>
public void ExecuteStop(LogTrigger trigger, string reason)
{
    // Must be set before the kill so the exit callback sees an intentional stop.
    _isIntentionalStop = true;

    // Tear down every background activity tied to this process.
    _delayCts?.Cancel();
    _monitorCts?.Cancel();
    _schedulerCts?.Cancel();
    _nextRetryTime = null;

    bool needsLog = Status != ProcessStatus.Stopped;
    if (needsLog)
    {
        _logger.LogLifecycle(_config.Id, LogAction.Stop, trigger, reason);
    }

    KillProcess();
    UpdateStatus(ProcessStatus.Stopped);
}
/// <summary>
/// Calls Reset on every configured resource guard; does nothing when no guard list is configured.
/// </summary>
public void ResetGuards()
{
    var guards = _config.Guards;
    if (guards == null)
    {
        return;
    }

    foreach (var guard in guards)
    {
        guard.Reset();
    }
}
#endregion
#region --- (Core Launch Logic) ---
/// <summary>
/// Launches the configured executable and wires up everything that follows a start.
/// <para>Steps: set status Starting, verify the executable exists, build the start info, hook
/// output/exit events, start the OS process, begin reading redirected streams, set status
/// Running, start resource monitoring and (if configured) schedule the periodic restart.</para>
/// <para>Any failure is logged and handed to HandleExitLogic(-1), which decides between a
/// short retry and the circuit breaker.</para>
/// </summary>
private void LaunchProcess()
{
    Process proc = null;
    try
    {
        UpdateStatus(ProcessStatus.Starting);

        // 1. Path check
        string path = Path.GetFullPath(_config.ExePath);
        if (!File.Exists(path))
        {
            _logger.LogLifecycle(_config.Id, LogAction.Error, LogTrigger.System, "可执行文件未找到", path);
            // A missing file goes straight to the exit decision logic (may trip the circuit breaker).
            HandleExitLogic(exitCode: -1);
            return;
        }

        // 2. Build the start info
        var psi = new ProcessStartInfo
        {
            FileName = path,
            Arguments = _config.Arguments,
            // Default the working directory to the executable's folder.
            WorkingDirectory = string.IsNullOrEmpty(_config.WorkingDirectory) ? Path.GetDirectoryName(path) : _config.WorkingDirectory,
            CreateNoWindow = !_config.Visible,
            // Must be false for stream redirection to work.
            UseShellExecute = false,
            RedirectStandardOutput = _config.EnableLogRedirect,
            RedirectStandardError = _config.EnableLogRedirect
        };

        proc = new Process { StartInfo = psi, EnableRaisingEvents = true };

        // 3. Forward redirected output to the logger and to the manager's event.
        if (_config.EnableLogRedirect)
        {
            proc.OutputDataReceived += (s, e) =>
            {
                if (!string.IsNullOrEmpty(e.Data))
                {
                    _logger.LogConsole(_config.Id, e.Data, false);
                    _manager.DispatchOutput(_config.Id, e.Data, false);
                }
            };
            proc.ErrorDataReceived += (s, e) =>
            {
                if (!string.IsNullOrEmpty(e.Data))
                {
                    _logger.LogConsole(_config.Id, e.Data, true);
                    _manager.DispatchOutput(_config.Id, e.Data, true);
                }
            };
        }

        // 4. Exit hook. Exited is raised on a background thread.
        proc.Exited += (s, e) =>
        {
            // Ignore a late exit notification from an instance that has since been replaced
            // (e.g. Stop followed quickly by Start); acting on it would restart a healthy process.
            if (!ReferenceEquals(proc, _process))
            {
                return;
            }

            int code = -1;
            try { code = proc.ExitCode; } catch { /* exit code unavailable; keep -1 */ }
            HandleExitLogic(code);
        };

        // Publish the new instance before starting it so an immediate exit is recognised as
        // current, and release the OS handle held by the previous instance.
        Process previous = _process;
        _process = proc;
        previous?.Dispose();

        // 5. Start the OS process
        if (!proc.Start())
        {
            throw new InvalidOperationException("Process.Start() 返回 false启动失败");
        }

        // 6. Begin asynchronous stream reads (only valid after Start)
        if (_config.EnableLogRedirect)
        {
            proc.BeginOutputReadLine();
            proc.BeginErrorReadLine();
        }

        // 7. Record the start
        _lastStartTime = DateTime.Now;
        UpdateStatus(ProcessStatus.Running);
        _logger.LogLifecycle(_config.Id, LogAction.Start, LogTrigger.System, "进程启动成功", new { PID = proc.Id });

        // 8. Resource monitoring
        StartMonitoring();

        // 9. Periodic restart, if configured
        if (_config.AutoRestartIntervalMinutes > 0)
        {
            // Computed in 64-bit and clamped: minutes * 60000 overflows int above ~35791 minutes,
            // which would otherwise hand a negative delay to the scheduler.
            long intervalMs = (long)_config.AutoRestartIntervalMinutes * 60 * 1000;
            ScheduleScheduledRestart((int)Math.Min(intervalMs, int.MaxValue));
        }
    }
    catch (Exception ex)
    {
        _logger.LogLifecycle(_config.Id, LogAction.Error, LogTrigger.System, $"启动过程异常: {ex.Message}");

        if (proc != null)
        {
            // Detach first so this instance's Exited callback is treated as stale and the
            // failure is counted exactly once (by the call below).
            if (ReferenceEquals(_process, proc))
            {
                _process = null;
            }

            // If the failure happened after the OS process was created, do not leave it
            // running unmanaged while a replacement is scheduled.
            try
            {
                if (!proc.HasExited)
                {
                    proc.Kill();
                }
            }
            catch
            {
                // Never started, already gone, or not permitted: nothing more to do.
            }

            proc.Dispose();
        }

        HandleExitLogic(-1);
    }
}
#endregion
#region --- (Guard & Monitor) ---
/// <summary>
/// Starts the background resource-monitoring loop for the current process.
/// <para>Any previous loop is cancelled first. When no guards are configured nothing is started.
/// The loop polls every 3 seconds, stops when cancelled or when the process is gone, logs a
/// lifecycle record on Warning, and on Critical logs, kills the process and ends.</para>
/// </summary>
private void StartMonitoring()
{
    // Replace the previous loop's cancellation source.
    _monitorCts?.Cancel();
    _monitorCts = new CancellationTokenSource();

    bool hasGuards = _config.Guards != null && _config.Guards.Count != 0;
    if (!hasGuards)
    {
        return;
    }

    CancellationToken token = _monitorCts.Token;

    Task.Run(async () =>
    {
        for (; ; )
        {
            if (token.IsCancellationRequested)
            {
                break;
            }

            try
            {
                // Poll interval: 3 seconds.
                await Task.Delay(3000, token);

                // Stop watching once the process is gone.
                if (_process == null || _process.HasExited)
                {
                    break;
                }

                foreach (var guard in _config.Guards)
                {
                    GuardResult verdict = guard.Check(_process, out string detail);
                    switch (verdict)
                    {
                        case GuardResult.Warning:
                            // Warning: record only, do not interfere with the process.
                            _logger.LogLifecycle(_config.Id, LogAction.ResourceCheck, LogTrigger.ResourceGuard, $"[警告] {detail}");
                            break;

                        case GuardResult.Critical:
                            // Critical: record, kill (the exit callback handles the restart) and end the loop.
                            _logger.LogLifecycle(_config.Id, LogAction.Restart, LogTrigger.ResourceGuard, $"[严重] {detail} -> 执行管控重启");
                            KillProcess();
                            return;
                    }
                }
            }
            catch (TaskCanceledException)
            {
                // Normal cancellation.
                break;
            }
            catch (Exception ex)
            {
                _logger.LogConsole(_config.Id, $"监控线程异常: {ex.Message}", true);
            }
        }
    }, token);
}
/// <summary>
/// Arms a one-shot timer that restarts the process after <paramref name="delayMs"/> milliseconds.
/// <para>Any previously armed timer is cancelled. When the timer fires and the process is still
/// Running, the restart is logged and the process is killed; the exit callback then relaunches it.</para>
/// </summary>
/// <param name="delayMs">Delay before the restart, in milliseconds.</param>
private void ScheduleScheduledRestart(int delayMs)
{
    _schedulerCts?.Cancel();
    _schedulerCts = new CancellationTokenSource();

    // The scheduler is stated explicitly (matching WaitAndExec) so the continuation always runs
    // on the thread pool instead of whatever TaskScheduler.Current happens to be at the call site.
    Task.Delay(delayMs, _schedulerCts.Token).ContinueWith(t =>
    {
        // Act only if the timer was not cancelled and the process is still running.
        if (!t.IsCanceled && Status == ProcessStatus.Running)
        {
            _logger.LogLifecycle(_config.Id, LogAction.Restart, LogTrigger.Scheduler, "执行计划性重启 (AutoRestart)");
            KillProcess(); // the exit callback performs the restart
        }
    }, TaskScheduler.Default);
}
#endregion
#region --- (Decision Logic) ---
/// <summary>
/// Decision logic run after the process exits or fails to launch (self-healing + circuit breaker).
/// <para>Cancels the monitor loop and scheduled restart. Returns immediately for an intentional
/// stop. Otherwise records the exit, updates the consecutive-failure counter based on how long
/// the process ran, and enters either the short retry or the cool-down flow.</para>
/// </summary>
/// <param name="exitCode">Exit code of the process (-1 when unavailable or when the launch itself failed).</param>
private void HandleExitLogic(int exitCode)
{
    // 1. Stop companion tasks
    _monitorCts?.Cancel();
    _schedulerCts?.Cancel();

    // 2. Intent check: an intentional stop never restarts. Kills issued by a resource guard or
    //    the scheduler leave the flag false and therefore continue below.
    if (_isIntentionalStop) return;

    DateTime now = DateTime.Now;

    // A start time that is older than the previously recorded exit belongs to an earlier run:
    // the process has not successfully started since, so this call reports a launch failure.
    // Measuring from that stale start time would look like a long, stable run and reset the
    // failure counter on every attempt, so the circuit breaker would never trip.
    bool ranSinceLastExit = _lastStartTime.HasValue
        && (!_lastExitTime.HasValue || _lastStartTime.Value >= _lastExitTime.Value);

    _lastExitTime = now;
    _logger.LogLifecycle(_config.Id, LogAction.Crash, LogTrigger.System, "侦测到进程退出", new { ExitCode = exitCode });

    // 3. Stability check: a run longer than the threshold clears the failure counter;
    //    anything shorter (including a failed launch) counts as one more failure.
    double runDurationMs = ranSinceLastExit ? (now - _lastStartTime.Value).TotalMilliseconds : 0;
    if (runDurationMs > _config.StabilityThresholdMs)
    {
        if (_consecutiveFailures > 0)
            _logger.LogConsole(_config.Id, $"运行稳定({runDurationMs / 1000:F0}s),重置失败计数", false);
        _consecutiveFailures = 0;
    }
    else
    {
        _consecutiveFailures++;
    }

    // 4. Circuit breaker: too many consecutive failures means a long cool-down instead of a quick retry.
    if (_consecutiveFailures >= _config.MaxConsecutiveFailures)
    {
        EnterCoolingDown();
    }
    else
    {
        EnterShortRetry();
    }
}
/// <summary>
/// Enters the short-retry flow: sets status PendingRestart, records the next retry time,
/// logs the pending restart and relaunches after the configured restart delay.
/// </summary>
private void EnterShortRetry()
{
    int retryDelayMs = _config.RestartDelayMs;

    UpdateStatus(ProcessStatus.PendingRestart);
    _nextRetryTime = DateTime.Now.AddMilliseconds(retryDelayMs);

    string note = $"准备自动重启 ({_consecutiveFailures}/{_config.MaxConsecutiveFailures})";
    _logger.LogLifecycle(_config.Id, LogAction.Restart, LogTrigger.System, note, new { DelayMs = retryDelayMs });

    // Relaunch once the delay has elapsed (unless cancelled in the meantime).
    WaitAndExec(retryDelayMs, LaunchProcess);
}
/// <summary>
/// Enters the circuit-breaker flow: sets status CoolingDown, records the next retry time,
/// logs the circuit break and, after the configured cool-down, logs and attempts a relaunch.
/// </summary>
private void EnterCoolingDown()
{
    int cooldownMs = _config.CircuitBreakerDelayMs;

    UpdateStatus(ProcessStatus.CoolingDown);
    _nextRetryTime = DateTime.Now.AddMilliseconds(cooldownMs);

    var payload = new { Minutes = cooldownMs / 1000 / 60 };
    _logger.LogLifecycle(_config.Id, LogAction.CircuitBreak, LogTrigger.System, "触发熔断保护", payload);

    // Runs when the cool-down elapses (unless cancelled in the meantime).
    void ResumeAfterCooldown()
    {
        _logger.LogLifecycle(_config.Id, LogAction.Restart, LogTrigger.System, "熔断冷却结束,尝试恢复");
        LaunchProcess();
    }

    WaitAndExec(cooldownMs, ResumeAfterCooldown);
}
/// <summary>
/// Runs <paramref name="action"/> on the thread pool after <paramref name="delayMs"/> milliseconds,
/// unless the delay is cancelled first through the shared delay cancellation source.
/// <para>A delay that is still pending from an earlier call is cancelled, so at most one delayed
/// restart is queued at any time (the same replace-on-rearm behaviour as the monitor and
/// scheduler helpers).</para>
/// </summary>
/// <param name="delayMs">Delay in milliseconds.</param>
/// <param name="action">Work to run when the delay elapses.</param>
private void WaitAndExec(int delayMs, Action action)
{
    // Cancelling a source whose delay already completed is harmless.
    _delayCts?.Cancel();

    var cts = new CancellationTokenSource();
    _delayCts = cts;

    Task.Delay(delayMs, cts.Token).ContinueWith(t =>
    {
        // Run only when the delay was not cancelled.
        if (!t.IsCanceled) action();
    }, TaskScheduler.Default);
}
#endregion
#region --- (Helpers) ---
/// <summary>
/// Forcibly terminates the current process, if there is one and it is still running, then waits
/// up to 500 ms for it to exit.
/// <para>Only the process itself is killed; this call does not terminate its child processes.</para>
/// <para>Best effort: failures (never started, already exited, insufficient permission) are ignored.</para>
/// </summary>
private void KillProcess()
{
    // Work on a snapshot: the field can be replaced by a relaunch on another thread.
    Process target = _process;
    if (target == null)
    {
        return;
    }

    try
    {
        // HasExited is inside the try because it throws for a Process object that was never
        // started (e.g. after a failed launch); that must not break the caller's stop sequence.
        if (target.HasExited)
        {
            return;
        }

        target.Kill();
        target.WaitForExit(500); // give the OS a moment to release resources
    }
    catch
    {
        // Ignore permission problems and races with a process that is exiting on its own.
    }
}
/// <summary>
/// Sets the status and, only when it actually changed, asks the manager to raise the
/// state-change event.
/// </summary>
/// <param name="status">The new status.</param>
private void UpdateStatus(ProcessStatus status)
{
    if (Status == status)
    {
        return; // unchanged: no event
    }

    Status = status;
    _manager.DispatchStateChange(_config.Id, status);
}
/// <summary>
/// Builds a snapshot DTO of the current state.
/// <para>The PID is included only while Running. While CoolingDown the message carries the
/// remaining cool-down time as total minutes and seconds, never below 0:00.</para>
/// </summary>
/// <returns>A new snapshot object.</returns>
public ProcessInfoSnapshot GetSnapshot()
{
    int? pid = null;
    try { if (Status == ProcessStatus.Running) pid = _process?.Id; } catch { /* process object no longer usable */ }

    // Read once: the field can be cleared concurrently by a stop or start request.
    DateTime? nextRetry = _nextRetryTime;

    string msg = "";
    if (Status == ProcessStatus.CoolingDown && nextRetry.HasValue)
    {
        TimeSpan remaining = nextRetry.Value - DateTime.Now;
        if (remaining < TimeSpan.Zero)
        {
            // The retry is due or overdue; show 0:00 rather than negative components.
            remaining = TimeSpan.Zero;
        }

        // TotalMinutes (not Minutes) so cool-downs of an hour or more do not wrap at 60.
        msg = $"熔断中 (剩余 {(int)remaining.TotalMinutes}:{remaining.Seconds:D2})";
    }

    return new ProcessInfoSnapshot
    {
        Id = _config.Id,
        DisplayName = _config.DisplayName,
        Description = _config.Description,
        Pid = pid,
        Status = Status,
        LastStartTime = _lastStartTime,
        LastExitTime = _lastExitTime,
        ConsecutiveFailures = _consecutiveFailures,
        NextRetryTime = nextRetry,
        Message = msg
    };
}
#endregion
}
}

View File

@@ -0,0 +1,107 @@
using System.Collections.Generic;
namespace SHH.ProcessLaunchers
{
/// <summary>
/// Launch configuration for one managed process.
/// </summary>
public class ProcessConfig
{
#region --- (Identity) ---
/// <summary>
/// Unique identifier (key).
/// <para>Used as the manager's internal index and must not repeat, e.g. "Streamer_01", "Streamer_02".</para>
/// </summary>
public string Id { get; set; } = string.Empty;
/// <summary>
/// General display name (category/type).
/// <para>Describes what kind of program this is, e.g. "VideoStreamingService".</para>
/// <para>Several instances may share the same DisplayName.</para>
/// </summary>
public string DisplayName { get; set; } = string.Empty;
/// <summary>
/// Descriptive remark (optional).
/// <para>E.g. "Handles the video stream of 192.168.1.10".</para>
/// </summary>
public string Description { get; set; } = string.Empty;
#endregion
#region --- (Launch Args) ---
/// <summary>Path of the executable (absolute or relative).</summary>
public string ExePath { get; set; } = string.Empty;
/// <summary>Command-line argument string (e.g. "--id=1 --debug").</summary>
public string Arguments { get; set; } = string.Empty;
/// <summary>Working directory (defaults to the executable's directory when empty).</summary>
public string WorkingDirectory { get; set; } = string.Empty;
#endregion
#region --- ---
/// <summary>
/// Whether to show the program's window.
/// <para>True: show the console window or UI | False: run silently in the background (CreateNoWindow=true).</para>
/// </summary>
public bool Visible { get; set; } = false;
/// <summary>
/// Whether to take over the standard output/error streams (RedirectStandardOutput).
/// <para>True: the launcher captures the child's console output and forwards it through the logging interface.</para>
/// <para>Note: when Visible=true, false is recommended; otherwise the console window shows nothing.</para>
/// </summary>
public bool EnableLogRedirect { get; set; } = true;
#endregion
#region --- ---
/// <summary>Delay before a normal restart after an unexpected exit, in milliseconds. Default 3000.</summary>
public int RestartDelayMs { get; set; } = 3000;
/// <summary>Consecutive-failure threshold; reaching it triggers the circuit breaker. Default 3.</summary>
public int MaxConsecutiveFailures { get; set; } = 3;
/// <summary>Circuit-breaker cool-down, in milliseconds. Default 30 minutes (1800000 ms).</summary>
public int CircuitBreakerDelayMs { get; set; } = 30 * 60 * 1000;
/// <summary>
/// Stable-run threshold, in milliseconds.
/// <para>A process that stays alive longer than this is considered successfully started and the failure counter is reset.</para>
/// </summary>
public int StabilityThresholdMs { get; set; } = 60 * 1000;
/// <summary>
/// Automatic restart interval, in minutes. 0 disables the periodic restart.
/// <para>Intended to guard against memory fragmentation or instability over long runs.</para>
/// </summary>
public int AutoRestartIntervalMinutes { get; set; } = 0;
#endregion
#region --- ---
/// <summary>
/// Start-up order weight.
/// <para>Smaller numbers start first (0, 1, 2...).</para>
/// </summary>
public int StartupOrder { get; set; } = 0;
/// <summary>
/// Wait after start-up, in milliseconds.
/// <para>How long to wait after starting this process before starting the next one; used to avoid CPU spikes or to wait for dependencies.</para>
/// </summary>
public int PostStartupDelayMs { get; set; } = 3000;
#endregion
/// <summary>List of resource guards (memory monitoring, heartbeat monitoring, etc.).</summary>
public List<IResourceGuard> Guards { get; set; } = new List<IResourceGuard>();
}
}

View File

@@ -0,0 +1,135 @@
using System;
namespace SHH.ProcessLaunchers
{
/// <summary>
/// Event arguments for process output (StdOut/StdErr).
/// </summary>
public class ProcessOutputEventArgs : EventArgs
{
/// <summary>Unique identifier of the process.</summary>
public string ProcessId { get; set; } = string.Empty;
/// <summary>Name of the originating process.</summary>
public string ProcessName { get; set; } = string.Empty;
/// <summary>The output content.</summary>
public string Content { get; set; } = string.Empty;
/// <summary>Whether the content came from the error stream (StdErr).</summary>
public bool IsError { get; set; }
}
/// <summary>
/// Event arguments for a process state change.
/// </summary>
public class ProcessStateEventArgs : EventArgs
{
/// <summary>Unique identifier of the process.</summary>
public string ProcessId { get; set; } = string.Empty;
/// <summary>Name of the originating process.</summary>
public string ProcessName { get; set; } = string.Empty;
/// <summary>The new state after the change.</summary>
public ProcessStatus State { get; set; }
}
/// <summary>
/// Lifecycle states of a managed process.
/// </summary>
public enum ProcessStatus
{
/// <summary>Stopped (initial state, or stopped on request).</summary>
Stopped,
/// <summary>Starting (the process object is being initialised).</summary>
Starting,
/// <summary>Running (a PID exists).</summary>
Running,
/// <summary>Waiting to restart (short pause after an unexpected exit; 3 seconds by default).</summary>
PendingRestart,
/// <summary>Circuit-breaker cool-down (long wait after repeated failures; 30 minutes by default).</summary>
CoolingDown
}
/// <summary>
/// Result of a resource-guard check.
/// </summary>
public enum GuardResult
{
/// <summary>Everything is normal.</summary>
Normal,
/// <summary>Warning (something is off; log or notify, but do not kill the process).</summary>
Warning,
/// <summary>Critical fault (the process must be restarted immediately to protect the system).</summary>
Critical
}
/// <summary>
/// Attribution of an operation: who or what triggered the action.
/// <para>Used later to tell manual operations apart from automatic self-healing.</para>
/// </summary>
public enum LogTrigger
{
/// <summary>
/// Manual user intervention (UI click, API call).
/// <para>Priority: highest. Normally an expected operation.</para>
/// </summary>
User,
/// <summary>
/// Launcher self-healing (crash restart, initial start, circuit-breaker recovery).
/// <para>Priority: high. The system is trying to keep the service alive.</para>
/// </summary>
System,
/// <summary>
/// Triggered by a resource guard (memory/CPU over limit).
/// <para>Priority: urgent. Indicates a degraded state or enforced control.</para>
/// </summary>
ResourceGuard,
/// <summary>
/// Timed task / planned schedule.
/// <para>Priority: medium. A task executed according to plan.</para>
/// </summary>
Scheduler
}
/// <summary>
/// Core action types.
/// <para>Identifies which concrete event occurred in a process's lifecycle.</para>
/// </summary>
public enum LogAction
{
/// <summary>Start the process.</summary>
Start,
/// <summary>Stop the process.</summary>
Stop,
/// <summary>Restart the process.</summary>
Restart,
/// <summary>An unexpected exit was detected.</summary>
Crash,
/// <summary>Standard output stream (StdOut) - usually ordinary log lines printed by the program.</summary>
Output,
/// <summary>Standard error stream (StdErr) - exceptions or errors printed by the program.</summary>
Error,
/// <summary>Circuit breaker tripped (immediate retries are suspended).</summary>
CircuitBreak,
/// <summary>Resource-check warning (e.g. memory over-limit alarm, without a restart).</summary>
ResourceCheck
}
}

View File

@@ -0,0 +1,42 @@
using System;
namespace SHH.ProcessLaunchers
{
/// <summary>
/// Snapshot of a process's information (for UI data binding).
/// </summary>
public class ProcessInfoSnapshot
{
/// <summary>Unique identifier (e.g. "Streamer_01").</summary>
public string Id { get; set; } = string.Empty;
/// <summary>
/// General name / category.
/// <para>Used for grouping or choosing an icon in the UI.</para>
/// </summary>
public string DisplayName { get; set; } = string.Empty;
/// <summary>Detailed description.</summary>
public string Description { get; set; } = string.Empty;
/// <summary>Operating-system process ID (only present while running).</summary>
public int? Pid { get; set; }
/// <summary>Current lifecycle status.</summary>
public ProcessStatus Status { get; set; }
/// <summary>Time of the most recent start.</summary>
public DateTime? LastStartTime { get; set; }
/// <summary>Time of the most recent exit.</summary>
public DateTime? LastExitTime { get; set; }
/// <summary>Current consecutive-failure count (used for the circuit-breaker decision).</summary>
public int ConsecutiveFailures { get; set; }
/// <summary>Expected time of the next start attempt (for a countdown in the UI).</summary>
public DateTime? NextRetryTime { get; set; }
/// <summary>Additional status information (e.g. the cool-down countdown text).</summary>
public string Message { get; set; } = string.Empty;
}
}

View File

@@ -0,0 +1,254 @@
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
namespace SHH.ProcessLaunchers
{
/// <summary>
/// 进程管理器 (核心实现类)
/// <para>核心职责:作为对外统一入口 (Facade),维护所有受管进程的容器。</para>
/// <para>主要功能:负责路由外部指令(启动/停止)到具体的进程实例,并处理事件分发。</para>
/// </summary>
public class ProcessManager : IProcessManager, IDisposable
{
#region --- 1. (Fields & Events) ---
/// <summary>
/// Container of managed processes, safe for concurrent access.
/// <para>Key: ProcessConfig.Id (unique identifier)</para>
/// <para>Value: ManagedProcess (managed instance)</para>
/// </summary>
private readonly ConcurrentDictionary<string, ManagedProcess> _processes
= new ConcurrentDictionary<string, ManagedProcess>();
/// <summary>
/// Logging service, supplied through the constructor.
/// </summary>
private readonly ILauncherLogger _logger;
// ---------------------------------------------------------
// Events exposed to callers
// ---------------------------------------------------------
/// <summary>
/// Raised when a line is received from any child process's standard output or error stream.
/// </summary>
public event EventHandler<ProcessOutputEventArgs> OnOutputReceived;
/// <summary>
/// Raised when the status of any child process changes.
/// </summary>
public event EventHandler<ProcessStateEventArgs> OnStateChanged;
#endregion
#region --- 2. (Constructor & Dispose) ---
/// <summary>
/// Creates a process manager.
/// </summary>
/// <param name="logger">Logger to use; when null a <see cref="NullLogger"/> is substituted so logging calls never hit a null reference.</param>
public ProcessManager(ILauncherLogger logger = null)
{
    if (logger == null)
    {
        logger = new NullLogger();
    }

    _logger = logger;
}
/// <summary>
/// Releases the manager: detaches event subscribers, stops every managed process and clears
/// the container.
/// <para>Unlike <see cref="StopAll"/>, this waits (up to 5 seconds) for the stop operations to
/// finish, so child processes are not left running when the host exits right after disposal.</para>
/// </summary>
public void Dispose()
{
    // 1. Detach subscribers first. This prevents leaks from UI code that never unsubscribed and
    //    ensures no subscriber is called back (possibly needing the disposing thread) while we wait.
    OnOutputReceived = null;
    OnStateChanged = null;

    // 2. Stop all child processes concurrently and wait for the kills to be issued.
    _logger.LogLifecycle("ALL", LogAction.Stop, LogTrigger.User, "执行批量停止");
    try
    {
        Task[] stops = _processes.Values
            .Select(p => Task.Run(() => p.ExecuteStop(LogTrigger.User, "批量停止")))
            .ToArray();

        // Bounded wait: disposal must not hang on a misbehaving logger or process.
        Task.WaitAll(stops, TimeSpan.FromSeconds(5));
    }
    catch (AggregateException)
    {
        // A failed stop must not make Dispose throw; the remaining clean-up still runs.
    }

    // 3. Drop the references.
    _processes.Clear();
}
#endregion
#region --- 3. API (Public Methods) ---
/// <summary>
/// Registers a new process configuration with the manager.
/// </summary>
/// <param name="config">Process configuration (executable path, arguments, restart policy, ...).</param>
/// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
/// <exception cref="ArgumentException">The configuration's Id is null, empty or whitespace.</exception>
/// <exception cref="InvalidOperationException">A process with the same Id is already registered.</exception>
public void Register(ProcessConfig config)
{
    if (config == null)
        throw new ArgumentNullException(nameof(config));

    // 1. The Id is the dictionary key and must be present.
    if (string.IsNullOrWhiteSpace(config.Id))
        throw new ArgumentException("进程配置无效:必须包含唯一的 Id");

    // 2. Create the managed wrapper ("this" is passed so it can call back the Dispatch* methods).
    var process = new ManagedProcess(config, this, _logger);

    // 3. Add atomically. TryAdd is the single duplicate check, so two concurrent registrations
    //    of the same Id cannot both appear to succeed.
    if (!_processes.TryAdd(config.Id, process))
        throw new InvalidOperationException($"进程 Id '{config.Id}' 已存在,禁止重复注册。");

    _logger.LogLifecycle(config.Id, LogAction.Output, LogTrigger.System,
        $"进程配置已注册: {config.DisplayName}");
}
/// <summary>
/// Starts the process registered under the given ID.
/// <para>When no such process is registered, an error lifecycle record is logged instead.</para>
/// </summary>
/// <param name="id">Unique identifier of the process (ProcessConfig.Id).</param>
public void Start(string id)
{
    if (!_processes.TryGetValue(id, out var target))
    {
        // Unknown ID: report it through the logger.
        _logger.LogLifecycle(id, LogAction.Error, LogTrigger.User, "启动失败:未找到指定 ID 的进程配置");
        return;
    }

    // Attributed to the user (manual start).
    target.ExecuteStart(LogTrigger.User, "用户手动启动指令");
}
/// <summary>
/// Starts every registered process in order.
/// <para>Processes are ordered by StartupOrder (ascending), then by Id, and started one after
/// another; after each start the method waits that process's PostStartupDelayMs when it is
/// greater than zero.</para>
/// </summary>
/// <returns>A task that completes after the last start and its delay.</returns>
public async Task StartAllAsync()
{
    _logger.LogLifecycle("ALL", LogAction.Start, LogTrigger.User, "执行有序批量启动");

    // Deterministic order: weight first, Id as the tie-breaker.
    var ordered =
        (from managed in _processes.Values
         orderby managed.Config.StartupOrder, managed.Config.Id
         select managed).ToList();

    foreach (var managed in ordered)
    {
        // Issues the start only; does not wait for the process to become ready.
        managed.ExecuteStart(LogTrigger.User, "有序批量启动");

        // Stagger start-ups to avoid a CPU/IO spike.
        int pauseMs = managed.Config.PostStartupDelayMs;
        if (pauseMs <= 0)
        {
            continue;
        }

        await Task.Delay(pauseMs);
    }

    _logger.LogLifecycle("ALL", LogAction.Start, LogTrigger.User, "有序批量启动完成");
}
/// <summary>
/// Stops the process registered under the given ID.
/// <para>When no such process is registered, an error lifecycle record is logged (matching
/// the behaviour of <see cref="Start"/>) and nothing else happens.</para>
/// </summary>
/// <param name="id">Unique identifier of the process (ProcessConfig.Id).</param>
public void Stop(string id)
{
    if (!_processes.TryGetValue(id, out var p))
    {
        _logger.LogLifecycle(id, LogAction.Error, LogTrigger.User, "停止失败:未找到指定 ID 的进程配置");
        return;
    }

    p.ExecuteStop(LogTrigger.User, "用户手动停止指令");
}
/// <summary>
/// Stops every managed process.
/// <para>Each stop is queued to the thread pool and this method returns without waiting for them.</para>
/// </summary>
public void StopAll()
{
    _logger.LogLifecycle("ALL", LogAction.Stop, LogTrigger.User, "执行批量停止");

    foreach (ManagedProcess managed in _processes.Values)
    {
        ManagedProcess target = managed;
        // Fire and forget: stops run concurrently.
        Task.Run(() => target.ExecuteStop(LogTrigger.User, "批量停止"));
    }
}
/// <summary>
/// Resets the resource-alarm state of the given process.
/// <para>Called after the user marks an alarm as handled; clears the guards' latch so a new
/// alarm can be raised. When no such process is registered, an error lifecycle record is
/// logged (matching the behaviour of <see cref="Start"/>).</para>
/// </summary>
/// <param name="id">Unique identifier of the process (ProcessConfig.Id).</param>
public void ResetGuard(string id)
{
    if (!_processes.TryGetValue(id, out var p))
    {
        _logger.LogLifecycle(id, LogAction.Error, LogTrigger.User, "复位失败:未找到指定 ID 的进程配置");
        return;
    }

    p.ResetGuards();
    _logger.LogLifecycle(id, LogAction.ResourceCheck, LogTrigger.User, "用户手动复位资源报警锁");
}
/// <summary>
/// Returns a status snapshot of every managed process.
/// <para>Intended for binding to a UI list or for periodic refresh.</para>
/// </summary>
/// <returns>One snapshot per registered process.</returns>
public List<ProcessInfoSnapshot> GetSnapshot()
{
    var snapshots = new List<ProcessInfoSnapshot>();
    foreach (var managed in _processes.Values)
    {
        snapshots.Add(managed.GetSnapshot());
    }

    return snapshots;
}
#endregion
#region --- 4. (Internal Dispatchers) ---
// 说明C# 的 event 只能在定义类内部 Invoke。
// 为了让内部类 ManagedProcess 也能触发 Manager 的对外事件,我们提供了这几个 internal 方法。
// 这些方法充当了内部类与外部事件之间的桥梁。
/// <summary>
/// Raises <see cref="OnStateChanged"/> on behalf of a managed process.
/// <para>Events can only be invoked from the declaring class, so ManagedProcess calls this bridge.</para>
/// </summary>
/// <param name="processId">ID of the process whose state changed.</param>
/// <param name="newState">The new state.</param>
internal void DispatchStateChange(string processId, ProcessStatus newState)
{
    // Fill ProcessName from the registered configuration; empty when the ID is not (or no longer) registered.
    string processName = _processes.TryGetValue(processId, out var managed)
        ? managed.Config.DisplayName ?? string.Empty
        : string.Empty;

    // ?.Invoke reads the delegate once, so a concurrent unsubscribe cannot cause a null dereference.
    OnStateChanged?.Invoke(this, new ProcessStateEventArgs
    {
        ProcessId = processId,
        ProcessName = processName,
        State = newState
    });
}
/// <summary>
/// Raises <see cref="OnOutputReceived"/> on behalf of a managed process.
/// <para>Events can only be invoked from the declaring class, so ManagedProcess calls this bridge.</para>
/// </summary>
/// <param name="processId">ID of the process that produced the output.</param>
/// <param name="content">The output line.</param>
/// <param name="isError">Whether the line came from the error stream.</param>
internal void DispatchOutput(string processId, string content, bool isError)
{
    // Fill ProcessName from the registered configuration; empty when the ID is not (or no longer) registered.
    string processName = _processes.TryGetValue(processId, out var managed)
        ? managed.Config.DisplayName ?? string.Empty
        : string.Empty;

    // ?.Invoke reads the delegate once, so a concurrent unsubscribe cannot cause a null dereference.
    OnOutputReceived?.Invoke(this, new ProcessOutputEventArgs
    {
        ProcessId = processId,
        ProcessName = processName,
        Content = content,
        IsError = isError
    });
}
#endregion
}
}

View File

@@ -0,0 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFramework>netstandard2.0</TargetFramework>
</PropertyGroup>
</Project>