完成进程启动器接口的设计
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio Version 17
|
||||
VisualStudioVersion = 17.14.36623.8 d17.14
|
||||
# Visual Studio Version 18
|
||||
VisualStudioVersion = 18.1.11312.151 d18.0
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SHH.CameraSdk", "SHH.CameraSdk\SHH.CameraSdk.csproj", "{21B70A94-43FC-4D17-AB83-9E4B5178397E}"
|
||||
EndProject
|
||||
@@ -13,6 +13,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SHH.NetMQ", "SHH.NetMQ\SHH.
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SHH.CameraDashboard", "SHH.CameraDashboard\SHH.CameraDashboard.csproj", "{03C249D7-BCF1-404D-AD09-7AB39BA263AD}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SHH.ProcessLaunchers", "SHH.ProcessLaunchers\SHH.ProcessLaunchers.csproj", "{E12F2D41-B7BB-4303-AD01-5DCD02D7FF3C}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
@@ -39,6 +41,10 @@ Global
|
||||
{03C249D7-BCF1-404D-AD09-7AB39BA263AD}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{03C249D7-BCF1-404D-AD09-7AB39BA263AD}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{03C249D7-BCF1-404D-AD09-7AB39BA263AD}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{E12F2D41-B7BB-4303-AD01-5DCD02D7FF3C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{E12F2D41-B7BB-4303-AD01-5DCD02D7FF3C}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{E12F2D41-B7BB-4303-AD01-5DCD02D7FF3C}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{E12F2D41-B7BB-4303-AD01-5DCD02D7FF3C}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
|
||||
72
SHH.ProcessLaunchers/DashboardLogger.cs
Normal file
72
SHH.ProcessLaunchers/DashboardLogger.cs
Normal file
@@ -0,0 +1,72 @@
|
||||
using SHH.ProcessLaunchers;
|
||||
|
||||
namespace SHH.ProcessLauncher
|
||||
{
|
||||
/// <summary>
/// Dashboard logging adapter.
/// <para>Implements <see cref="ILauncherLogger"/>. The intended role is to forward launcher logs
/// to the business-layer log helper and the UI notification service; those forwarding calls are
/// still commented-out placeholders, so at present neither method emits anything.</para>
/// </summary>
public class DashboardLogger : ILauncherLogger
{
    #region --- Interface implementation ---

    /// <summary>
    /// Handles one line of ordinary console output (StdOut/StdErr) of a managed process.
    /// </summary>
    /// <param name="processId">Identifier of the process that produced the line.</param>
    /// <param name="message">The text of the line.</param>
    /// <param name="isError">True for StdErr, false for StdOut.</param>
    public void LogConsole(string processId, string message, bool isError)
    {
        // 1. Severity tag used by the (currently disabled) forwarding call below.
        string prefix = isError ? "[ERR]" : "[INF]";

        //// The line could be written out directly here, or raised as an event for the UI to bind to a TextBox.
        //// For demonstration purposes the LogHelper.Debug method is used.
        //LogHelper.Debug($"[Console] {prefix} <{processId}>: {message}");
    }

    /// <summary>
    /// Handles a key lifecycle event of a managed process.
    /// </summary>
    /// <param name="processId">Identifier of the affected process.</param>
    /// <param name="action">What happened.</param>
    /// <param name="trigger">Who or what caused it.</param>
    /// <param name="reason">Free-text reason.</param>
    /// <param name="payload">Optional context object; appended via its string conversion when not null.</param>
    public void LogLifecycle(string processId, LogAction action, LogTrigger trigger, string reason, object payload = null)
    {
        // 1. Build a readable structured log line.
        //    ToUpperInvariant keeps the action tag stable regardless of the current culture
        //    (a culture-sensitive ToUpper maps 'i' to a dotted capital I under Turkish cultures).
        string logText = $"[{action.ToString().ToUpperInvariant()}] 触发源:{trigger} | 原因:{reason}";
        if (payload != null) logText += $" | 数据:{payload}";

        // 2. Pick the business-side response according to the trigger (all branches are placeholders for now).
        switch (trigger)
        {
            //case LogTrigger.User:
            //    // Scenario: the user clicked a button.
            //    // Policy: expected operation; record an Info log only, do not disturb the user with a popup.
            //    LogHelper.Info(processId, logText);
            //    break;

            //case LogTrigger.System:
            //    // Scenario: automatic restart after a crash, circuit-breaker recovery, etc.
            //    // Policy: record a Warn log; operators need to know the system healed itself.
            //    LogHelper.Warn(processId, logText);
            //    break;

            //case LogTrigger.ResourceGuard:
            //    // Scenario: killed for exceeding the memory limit, CPU alarm.
            //    // Policy: serious problem; record an Error log and raise a strong UI alert (toast/popup).
            //    LogHelper.Error(processId, logText);

            //    // Call the notification service (simulated).
            //    NotificationService.ShowWarning($"进程异常: {processId}", $"触发资源管控: {reason}");
            //    break;

            //case LogTrigger.Scheduler:
            //    // Scenario: scheduled restart.
            //    LogHelper.Info(processId, $"[计划任务] {logText}");
            //    break;

            //default:
            //    LogHelper.Info(processId, logText);
            //    break;
        }
    }

    #endregion
}
|
||||
}
|
||||
53
SHH.ProcessLaunchers/ILauncherLogger.cs
Normal file
53
SHH.ProcessLaunchers/ILauncherLogger.cs
Normal file
@@ -0,0 +1,53 @@
|
||||
namespace SHH.ProcessLaunchers
|
||||
{
|
||||
/// <summary>
/// Logging contract used by the launcher.
/// <para>Separates the code that produces log entries from whatever consumes them, and carries
/// structured (action / trigger / reason) information for lifecycle events.</para>
/// <para>An implementation may forward entries to a UI console, a local file or a remote log service.</para>
/// </summary>
public interface ILauncherLogger
{
    /// <summary>
    /// Records one line of streamed console output.
    /// <para>Used for the redirected StdOut and StdErr of a child process.</para>
    /// </summary>
    /// <param name="processId">Unique process identifier (ProcessConfig.Id).</param>
    /// <param name="message">The log text.</param>
    /// <param name="isError">True when the line came from StdErr, false for StdOut.</param>
    void LogConsole(string processId, string message, bool isError);

    /// <summary>
    /// Records a key lifecycle event as a structured entry.
    /// <para>Covers start/stop, crash, circuit-break and similar milestones, for later operational analysis.</para>
    /// </summary>
    /// <param name="processId">Unique process identifier (ProcessConfig.Id).</param>
    /// <param name="action">The kind of action (Start/Stop/Crash...).</param>
    /// <param name="trigger">The source that caused it (User/System...).</param>
    /// <param name="reason">Reason or remark for the operation; expected to be supplied so the event can be traced.</param>
    /// <param name="payload">Optional context object, e.g. { PID=123, ExitCode=-1 }.</param>
    void LogLifecycle(
        string processId,
        LogAction action,
        LogTrigger trigger,
        string reason,
        object payload = null);
}
|
||||
|
||||
/// <summary>
/// No-op logger (Null Object pattern).
/// <para>Stands in when no real logger has been injected, so callers can log without null checks.</para>
/// </summary>
public class NullLogger : ILauncherLogger
{
    /// <summary>
    /// Discards the console line.
    /// </summary>
    public void LogConsole(string processId, string message, bool isError) { /* intentionally empty */ }

    /// <summary>
    /// Discards the lifecycle event.
    /// </summary>
    public void LogLifecycle(
        string processId,
        LogAction action,
        LogTrigger trigger,
        string reason,
        object payload = null)
    { /* intentionally empty */ }
}
|
||||
}
|
||||
176
SHH.ProcessLaunchers/IProcessManager.cs
Normal file
176
SHH.ProcessLaunchers/IProcessManager.cs
Normal file
@@ -0,0 +1,176 @@
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace SHH.ProcessLaunchers
|
||||
{
|
||||
/// <summary>
/// Core contract of the process manager: registration, start/stop control, guard reset,
/// status snapshots and output/state notifications.
/// </summary>
public interface IProcessManager
{
    // --- Registration and control ---

    /// <summary>
    /// Registers the configuration of a process to be managed.
    /// </summary>
    /// <param name="config">The process configuration object.</param>
    void Register(ProcessConfig config);

    /// <summary>
    /// Starts the process with the given name.
    /// </summary>
    /// <param name="name">
    /// The name defined in the configuration.
    /// NOTE(review): ProcessConfig exposes Id and DisplayName but no Name; presumably this is the Id — confirm against the implementation.
    /// </param>
    void Start(string name);

    /// <summary>
    /// Starts every registered process.
    /// </summary>
    /// <returns>A task representing the start-all operation.</returns>
    Task StartAllAsync();

    /// <summary>
    /// Stops the given process (gracefully or forcibly).
    /// </summary>
    /// <param name="name">The process name.</param>
    void Stop(string name);

    /// <summary>
    /// Stops all processes.
    /// </summary>
    void StopAll();

    /// <summary>
    /// Resets the resource-monitoring state of the given process.
    /// </summary>
    /// <param name="processName">The process name.</param>
    void ResetGuard(string processName);

    /// <summary>
    /// Returns a snapshot of the current state of all processes.
    /// </summary>
    List<ProcessInfoSnapshot> GetSnapshot();

    // --- Events ---

    /// <summary>
    /// Raised when standard output or standard error text is received from a child process.
    /// </summary>
    event EventHandler<ProcessOutputEventArgs> OnOutputReceived;

    /// <summary>
    /// Raised when the lifecycle state of a process changes.
    /// </summary>
    event EventHandler<ProcessStateEventArgs> OnStateChanged;
}
|
||||
|
||||
/// <summary>
/// Resource guard that watches the working set of a process against two thresholds.
/// <para>Critical threshold: reported immediately on every check, with no debounce and no latch.</para>
/// <para>Warning threshold: reported once, only after the usage has stayed above the threshold for
/// the configured duration; afterwards the guard stays latched (silent) until <see cref="Reset"/>.</para>
/// </summary>
public class MemoryGuard : IResourceGuard
{
    /// <summary>Name of this guard; always "MemoryGuard".</summary>
    public string Name => "MemoryGuard";

    private readonly long _warningBytes;
    private readonly long _criticalBytes;
    private readonly TimeSpan _alertDuration; // how long usage must stay over the warning line (e.g. 3 minutes)

    // --- Internal state ---

    // Runs while usage is continuously above the warning line; null when not observing.
    // A Stopwatch is monotonic, so the measured duration is unaffected by DST shifts or
    // wall-clock adjustments (which a DateTime.Now difference would pick up).
    private Stopwatch _overLimitWatch;

    // True once the warning has been reported; suppresses further warnings until Reset().
    private bool _isAlertLatched;

    /// <summary>
    /// Creates the guard.
    /// </summary>
    /// <param name="warningMb">Warning threshold in megabytes.</param>
    /// <param name="criticalMb">Critical (circuit-break) threshold in megabytes.</param>
    /// <param name="durationMinutes">Minutes the usage must stay above the warning threshold before a warning is reported.</param>
    public MemoryGuard(int warningMb, int criticalMb, int durationMinutes = 3)
    {
        // Widen before multiplying so large MB values do not overflow int.
        _warningBytes = (long)warningMb * 1024 * 1024;
        _criticalBytes = (long)criticalMb * 1024 * 1024;
        _alertDuration = TimeSpan.FromMinutes(durationMinutes);
    }

    /// <summary>
    /// Checks the current working set of <paramref name="process"/>.
    /// </summary>
    /// <param name="process">The process to inspect.</param>
    /// <param name="reason">Set to a description when the result is Warning or Critical; otherwise null.</param>
    /// <returns>
    /// Critical when usage exceeds the critical threshold; Warning once when usage has exceeded the
    /// warning threshold for the configured duration; Normal otherwise, including when reading the
    /// process fails for any reason.
    /// </returns>
    public GuardResult Check(Process process, out string reason)
    {
        reason = null;
        try
        {
            process.Refresh();
            long currentUsage = process.WorkingSet64;

            // 1. Critical line first: no debounce, and not affected by the warning latch.
            //    The latch/timer state is deliberately left untouched here.
            if (currentUsage > _criticalBytes)
            {
                reason = $"[严重] 内存 {FormatSize(currentUsage)} > 熔断线 {FormatSize(_criticalBytes)} (立即执行管控)";
                return GuardResult.Critical;
            }

            // 2. Usage is at or below the warning line: abandon any running observation.
            //    The latch is NOT cleared here; only Reset() clears it.
            if (currentUsage <= _warningBytes)
            {
                _overLimitWatch = null;
                return GuardResult.Normal;
            }

            // 3. Above the warning line.
            // A. Already reported: stay silent.
            if (_isAlertLatched)
            {
                return GuardResult.Normal;
            }

            // B. First sighting: start timing, report nothing yet.
            Stopwatch watch = _overLimitWatch; // local copy: Reset() may clear the field concurrently
            if (watch == null)
            {
                _overLimitWatch = Stopwatch.StartNew();
                return GuardResult.Normal;
            }

            // C. Not over the line for long enough yet.
            if (watch.Elapsed < _alertDuration)
            {
                return GuardResult.Normal;
            }

            // D. Sustained long enough: report once and latch.
            _isAlertLatched = true;
            _overLimitWatch = null;

            reason = $"[报警] 内存 {FormatSize(currentUsage)} > 阈值 {FormatSize(_warningBytes)} 且持续超过 {_alertDuration.TotalMinutes}分钟";
            return GuardResult.Warning;
        }
        catch
        {
            // Best effort: any failure while reading the process (for example it already exited)
            // is treated as "nothing to report".
            return GuardResult.Normal;
        }
    }

    /// <summary>
    /// Clears the warning latch and any running observation, so a new warning can be reported.
    /// </summary>
    public void Reset()
    {
        _isAlertLatched = false;
        _overLimitWatch = null;
    }

    // Formats a byte count as whole megabytes (integer division).
    private static string FormatSize(long bytes) => $"{bytes / 1024 / 1024}MB";
}
|
||||
}
|
||||
29
SHH.ProcessLaunchers/IResourceGuard.cs
Normal file
29
SHH.ProcessLaunchers/IResourceGuard.cs
Normal file
@@ -0,0 +1,29 @@
|
||||
using System.Diagnostics;
|
||||
|
||||
namespace SHH.ProcessLaunchers
|
||||
{
|
||||
/// <summary>
/// Resource guard contract (strategy pattern): a pluggable health check for a running process.
/// </summary>
public interface IResourceGuard
{
    /// <summary>
    /// Name of the guard (for example "MemoryGuard").
    /// </summary>
    string Name { get; }

    /// <summary>
    /// Performs a health check.
    /// </summary>
    /// <param name="process">The running process to inspect.</param>
    /// <param name="reason">[out] A detailed description when the result is abnormal.</param>
    /// <returns>The check result (Normal/Warning/Critical).</returns>
    GuardResult Check(Process process, out string reason);

    /// <summary>
    /// Manual reset / mark as handled.
    /// <para>Called after the user acknowledges an alert in the UI; clears the guard's internal alert latch.</para>
    /// </summary>
    void Reset();
}
|
||||
}
|
||||
486
SHH.ProcessLaunchers/ManagedProcess.cs
Normal file
486
SHH.ProcessLaunchers/ManagedProcess.cs
Normal file
@@ -0,0 +1,486 @@
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
using System.IO;
|
||||
using System.Threading;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace SHH.ProcessLaunchers
|
||||
{
|
||||
// =========================================================
|
||||
// 内部核心类:单个受管进程 (封装了所有复杂逻辑)
|
||||
// =========================================================
|
||||
/// <summary>
|
||||
/// 受管进程实例 (Internal Worker)
|
||||
/// <para>职责:管理【单个】进程的生命周期。</para>
|
||||
/// <para>功能:包含 启动/停止/自愈/熔断/监控 的核心状态机逻辑。</para>
|
||||
/// </summary>
|
||||
internal class ManagedProcess
|
||||
{
|
||||
#region --- 字段定义 (Fields) ---
|
||||
|
||||
// Collaborators supplied through the constructor.
private readonly ProcessConfig _config;
private readonly ProcessManager _manager;
private readonly ILauncherLogger _logger;

/// <summary>
/// The operating-system process object currently (or most recently) wrapped by this instance.
/// </summary>
private Process _process;

/// <summary>
/// Whether the process is meant to be stopped.
/// <para>True: stopped on purpose; exit handling does not restart it.</para>
/// <para>False: supervised; an exit is fed into the restart / circuit-breaker decision.</para>
/// </summary>
private bool _isIntentionalStop = true;

// --- Cancellation sources for background work ---
private CancellationTokenSource _delayCts;      // pending restart / circuit-breaker countdown
private CancellationTokenSource _monitorCts;    // resource monitoring loop
private CancellationTokenSource _schedulerCts;  // scheduled periodic restart

// --- Runtime statistics ---
private int _consecutiveFailures;   // consecutive failure count (circuit-breaker counter)
private DateTime? _lastStartTime;   // last successful start (used to compute how long the process ran)
private DateTime? _lastExitTime;    // last detected exit
private DateTime? _nextRetryTime;   // when the next automatic retry is due

/// <summary>
/// Current lifecycle state (read-only from outside).
/// </summary>
public ProcessStatus Status { get; private set; } = ProcessStatus.Stopped;

/// <summary>
/// The configuration this instance was created with.
/// </summary>
public ProcessConfig Config
{
    get { return _config; }
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region --- 构造函数 ---
|
||||
|
||||
/// <summary>
/// Creates the worker for a single managed process.
/// </summary>
/// <param name="config">Configuration of the process; required.</param>
/// <param name="manager">Owning manager that receives output and state notifications; required.</param>
/// <param name="logger">Logger to use; when null a <see cref="NullLogger"/> is substituted so logging calls never fail.</param>
/// <exception cref="ArgumentNullException"><paramref name="config"/> or <paramref name="manager"/> is null.</exception>
public ManagedProcess(ProcessConfig config, ProcessManager manager, ILauncherLogger logger)
{
    // Both are dereferenced on every start; fail here rather than with a later NullReferenceException.
    _config = config ?? throw new ArgumentNullException(nameof(config));
    _manager = manager ?? throw new ArgumentNullException(nameof(manager));

    // NullLogger exists precisely for the "no logger injected" case.
    _logger = logger ?? new NullLogger();
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region --- 外部指令 (External Commands) ---
|
||||
|
||||
/// <summary>
/// Entry point for a start request.
/// </summary>
/// <param name="trigger">Who or what requested the start (passed to the lifecycle log).</param>
/// <param name="reason">Reason text (passed to the lifecycle log).</param>
public void ExecuteStart(LogTrigger trigger, string reason)
{
    // Nothing to do while the process is already running or being started.
    switch (Status)
    {
        case ProcessStatus.Running:
        case ProcessStatus.Starting:
            return;
    }

    // 1. Clear the failure bookkeeping and any pending retry; from here on an exit is
    //    treated as unexpected and handled by the exit logic.
    _delayCts?.Cancel();
    _isIntentionalStop = false;
    _consecutiveFailures = 0;
    _nextRetryTime = null;

    // 2. Record the request.
    _logger.LogLifecycle(_config.Id, LogAction.Start, trigger, reason);

    // 3. Launch.
    LaunchProcess();
}
|
||||
|
||||
/// <summary>
/// Entry point for a stop request.
/// </summary>
/// <param name="trigger">Who or what requested the stop (passed to the lifecycle log).</param>
/// <param name="reason">Reason text (passed to the lifecycle log).</param>
public void ExecuteStop(LogTrigger trigger, string reason)
{
    // 1. Mark the stop as intentional first, so the exit logic will not restart the process.
    _isIntentionalStop = true;

    // 2. Cancel every background activity and forget the pending retry time.
    _delayCts?.Cancel();
    _monitorCts?.Cancel();
    _schedulerCts?.Cancel();
    _nextRetryTime = null;

    // 3. Log only when there is an actual transition (not already stopped).
    bool wasStopped = Status == ProcessStatus.Stopped;
    if (!wasStopped)
    {
        _logger.LogLifecycle(_config.Id, LogAction.Stop, trigger, reason);
    }

    // 4. Kill the process and publish the new state.
    KillProcess();
    UpdateStatus(ProcessStatus.Stopped);
}
|
||||
|
||||
/// <summary>
/// Calls Reset() on every configured resource guard; does nothing when no guard list is configured.
/// </summary>
public void ResetGuards()
{
    var guards = _config.Guards;
    if (guards == null)
    {
        return;
    }

    foreach (IResourceGuard guard in guards)
    {
        guard.Reset();
    }
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region --- 核心启动逻辑 (Core Launch Logic) ---
|
||||
|
||||
/// <summary>
/// Performs one launch attempt: validates the executable path, builds the start info,
/// wires the output/exit handlers, starts the process and attaches monitoring and the
/// optional scheduled restart. Any failure is logged and routed to the exit logic.
/// </summary>
private void LaunchProcess()
{
    try
    {
        UpdateStatus(ProcessStatus.Starting);

        // 1. Path check.
        string path = Path.GetFullPath(_config.ExePath);
        if (!File.Exists(path))
        {
            _logger.LogLifecycle(_config.Id, LogAction.Error, LogTrigger.System, "可执行文件未找到", path);
            // A missing file goes straight to the exit decision logic (which may trip the circuit breaker).
            HandleExitLogic(exitCode: -1);
            return;
        }

        // 2. Build the ProcessStartInfo.
        var psi = new ProcessStartInfo
        {
            FileName = path,
            Arguments = _config.Arguments,
            // Default to the executable's directory when no working directory is configured.
            WorkingDirectory = string.IsNullOrEmpty(_config.WorkingDirectory) ? Path.GetDirectoryName(path) : _config.WorkingDirectory,

            // Window visibility.
            CreateNoWindow = !_config.Visible,

            // Must be false for stream redirection to work.
            UseShellExecute = false,

            // Stream redirection switches.
            RedirectStandardOutput = _config.EnableLogRedirect,
            RedirectStandardError = _config.EnableLogRedirect
        };

        // Handlers capture this local instead of the _process field, so an event raised by an
        // older instance can never be mistaken for an event of the current one.
        var process = new Process { StartInfo = psi, EnableRaisingEvents = true };

        // Publish the new instance before Start() so an immediate exit is still recognised as
        // current, then release the handle held by the previous instance.
        Process previous = _process;
        _process = process;
        previous?.Dispose();

        // 3. Redirected stream handlers (asynchronous line reads).
        if (_config.EnableLogRedirect)
        {
            process.OutputDataReceived += (s, e) =>
            {
                if (!string.IsNullOrEmpty(e.Data))
                {
                    // A. Record in the logging system.
                    _logger.LogConsole(_config.Id, e.Data, false);
                    // B. Raise the outward event.
                    _manager.DispatchOutput(_config.Id, e.Data, false);
                }
            };
            process.ErrorDataReceived += (s, e) =>
            {
                if (!string.IsNullOrEmpty(e.Data))
                {
                    _logger.LogConsole(_config.Id, e.Data, true);
                    _manager.DispatchOutput(_config.Id, e.Data, true);
                }
            };
        }

        // 4. Exit handler (core lifecycle hook).
        process.Exited += (s, e) =>
        {
            // Ignore a late exit notification from an instance that has already been replaced;
            // treating it as a crash of the current process would schedule a duplicate launch.
            if (!ReferenceEquals(process, _process)) return;

            int code = -1;
            try { code = process.ExitCode; } catch { /* exit code unavailable: keep -1 */ }
            HandleExitLogic(code);
        };

        // 5. Start the operating-system process.
        if (!process.Start())
        {
            throw new InvalidOperationException("Process.Start() 返回 false,启动失败");
        }

        // 6. Begin asynchronous reads (only valid after Start).
        if (_config.EnableLogRedirect)
        {
            process.BeginOutputReadLine();
            process.BeginErrorReadLine();
        }

        // 7. Update state.
        _lastStartTime = DateTime.Now;
        UpdateStatus(ProcessStatus.Running);
        _logger.LogLifecycle(_config.Id, LogAction.Start, LogTrigger.System, "进程启动成功", new { PID = process.Id });

        // 8. Attach the resource monitoring loop.
        StartMonitoring();

        // 9. Attach the scheduled restart, if configured.
        if (_config.AutoRestartIntervalMinutes > 0)
        {
            // Compute in 64-bit and clamp: minutes * 60000 overflows int above ~35791 minutes.
            long intervalMs = (long)_config.AutoRestartIntervalMinutes * 60 * 1000;
            ScheduleScheduledRestart((int)Math.Min(intervalMs, int.MaxValue));
        }
    }
    catch (Exception ex)
    {
        _logger.LogLifecycle(_config.Id, LogAction.Error, LogTrigger.System, $"启动过程异常: {ex.Message}");
        HandleExitLogic(-1);
    }
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region --- 守护与监控逻辑 (Guard & Monitor) ---
|
||||
|
||||
/// <summary>
/// Replaces any previous monitoring loop with a new one that polls the configured guards.
/// No loop is started when no guards are configured.
/// </summary>
private void StartMonitoring()
{
    // 1. Cancel the previous loop and create a fresh cancellation source.
    _monitorCts?.Cancel();
    _monitorCts = new CancellationTokenSource();

    // Without guards there is nothing to poll.
    if (_config.Guards == null || _config.Guards.Count == 0)
    {
        return;
    }

    CancellationToken token = _monitorCts.Token;

    // 2. Run the polling loop in the background.
    Task.Run(() => PollGuardsAsync(token), token);

    async Task PollGuardsAsync(CancellationToken ct)
    {
        while (!ct.IsCancellationRequested)
        {
            try
            {
                // Polling interval: 3 seconds.
                await Task.Delay(3000, ct);

                // Stop polling once the process is gone.
                if (_process == null || _process.HasExited)
                {
                    return;
                }

                foreach (var guard in _config.Guards)
                {
                    GuardResult verdict = guard.Check(_process, out string reason);

                    switch (verdict)
                    {
                        case GuardResult.Warning:
                            // Warning: log only, the process is left alone.
                            _logger.LogLifecycle(_config.Id, LogAction.ResourceCheck, LogTrigger.ResourceGuard, $"[警告] {reason}");
                            break;

                        case GuardResult.Critical:
                            // Critical: log, kill the process and end this loop; the restart
                            // is driven by the process exit handling.
                            _logger.LogLifecycle(_config.Id, LogAction.Restart, LogTrigger.ResourceGuard, $"[严重] {reason} -> 执行管控重启");
                            KillProcess();
                            return;
                    }
                }
            }
            catch (TaskCanceledException)
            {
                // Normal cancellation.
                return;
            }
            catch (Exception ex)
            {
                // Any other failure is reported and polling continues.
                _logger.LogConsole(_config.Id, $"监控线程异常: {ex.Message}", true);
            }
        }
    }
}
|
||||
|
||||
/// <summary>
/// Schedules a one-shot planned restart after <paramref name="delayMs"/> milliseconds,
/// replacing any previously scheduled one.
/// </summary>
/// <param name="delayMs">Delay in milliseconds before the restart is attempted.</param>
private void ScheduleScheduledRestart(int delayMs)
{
    _schedulerCts?.Cancel();
    _schedulerCts = new CancellationTokenSource();

    Task.Delay(delayMs, _schedulerCts.Token).ContinueWith(t =>
    {
        // Act only when the delay was not cancelled and the process is still running.
        if (!t.IsCanceled && Status == ProcessStatus.Running)
        {
            _logger.LogLifecycle(_config.Id, LogAction.Restart, LogTrigger.Scheduler, "执行计划性重启 (AutoRestart)");
            KillProcess(); // the exit handling then performs the restart
        }
    },
    // Explicit scheduler, matching WaitAndExec: without it the continuation uses
    // TaskScheduler.Current, i.e. whatever scheduler the caller happens to be running on.
    TaskScheduler.Default);
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region --- 决策大脑 (Decision Logic) ---
|
||||
|
||||
/// <summary>
/// Decision logic run after the process exits or a launch attempt fails:
/// either schedule a short retry or enter the circuit-breaker cool-down.
/// </summary>
/// <param name="exitCode">Exit code of the process (-1 when unknown or when the launch itself failed).</param>
private void HandleExitLogic(int exitCode)
{
    // 1. Stop the companion tasks of the process that just went away.
    _monitorCts?.Cancel();
    _schedulerCts?.Cancel();

    // 2. Intent check: after an intentional stop nothing is restarted.
    //    Kills issued by a resource guard or the scheduler leave the flag false and fall through.
    if (_isIntentionalStop) return;

    DateTime now = DateTime.Now;
    DateTime? previousExitTime = _lastExitTime;
    _lastExitTime = now;
    _logger.LogLifecycle(_config.Id, LogAction.Crash, LogTrigger.System, "侦测到进程退出", new { ExitCode = exitCode });

    // 3. Stability check: a process that ran longer than the threshold gets its failure count reset.
    //    _lastStartTime is only updated by a SUCCESSFUL start. If it is not newer than the previously
    //    recorded exit, the process has not run since then (this call reports a failed launch), so the
    //    run duration is zero. Without this, a stale start time makes a failed launch look like a long
    //    stable run, the counter is reset every time and the circuit breaker can never trip.
    bool ranSinceLastExit = _lastStartTime.HasValue
        && (!previousExitTime.HasValue || _lastStartTime.Value > previousExitTime.Value);
    double runDurationMs = ranSinceLastExit ? (now - _lastStartTime.Value).TotalMilliseconds : 0;

    if (runDurationMs > _config.StabilityThresholdMs)
    {
        if (_consecutiveFailures > 0)
            _logger.LogConsole(_config.Id, $"运行稳定({runDurationMs / 1000:F0}s),重置失败计数", false);
        _consecutiveFailures = 0;
    }
    else
    {
        _consecutiveFailures++;
    }

    // 4. Circuit breaker: too many consecutive failures means a long cool-down instead of a quick retry.
    if (_consecutiveFailures >= _config.MaxConsecutiveFailures)
    {
        EnterCoolingDown();
    }
    else
    {
        EnterShortRetry();
    }
}
|
||||
|
||||
/// <summary>
/// Moves to the pending-restart state and schedules a relaunch after the configured restart delay.
/// </summary>
private void EnterShortRetry()
{
    int retryDelayMs = _config.RestartDelayMs;

    UpdateStatus(ProcessStatus.PendingRestart);
    _nextRetryTime = DateTime.Now.AddMilliseconds(retryDelayMs);

    string progress = $"准备自动重启 ({_consecutiveFailures}/{_config.MaxConsecutiveFailures})";
    _logger.LogLifecycle(_config.Id, LogAction.Restart, LogTrigger.System, progress, new { DelayMs = retryDelayMs });

    // Relaunch once the delay has elapsed (unless it is cancelled first).
    WaitAndExec(retryDelayMs, LaunchProcess);
}
|
||||
|
||||
/// <summary>
/// Moves to the circuit-breaker cool-down state and schedules a recovery attempt
/// after the configured cool-down delay.
/// </summary>
private void EnterCoolingDown()
{
    int coolDownMs = _config.CircuitBreakerDelayMs;

    UpdateStatus(ProcessStatus.CoolingDown);
    _nextRetryTime = DateTime.Now.AddMilliseconds(coolDownMs);

    var details = new { Minutes = coolDownMs / 1000 / 60 };
    _logger.LogLifecycle(_config.Id, LogAction.CircuitBreak, LogTrigger.System, "触发熔断保护", details);

    // Try to recover once the cool-down has elapsed (unless it is cancelled first).
    WaitAndExec(coolDownMs, RecoverAfterCoolDown);

    void RecoverAfterCoolDown()
    {
        _logger.LogLifecycle(_config.Id, LogAction.Restart, LogTrigger.System, "熔断冷却结束,尝试恢复");
        LaunchProcess();
    }
}
|
||||
|
||||
/// <summary>
/// Runs <paramref name="action"/> on the thread pool after <paramref name="delayMs"/> milliseconds,
/// unless the delay is cancelled through <c>_delayCts</c> first.
/// </summary>
/// <param name="delayMs">Delay in milliseconds.</param>
/// <param name="action">The work to run when the delay elapses.</param>
private void WaitAndExec(int delayMs, Action action)
{
    // Only one delayed action may be pending at a time. Cancel the previous one before its
    // cancellation source is replaced; otherwise it becomes unreachable and would still fire
    // (for example a second, duplicate launch).
    _delayCts?.Cancel();

    var cts = new CancellationTokenSource();
    _delayCts = cts;

    Task.Delay(delayMs, cts.Token).ContinueWith(t =>
    {
        // Run only when the delay was not cancelled.
        if (!t.IsCanceled) action();
    }, TaskScheduler.Default);
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region --- 工具方法 (Helpers) ---
|
||||
|
||||
/// <summary>
/// Forcibly terminates the wrapped process, if there is one and it is still running,
/// then waits briefly for it to exit. Never throws.
/// </summary>
private void KillProcess()
{
    Process process = _process;
    if (process == null) return;

    try
    {
        // HasExited itself throws InvalidOperationException when the Process object was never
        // started (for example Start() failed), so it must be inside the try as well.
        if (process.HasExited) return;

        // Kills this process only. NOTE(review): child processes are not terminated here; the
        // Kill(entireProcessTree) overload exists on .NET Core 3.0+ — confirm the target framework
        // before switching to it.
        process.Kill();
        process.WaitForExit(500); // give the OS a moment to release resources
    }
    catch
    {
        // Best effort: ignore insufficient permissions, races with a natural exit,
        // or a process object that has no associated OS process.
    }
}
|
||||
|
||||
/// <summary>
/// Stores the new state and asks the manager to dispatch a state-change notification.
/// Does nothing when the state is unchanged.
/// </summary>
/// <param name="status">The new lifecycle state.</param>
private void UpdateStatus(ProcessStatus status)
{
    if (Status == status)
    {
        return;
    }

    Status = status;

    // Hand over to the manager, which raises the outward event.
    _manager.DispatchStateChange(_config.Id, status);
}
|
||||
|
||||
/// <summary>
/// Builds a snapshot DTO of the current state of this managed process.
/// </summary>
/// <returns>A new <see cref="ProcessInfoSnapshot"/> populated from the configuration and runtime statistics.</returns>
public ProcessInfoSnapshot GetSnapshot()
{
    // The PID is only reported while running; reading it can throw if the process is gone.
    int? pid = null;
    try { if (Status == ProcessStatus.Running) pid = _process?.Id; } catch { /* PID unavailable */ }

    // Read once: another thread may clear the field between a HasValue test and the Value read.
    DateTime? nextRetry = _nextRetryTime;

    string msg = "";
    // Countdown text while the circuit breaker is cooling down.
    if (Status == ProcessStatus.CoolingDown && nextRetry.HasValue)
    {
        TimeSpan remaining = nextRetry.Value - DateTime.Now;

        // The retry time may already have passed when the snapshot is taken; never show negatives.
        if (remaining < TimeSpan.Zero) remaining = TimeSpan.Zero;

        // TotalMinutes (not Minutes) so cool-downs of an hour or more are not truncated to 0-59.
        msg = $"熔断中 (剩余 {(int)remaining.TotalMinutes}:{remaining.Seconds:D2})";
    }

    return new ProcessInfoSnapshot
    {
        Id = _config.Id,
        DisplayName = _config.DisplayName,
        Description = _config.Description, // previously never copied, so it was always empty
        Pid = pid,
        Status = Status,
        LastStartTime = _lastStartTime,
        LastExitTime = _lastExitTime,
        ConsecutiveFailures = _consecutiveFailures,
        NextRetryTime = nextRetry,
        Message = msg
    };
}
|
||||
|
||||
#endregion
|
||||
}
|
||||
}
|
||||
107
SHH.ProcessLaunchers/ProcessConfig.cs
Normal file
107
SHH.ProcessLaunchers/ProcessConfig.cs
Normal file
@@ -0,0 +1,107 @@
|
||||
using System.Collections.Generic;
|
||||
|
||||
namespace SHH.ProcessLaunchers
|
||||
{
|
||||
/// <summary>
/// Launch and supervision settings for one managed process.
/// </summary>
public class ProcessConfig
{
    #region --- Identity ---

    /// <summary>
    /// Unique identifier (key).
    /// <para>Used as the index inside the manager and must not repeat, e.g. "Streamer_01", "Streamer_02".</para>
    /// </summary>
    public string Id { get; set; } = "";

    /// <summary>
    /// General display name (category / type).
    /// <para>Describes what kind of program this is, e.g. "VideoStreamingService".</para>
    /// <para>Several instances may share the same DisplayName.</para>
    /// </summary>
    public string DisplayName { get; set; } = "";

    /// <summary>
    /// Optional free-text description.
    /// <para>For example: "handles the video stream of 192.168.1.10".</para>
    /// </summary>
    public string Description { get; set; } = "";

    #endregion

    #region --- Launch arguments ---

    /// <summary>Path of the executable (absolute or relative).</summary>
    public string ExePath { get; set; } = "";

    /// <summary>Command-line argument string (e.g. "--id=1 --debug").</summary>
    public string Arguments { get; set; } = "";

    /// <summary>Working directory (defaults to the executable's directory when empty).</summary>
    public string WorkingDirectory { get; set; } = "";

    #endregion

    #region --- Presentation ---

    /// <summary>
    /// Whether the program window is shown.
    /// <para>True: a console window or UI appears | False: runs silently in the background (CreateNoWindow=true).</para>
    /// </summary>
    public bool Visible { get; set; }

    /// <summary>
    /// Whether standard output / standard error are taken over (RedirectStandardOutput).
    /// <para>True: the launcher captures the console output and forwards it through the logging interface.</para>
    /// <para>Note: with Visible=true it is advisable to set this to false, otherwise the console window stays blank.</para>
    /// </summary>
    public bool EnableLogRedirect { get; set; } = true;

    #endregion

    #region --- Supervision policy ---

    /// <summary>Delay before a regular restart after an unexpected exit, in milliseconds; default 3000.</summary>
    public int RestartDelayMs { get; set; } = 3000;

    /// <summary>Consecutive-failure threshold at which the circuit breaker trips; default 3.</summary>
    public int MaxConsecutiveFailures { get; set; } = 3;

    /// <summary>Circuit-breaker cool-down in milliseconds; default 30 minutes (1800000 ms).</summary>
    public int CircuitBreakerDelayMs { get; set; } = 30 * 60 * 1000;

    /// <summary>
    /// Stable-run threshold in milliseconds; default 60 seconds.
    /// <para>A process that survives longer than this counts as started successfully and the failure counter is reset.</para>
    /// </summary>
    public int StabilityThresholdMs { get; set; } = 60 * 1000;

    /// <summary>
    /// Automatic restart interval in minutes; 0 disables the scheduled restart.
    /// <para>Intended to counter memory fragmentation or long-run instability.</para>
    /// </summary>
    public int AutoRestartIntervalMinutes { get; set; }

    #endregion

    #region --- Ordered startup ---

    /// <summary>
    /// Startup order weight.
    /// <para>Smaller numbers start first (0, 1, 2...).</para>
    /// </summary>
    public int StartupOrder { get; set; }

    /// <summary>
    /// Wait after startup, in milliseconds; default 3000.
    /// <para>How long to wait after this process starts before starting the next one,
    /// to avoid a CPU spike or to give dependencies time to come up.</para>
    /// </summary>
    public int PostStartupDelayMs { get; set; } = 3000;

    #endregion

    /// <summary>Resource guards (memory monitoring, heartbeat monitoring, ...); empty by default.</summary>
    public List<IResourceGuard> Guards { get; set; } = new List<IResourceGuard>();
}
|
||||
}
|
||||
135
SHH.ProcessLaunchers/ProcessEventArgs.cs
Normal file
135
SHH.ProcessLaunchers/ProcessEventArgs.cs
Normal file
@@ -0,0 +1,135 @@
|
||||
using System;
|
||||
|
||||
namespace SHH.ProcessLaunchers
|
||||
{
|
||||
/// <summary>
/// Event data for a line of process output (StdOut/StdErr).
/// </summary>
public class ProcessOutputEventArgs : EventArgs
{
    /// <summary>Unique identifier of the process.</summary>
    public string ProcessId { get; set; } = "";

    /// <summary>Name of the process the output came from.</summary>
    public string ProcessName { get; set; } = "";

    /// <summary>The output text.</summary>
    public string Content { get; set; } = "";

    /// <summary>True when the text came from the error stream (StdErr).</summary>
    public bool IsError { get; set; }
}
|
||||
|
||||
/// <summary>
/// Event data for a process state change.
/// </summary>
public class ProcessStateEventArgs : EventArgs
{
    /// <summary>Unique identifier of the process.</summary>
    public string ProcessId { get; set; } = "";

    /// <summary>Name of the process the change belongs to.</summary>
    public string ProcessName { get; set; } = "";

    /// <summary>The new state after the change.</summary>
    public ProcessStatus State { get; set; }
}
|
||||
|
||||
/// <summary>
/// Lifecycle states of a managed process.
/// </summary>
public enum ProcessStatus
{
    /// <summary>Stopped (initial state, or stopped manually by the user).</summary>
    Stopped = 0,

    /// <summary>Starting (the process object is being initialised).</summary>
    Starting = 1,

    /// <summary>Running (a PID exists).</summary>
    Running = 2,

    /// <summary>Waiting to restart (short pause after a crash, 3 seconds by default).</summary>
    PendingRestart = 3,

    /// <summary>Circuit-breaker cool-down (long wait after repeated failures, 30 minutes by default).</summary>
    CoolingDown = 4
}
|
||||
|
||||
/// <summary>
/// Result of a resource guard check.
/// </summary>
public enum GuardResult
{
    /// <summary>Everything is fine.</summary>
    Normal = 0,

    /// <summary>Warning (something is off; log it or send a mail, but do not kill the process).</summary>
    Warning = 1,

    /// <summary>Critical failure (the process must be restarted immediately to protect the system).</summary>
    Critical = 2
}
|
||||
|
||||
/// <summary>
/// Attribution of an operation: who or what triggered the action.
/// <para>Lets later analysis tell manual operations apart from automatic self-healing.</para>
/// </summary>
public enum LogTrigger
{
    /// <summary>
    /// Manual intervention by a user (UI click, API call).
    /// <para>Priority: highest. Normally an expected operation.</para>
    /// </summary>
    User = 0,

    /// <summary>
    /// Self-healing by the launcher (restart after crash, initial start, circuit-breaker recovery).
    /// <para>Priority: high. The system is trying to keep the service alive.</para>
    /// </summary>
    System = 1,

    /// <summary>
    /// Triggered by a resource guard (memory/CPU over the limit).
    /// <para>Priority: urgent. Indicates a degraded state or an enforced intervention.</para>
    /// </summary>
    ResourceGuard = 2,

    /// <summary>
    /// Timed task / planned schedule.
    /// <para>Priority: medium. A task executed according to plan.</para>
    /// </summary>
    Scheduler = 3
}
|
||||
|
||||
/// <summary>
/// Core action types.
/// <para>Identifies which concrete event happened during a process's lifecycle.</para>
/// </summary>
public enum LogAction
{
    /// <summary>A process was started.</summary>
    Start = 0,

    /// <summary>A process was stopped.</summary>
    Stop = 1,

    /// <summary>A process was restarted.</summary>
    Restart = 2,

    /// <summary>An unexpected exit was detected.</summary>
    Crash = 3,

    /// <summary>Standard output stream (StdOut) - usually ordinary log lines printed by the program.</summary>
    Output = 4,

    /// <summary>Standard error stream (StdErr) - exceptions or errors printed by the program.</summary>
    Error = 5,

    /// <summary>Circuit-breaker protection was triggered (retries stop).</summary>
    CircuitBreak = 6,

    /// <summary>Resource-check warning (e.g. a memory-limit alarm that does not restart the process).</summary>
    ResourceCheck = 7
}
|
||||
}
|
||||
42
SHH.ProcessLaunchers/ProcessInfoSnapshot.cs
Normal file
42
SHH.ProcessLaunchers/ProcessInfoSnapshot.cs
Normal file
@@ -0,0 +1,42 @@
|
||||
using System;
|
||||
|
||||
namespace SHH.ProcessLaunchers
|
||||
{
|
||||
/// <summary>
/// Snapshot of a managed process's information (intended for UI data binding).
/// </summary>
public class ProcessInfoSnapshot
{
    /// <summary>Unique identifier (for example: "Streamer_01").</summary>
    public string Id { get; set; } = string.Empty;

    /// <summary>
    /// Generic name / category (for example: "video streaming service").
    /// <para>Used by the UI for grouping or for choosing an icon.</para>
    /// </summary>
    public string DisplayName { get; set; } = string.Empty;

    /// <summary>Detailed description (for example: "West gate, camera position 1").</summary>
    public string Description { get; set; } = string.Empty;

    /// <summary>Operating-system process ID (only available while running).</summary>
    public int? Pid { get; set; }

    /// <summary>Current lifecycle state.</summary>
    public ProcessStatus Status { get; set; }

    /// <summary>Time of the most recent start.</summary>
    public DateTime? LastStartTime { get; set; }

    /// <summary>Time of the most recent exit.</summary>
    public DateTime? LastExitTime { get; set; }

    /// <summary>Current number of consecutive failures (used for the circuit-breaker decision).</summary>
    public int ConsecutiveFailures { get; set; }

    /// <summary>Expected time of the next start attempt (lets the UI show a countdown).</summary>
    public DateTime? NextRetryTime { get; set; }

    /// <summary>Additional status information (such as a circuit-breaker countdown text).</summary>
    public string Message { get; set; } = string.Empty;
}
|
||||
}
|
||||
254
SHH.ProcessLaunchers/ProcessManager.cs
Normal file
254
SHH.ProcessLaunchers/ProcessManager.cs
Normal file
@@ -0,0 +1,254 @@
|
||||
using System;
|
||||
using System.Collections.Concurrent;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace SHH.ProcessLaunchers
|
||||
{
|
||||
/// <summary>
|
||||
/// 进程管理器 (核心实现类)
|
||||
/// <para>核心职责:作为对外统一入口 (Facade),维护所有受管进程的容器。</para>
|
||||
/// <para>主要功能:负责路由外部指令(启动/停止)到具体的进程实例,并处理事件分发。</para>
|
||||
/// </summary>
|
||||
public class ProcessManager : IProcessManager, IDisposable
|
||||
{
|
||||
#region --- 1. 字段与事件 (Fields & Events) ---
|
||||
|
||||
/// <summary>
/// Thread-safe registry of the managed processes.
/// <para>Key: ProcessConfig.Id (unique identifier).</para>
/// <para>Value: the ManagedProcess instance created for that configuration.</para>
/// </summary>
private readonly ConcurrentDictionary<string, ManagedProcess> _processes =
    new ConcurrentDictionary<string, ManagedProcess>();

/// <summary>
/// Logging service, supplied through the constructor.
/// </summary>
private readonly ILauncherLogger _logger;

// ---------------------------------------------------------
// Events exposed to external subscribers
// ---------------------------------------------------------

/// <summary>
/// Public event: raised when standard-output / standard-error content
/// of any child process is dispatched.
/// </summary>
public event EventHandler<ProcessOutputEventArgs> OnOutputReceived;

/// <summary>
/// Public event: raised when the state of any child process changes.
/// </summary>
public event EventHandler<ProcessStateEventArgs> OnStateChanged;
|
||||
|
||||
#endregion
|
||||
|
||||
#region --- 2. 构造与析构 (Constructor & Dispose) ---
|
||||
|
||||
/// <summary>
/// Creates a process manager.
/// </summary>
/// <param name="logger">
/// Logger implementation. When the caller passes null, a NullLogger is
/// substituted so the internal logger reference is never null.
/// </param>
public ProcessManager(ILauncherLogger logger = null)
{
    // Fall back to the no-op logger when none was supplied.
    if (logger == null)
    {
        logger = new NullLogger();
    }

    _logger = logger;
}
|
||||
|
||||
/// <summary>
/// Releases the manager: requests a stop of every process, empties the
/// registry and drops all event subscriptions.
/// </summary>
public void Dispose()
{
    // Request a stop for every registered child process.
    // NOTE(review): StopAll hands each stop to Task.Run and does not wait,
    // so the stops may still be in flight when this method returns - confirm
    // that this is acceptable for callers that exit right after Dispose.
    StopAll();

    // Drop the references held by the registry.
    _processes.Clear();

    // Detach every external subscriber so a UI that forgot to unsubscribe
    // is not kept alive through these delegates.
    OnStateChanged = null;
    OnOutputReceived = null;
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region --- 3. 公共 API 实现 (Public Methods) ---
|
||||
|
||||
/// <summary>
/// Registers a new process configuration with the manager.
/// </summary>
/// <param name="config">Process configuration; its Id is used as the registry key.</param>
/// <exception cref="ArgumentNullException">Thrown when <paramref name="config"/> is null.</exception>
/// <exception cref="ArgumentException">Thrown when the Id is null, empty or whitespace.</exception>
/// <exception cref="InvalidOperationException">Thrown when the Id is already registered.</exception>
public void Register(ProcessConfig config)
{
    // 1. Reject a missing configuration explicitly instead of failing with a
    //    NullReferenceException on the first property access.
    if (config == null)
    {
        throw new ArgumentNullException(nameof(config));
    }

    // 2. The Id is the registry key and therefore mandatory.
    if (string.IsNullOrWhiteSpace(config.Id))
    {
        throw new ArgumentException("进程配置无效:必须包含唯一的 Id", nameof(config));
    }

    // 3. Fast duplicate check, done before the managed instance is constructed.
    if (_processes.ContainsKey(config.Id))
    {
        throw new InvalidOperationException($"进程 Id '{config.Id}' 已存在,禁止重复注册。");
    }

    // 4. Create the managed instance; it receives this manager and the logger.
    var process = new ManagedProcess(config, this, _logger);

    // 5. TryAdd is the authoritative check: another thread may have registered
    //    the same Id after step 3. Report that as a duplicate instead of
    //    returning silently without having registered anything.
    if (!_processes.TryAdd(config.Id, process))
    {
        throw new InvalidOperationException($"进程 Id '{config.Id}' 已存在,禁止重复注册。");
    }

    _logger.LogLifecycle(config.Id, LogAction.Output, LogTrigger.System,
        $"进程配置已注册: {config.DisplayName}");
}
|
||||
|
||||
/// <summary>
/// Starts the process registered under the given ID.
/// </summary>
/// <param name="id">Unique identifier of the process (ProcessConfig.Id).</param>
public void Start(string id)
{
    // Unknown ID: record an error entry and do nothing else.
    if (!_processes.TryGetValue(id, out var target))
    {
        _logger.LogLifecycle(id, LogAction.Error, LogTrigger.User, "启动失败:未找到指定 ID 的进程配置");
        return;
    }

    // Delegate to the managed instance; the action is attributed to the user.
    target.ExecuteStart(LogTrigger.User, "用户手动启动指令");
}
|
||||
|
||||
/// <summary>
/// [Async] Starts every registered process in a defined order.
/// <para>
/// Processes are ordered by StartupOrder (ascending), then by Id, and after each
/// start the method pauses for that process's PostStartupDelayMs when it is positive.
/// </para>
/// </summary>
/// <returns>A task that completes once every start has been issued.</returns>
public async Task StartAllAsync()
{
    _logger.LogLifecycle("ALL", LogAction.Start, LogTrigger.User, "执行有序批量启动");

    // Materialize the launch order up front: primary key StartupOrder,
    // tie-breaker Id, so the sequence is the same on every run.
    var launchQueue =
        (from candidate in _processes.Values
         orderby candidate.Config.StartupOrder, candidate.Config.Id
         select candidate).ToList();

    foreach (var current in launchQueue)
    {
        // Issue the start synchronously; the call is not awaited.
        current.ExecuteStart(LogTrigger.User, "有序批量启动");

        // Staggered start-up: pause between launches when a positive delay is configured.
        int pauseMs = current.Config.PostStartupDelayMs;
        if (pauseMs <= 0)
        {
            continue;
        }

        await Task.Delay(pauseMs);
    }

    _logger.LogLifecycle("ALL", LogAction.Start, LogTrigger.User, "有序批量启动完成");
}
|
||||
|
||||
/// <summary>
/// Stops the process registered under the given ID.
/// </summary>
/// <param name="id">Unique identifier of the process.</param>
public void Stop(string id)
{
    if (_processes.TryGetValue(id, out var p))
    {
        // Delegate to the managed instance; the action is attributed to the user.
        p.ExecuteStop(LogTrigger.User, "用户手动停止指令");
    }
    else
    {
        // Same handling as Start: an unknown ID is logged instead of being
        // dropped silently, so a mistyped ID is visible to the operator.
        _logger.LogLifecycle(id, LogAction.Error, LogTrigger.User, "停止失败:未找到指定 ID 的进程配置");
    }
}
|
||||
|
||||
/// <summary>
/// Requests a stop of every registered process (stops run concurrently).
/// <para>
/// Each stop is handed to the thread pool and this method returns without
/// waiting for the stops to finish.
/// </para>
/// </summary>
public void StopAll()
{
    _logger.LogLifecycle("ALL", LogAction.Stop, LogTrigger.User, "执行批量停止");

    foreach (var p in _processes.Values)
    {
        Task.Run(() =>
        {
            try
            {
                p.ExecuteStop(LogTrigger.User, "批量停止");
            }
            catch (Exception ex)
            {
                // Nobody observes this task, so an exception thrown by
                // ExecuteStop would otherwise vanish; log it per process.
                _logger.LogLifecycle(p.Config.Id, LogAction.Error, LogTrigger.User,
                    $"批量停止失败: {ex.Message}");
            }
        });
    }
}
|
||||
|
||||
/// <summary>
/// Resets the resource-alarm state of the given process.
/// <para>Meant to be called after the user acknowledges the alarm in the UI, to release the alarm lock.</para>
/// </summary>
/// <param name="id">Unique identifier of the process.</param>
public void ResetGuard(string id)
{
    if (_processes.TryGetValue(id, out var p))
    {
        // Clear the alarm state on the managed instance, then record who did it.
        p.ResetGuards();
        _logger.LogLifecycle(id, LogAction.ResourceCheck, LogTrigger.User, "用户手动复位资源报警锁");
    }
    else
    {
        // Same handling as Start: an unknown ID is logged instead of being
        // dropped silently.
        _logger.LogLifecycle(id, LogAction.Error, LogTrigger.User, "复位失败:未找到指定 ID 的进程配置");
    }
}
|
||||
|
||||
/// <summary>
/// Returns a snapshot of the current state of every registered process.
/// <para>Intended for UI list binding or periodic refresh.</para>
/// </summary>
/// <returns>One snapshot object per registered process.</returns>
public List<ProcessInfoSnapshot> GetSnapshot()
{
    var snapshots = new List<ProcessInfoSnapshot>();

    // Ask each managed instance for its own snapshot DTO.
    foreach (var managed in _processes.Values)
    {
        snapshots.Add(managed.GetSnapshot());
    }

    return snapshots;
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region --- 4. 内部事件分发 (Internal Dispatchers) ---
|
||||
|
||||
// 说明:C# 的 event 只能在定义类内部 Invoke。
|
||||
// 为了让内部类 ManagedProcess 也能触发 Manager 的对外事件,我们提供了这几个 internal 方法。
|
||||
// 这些方法充当了内部类与外部事件之间的桥梁。
|
||||
|
||||
/// <summary>
/// Raises <c>OnStateChanged</c> on behalf of a managed process
/// (intended to be called by ManagedProcess).
/// </summary>
/// <param name="processId">ID of the process whose state changed.</param>
/// <param name="newState">The new state.</param>
internal void DispatchStateChange(string processId, ProcessStatus newState)
{
    // Read the delegate once so the null check and the call see the same value.
    var subscribers = OnStateChanged;
    if (subscribers == null)
    {
        return;
    }

    // NOTE(review): ProcessName is not populated here and keeps its default value.
    var args = new ProcessStateEventArgs
    {
        ProcessId = processId,
        State = newState
    };

    subscribers(this, args);
}
|
||||
|
||||
/// <summary>
/// Raises <c>OnOutputReceived</c> on behalf of a managed process
/// (intended to be called by ManagedProcess).
/// </summary>
/// <param name="processId">ID of the process that produced the output.</param>
/// <param name="content">The output content.</param>
/// <param name="isError">True when the content comes from the error stream.</param>
internal void DispatchOutput(string processId, string content, bool isError)
{
    // Read the delegate once so the null check and the call see the same value.
    var subscribers = OnOutputReceived;
    if (subscribers == null)
    {
        return;
    }

    // NOTE(review): ProcessName is not populated here and keeps its default value.
    var args = new ProcessOutputEventArgs
    {
        ProcessId = processId,
        Content = content,
        IsError = isError
    };

    subscribers(this, args);
}
|
||||
|
||||
#endregion
|
||||
}
|
||||
}
|
||||
7
SHH.ProcessLaunchers/SHH.ProcessLaunchers.csproj
Normal file
7
SHH.ProcessLaunchers/SHH.ProcessLaunchers.csproj
Normal file
@@ -0,0 +1,7 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<TargetFramework>netstandard2.0</TargetFramework>
|
||||
</PropertyGroup>
|
||||
|
||||
</Project>
|
||||
Reference in New Issue
Block a user