using System.Collections.Concurrent;
using System.Diagnostics;
using System.Drawing;
using System.Net.NetworkInformation;

namespace SHH.CameraSdk;

/// <summary>
/// [Status agent] Network connectivity sentinel.
/// <para>
/// Every 3 seconds it probes all devices returned by the manager and reports
/// debounced online/offline transitions back to it.
/// </para>
/// Characteristics:
/// 1. Low coupling: depends only on <see cref="IDeviceConnectivity"/>, not on concrete drivers.
/// 2. Bounded parallelism: probes run through Parallel.ForEachAsync with a fixed degree of parallelism.
/// 3. Probe strategy: a streaming device with a recent frame is not pinged; otherwise an ICMP ping is sent.
/// 4. Stability: a device is declared offline only after it has been failing continuously
///    for <c>OFFLINE_DURATION_THRESHOLD</c> seconds, which filters out short network glitches.
/// </summary>
public class ConnectivitySentinel
{
    private readonly CameraManager _manager;
    private readonly PeriodicTimer _timer;
    private readonly CancellationTokenSource _cts = new();

    // Last state reported to the manager per device; used to de-duplicate notifications.
    private readonly ConcurrentDictionary<long, bool> _lastStates = new();

    // Key: device Id, Value: Environment.TickCount64 at the first failed probe of the
    // current failure streak. A monotonic clock is used so that wall-clock adjustments
    // (DST, NTP corrections) cannot stretch or shrink the offline window.
    private readonly ConcurrentDictionary<long, long> _failureStartTicks = new();

    // Maximum number of devices probed concurrently.
    private const int MAX_PARALLELISM = 16;

    // Seconds of continuous probe failure required before a device is declared offline.
    private const int OFFLINE_DURATION_THRESHOLD = 30;

    // Timeout of a single ping in milliseconds; kept short so one round finishes quickly.
    private const int PING_TIMEOUT = 1000;

    // A streaming device counts as responsive if a frame arrived within this many milliseconds.
    private const int FRAME_HEARTBEAT_TIMEOUT = 5000;

    /// <summary>
    /// Creates the sentinel and immediately starts the background inspection loop.
    /// </summary>
    /// <param name="manager">Manager that supplies the devices and receives status notifications.</param>
    /// <exception cref="ArgumentNullException"><paramref name="manager"/> is null.</exception>
    public ConnectivitySentinel(CameraManager manager)
    {
        ArgumentNullException.ThrowIfNull(manager);

        _manager = manager;

        // One full inspection round every 3 seconds.
        _timer = new PeriodicTimer(TimeSpan.FromSeconds(3));

        // Fire-and-forget: RunLoopAsync handles its own exceptions, so the discarded
        // task never faults silently.
        _ = RunLoopAsync();
    }

    /// <summary>
    /// Background loop: waits for each timer tick and runs one inspection round.
    /// Ends when <see cref="Stop"/> cancels the token or disposes the timer.
    /// </summary>
    private async Task RunLoopAsync()
    {
        var token = _cts.Token;
        var options = new ParallelOptions
        {
            MaxDegreeOfParallelism = MAX_PARALLELISM,
            CancellationToken = token
        };

        try
        {
            while (await _timer.WaitForNextTickAsync(token))
            {
                try
                {
                    // Materialize a snapshot so the round is not affected by the device
                    // collection changing while probes are in flight. OfType skips entries
                    // that do not implement the connectivity interface instead of aborting
                    // the whole round with an InvalidCastException.
                    var devices = _manager.GetAllDevices().OfType<IDeviceConnectivity>().ToList();

                    await Parallel.ForEachAsync(devices, options, async (device, ct) =>
                    {
                        try
                        {
                            await CheckSingleDeviceAsync(device, ct);
                        }
                        catch (Exception ex) when (ex is not OperationCanceledException)
                        {
                            // One misbehaving device must not stop monitoring of the others.
                            Trace.TraceError($"[Sentinel] Connectivity check failed for device {device.Id}: {ex}");
                        }
                    });
                }
                catch (Exception ex) when (ex is not OperationCanceledException)
                {
                    // Keep the loop alive; the next tick retries the whole round.
                    Trace.TraceError($"[Sentinel] Inspection round failed: {ex}");
                }
            }
        }
        catch (OperationCanceledException)
        {
            // Normal shutdown requested through Stop().
        }
    }

    /// <summary>
    /// Probes one device, applies the duration-based debounce, writes the resulting
    /// stable state back to the device and notifies the manager when that state changed.
    /// </summary>
    /// <param name="device">Device to check.</param>
    /// <param name="cancellationToken">Checked before the probe starts.</param>
    private async Task CheckSingleDeviceAsync(IDeviceConnectivity device, CancellationToken cancellationToken = default)
    {
        cancellationToken.ThrowIfCancellationRequested();

        // 1. Instantaneous (raw) connectivity.
        bool isResponsive = await ProbeAsync(device);

        // 2. Debounced (stable) connectivity.
        bool isLogicallyOnline = ResolveStableState(device.Id, isResponsive);

        // Inject the filtered state back into the device.
        device.SetNetworkStatus(isLogicallyOnline);

        // 3. Report only on change; a device with no recorded state always reports once.
        bool changed = !_lastStates.TryGetValue(device.Id, out bool previous) || previous != isLogicallyOnline;
        if (!changed)
        {
            return;
        }

        _lastStates[device.Id] = isLogicallyOnline;

        string reason = isLogicallyOnline
            ? "网络探测恢复"
            : $"持续断连超过{OFFLINE_DURATION_THRESHOLD}秒";

        _manager.NotifyStatusChange(device.Id, isLogicallyOnline, reason);
    }

    /// <summary>
    /// Returns the instantaneous reachability of a device. A device that is playing or
    /// streaming and delivered a frame within the heartbeat window is considered
    /// responsive without pinging; in every other case an ICMP ping decides.
    /// </summary>
    private async Task<bool> ProbeAsync(IDeviceConnectivity device)
    {
        bool isStreaming = device.Status == VideoSourceStatus.Playing
                           || device.Status == VideoSourceStatus.Streaming;

        if (isStreaming)
        {
            long sinceLastFrame = Environment.TickCount64 - device.LastFrameTick;
            if (sinceLastFrame < FRAME_HEARTBEAT_TIMEOUT)
            {
                return true;
            }

            // Frame heartbeat is stale: fall through to a ping, so a stalled decoder
            // is not mistaken for a network outage.
        }

        return await PingAsync(device.IpAddress);
    }

    /// <summary>
    /// Applies the duration-based debounce to a raw probe result.
    /// </summary>
    /// <param name="deviceId">Device identifier used as the key of the state caches.</param>
    /// <param name="isResponsive">Raw result of the current probe.</param>
    /// <returns>The stable online/offline state to publish.</returns>
    private bool ResolveStableState(long deviceId, bool isResponsive)
    {
        if (isResponsive)
        {
            // A single successful probe ends the failure streak.
            _failureStartTicks.TryRemove(deviceId, out _);
            return true;
        }

        long now = Environment.TickCount64;
        long firstFailure = _failureStartTicks.GetOrAdd(deviceId, now);
        long failedForMs = now - firstFailure;

        if (failedForMs >= OFFLINE_DURATION_THRESHOLD * 1000L)
        {
            // Failing continuously for the whole threshold: really offline.
            return false;
        }

        // Still inside the observation window: hold the last reported state so brief
        // glitches do not make the status flap.
        // NOTE(review): a device with no recorded state is treated as online here, so a
        // device that has never answered is reported online until the threshold elapses.
        // Kept as in the original; confirm this is intended.
        return _lastStates.TryGetValue(deviceId, out bool last) ? last : true;
    }

    /// <summary>
    /// Sends one ICMP echo request and reports whether it succeeded.
    /// </summary>
    /// <param name="ip">Host name or IP address to ping.</param>
    /// <returns>
    /// true only when the reply status is <see cref="IPStatus.Success"/>; false for a
    /// blank address, a timeout, any other reply status, or any exception.
    /// </returns>
    private async Task<bool> PingAsync(string ip)
    {
        if (string.IsNullOrWhiteSpace(ip))
        {
            return false;
        }

        try
        {
            using var ping = new Ping();

            // A reply slower than PING_TIMEOUT comes back with IPStatus.TimedOut (it is
            // not thrown). That counts as one instantaneous failure, which the offline
            // duration window then tolerates.
            var reply = await ping.SendPingAsync(ip, PING_TIMEOUT);
            return reply.Status == IPStatus.Success;
        }
        catch (Exception)
        {
            // Best effort: any failure to send (unresolvable host, socket error, ...)
            // is treated as "not reachable" for this round.
            return false;
        }
    }

    /// <summary>
    /// Stops the background loop by cancelling its token and disposing the timer.
    /// </summary>
    public void Stop()
    {
        _cts.Cancel();
        _timer.Dispose();
    }
}