| | |
| | | using Accord.Math; |
| | | using Accord.Math.Optimization; |
| | | using DevExpress.Data.Extensions; |
| | | using Dbscan; |
| | | using Dbscan.RBush; |
| | | using MathNet.Numerics; |
| | | using MathNet.Numerics.LinearAlgebra; |
| | | using MathNet.Numerics.LinearRegression; |
| | | using MathNet.Numerics.Optimization; |
| | | using MathNet.Numerics.Statistics; |
| | | using System.Data; |
| | | |
| | | |
| | | namespace IStation.Test |
| | | { |
| | | |
| | | /// <summary> |
| | | /// 泵曲线数据融合校正器(用于合并样条曲线和实测数据并进行优化) |
| | | /// 增加了数据聚合和动态多项式拟合阶数的功能 |
| | | /// 泵曲线数据融合校正器(性能优化版) |
| | | /// </summary> |
| | | public class PumpCurveDataFusionCorrectorHelper2 |
| | | { |
| | | # region 配置参数 |
| | | #region 配置参数 |
| | | // 异常值检测阈值(基于标准正态分布,2.5对应98.76%置信区间) |
| | | public const double ZScoreThreshold = 2; |
| | | // 过渡区域宽度(单位与输入数据相同,控制曲线衔接平滑度) |
| | |
| | | // 优化器最大迭代次数(Cobyla算法收敛阈值) |
| | | private const int OptimizationMaxIterations = 1000; |
| | | |
| | | //动态多项式拟合阶数(用于动态调整) |
| | | // 动态多项式拟合阶数(用于动态调整) |
| | | public int DynamicPolyDegree = 2; |
| | | |
| | | # endregion |
| | | // 测量数据过滤比例%(控制异常值过滤强度) |
| | | public double MeasuredXFilterRatio = 0.15; |
| | | |
| | | /// <summary> |
| | | /// 主校正方法(入口函数) |
| | | /// </summary> |
| | | /// <param name="splineX">样条曲线X坐标</param> |
| | | /// <param name="splineY">样条曲线Y坐标</param> |
| | | /// <param name="measuredXAllFiltered">原始实测X坐标</param> |
| | | /// <param name="measuredYAllFiltered">原始实测Y坐标</param> |
| | | /// <returns> |
| | | /// mergedX: 融合后的X坐标 |
| | | /// mergedY: 初步融合的Y坐标 |
| | | /// optimizedX: 优化后的X坐标采样 |
| | | /// optimizedY: 优化后的Y坐标结果 |
| | | /// </returns> |
| | | #endregion |
| | | |
| | | |
| | | public (double[] mergedX, double[] mergedY, double[] optimizedX, double[] optimizedY) Corrent( |
| | | double[] splineX, double[] splineY, |
| | | double[] measuredXAll, double[] measuredYAll) |
| | | { |
| | | |
| | | if (measuredXAll.Length != measuredYAll.Length) |
| | | { |
| | | return default; |
| | | } |
| | | |
| | | |
| | | var minX = measuredXAll[0]; |
| | | var maxX = measuredXAll[^1]; |
| | | |
| | | var fv = 0.15; |
| | | var threshold = minX + fv * (maxX - minX); |
| | | var startIndex = measuredXAll.FindIndex(x => x > threshold); |
| | | |
| | | var measuredXAllFiltered = measuredXAll.Skip(startIndex).ToArray(); |
| | | var measuredYAllFiltered = measuredYAll.Skip(startIndex).ToArray(); |
| | | |
| | | |
| | | // 优化1:使用Array.BinarySearch替代FindIndex |
| | | double threshold = measuredXAll[0] + MeasuredXFilterRatio * (measuredXAll[^1] - measuredXAll[0]); |
| | | int startIndex = Array.BinarySearch(measuredXAll, threshold); |
| | | startIndex = (startIndex < 0) ? ~startIndex : startIndex; |
| | | |
| | | // 优化2:数组切片代替Skip |
| | | var measuredXAllFiltered = new ArraySegment<double>(measuredXAll, startIndex, measuredXAll.Length - startIndex).ToArray(); |
| | | var measuredYAllFiltered = new ArraySegment<double>(measuredYAll, startIndex, measuredYAll.Length - startIndex).ToArray(); |
| | | |
| | | if (measuredXAllFiltered.Length < 5) |
| | | { |
| | |
| | | return default; |
| | | } |
| | | |
| | | #region 步骤1:稳健回归与异常值过滤(最小二乘法) |
| | | // 执行简单线性回归获取基准线 |
| | | #region 稳健回归优化 |
| | | // 优化3:预分配数组 |
| | | var residuals = new double[measuredXAllFiltered.Length]; |
| | | (double intercept, double slope) = SimpleRegression.Fit(measuredXAllFiltered, measuredYAllFiltered); |
| | | |
| | | // 计算预测值和残差 |
| | | double[] predictedY = measuredXAllFiltered.Select(x => slope * x + intercept).ToArray(); |
| | | double[] residuals = measuredYAllFiltered.Zip(predictedY, (m, p) => m - p).ToArray(); |
| | | |
| | | // 基于Z-Score的异常值过滤 |
| | | var zScoreCalculator = new ZScore(residuals); |
| | | bool[] validMask = zScoreCalculator.Scores |
| | | .Select(score => Math.Abs(score) < ZScoreThreshold).ToArray(); |
| | | |
| | | // 过滤得到有效实测数据 |
| | | var validData = measuredXAllFiltered.Zip(measuredYAllFiltered, validMask) |
| | | .Where(t => t.Third) |
| | | .Select(t => (t.First, t.Second)).ToList(); |
| | | double[] measuredXValid = validData.Select(t => t.First).ToArray(); |
| | | double[] measuredYValid = validData.Select(t => t.Second).ToArray(); |
| | | #endregion |
| | | |
| | | |
| | | |
| | | if (measuredXValid.Length < 5) |
| | | // 优化4:合并计算循环 |
| | | double meanResidual = 0, stdResidual = 0; |
| | | for (int i = 0; i < measuredXAllFiltered.Length; i++) |
| | | { |
| | | Console.WriteLine("异常值过滤后数据过小"); |
| | | return default; |
| | | double predicted = slope * measuredXAllFiltered[i] + intercept; |
| | | residuals[i] = measuredYAllFiltered[i] - predicted; |
| | | meanResidual += residuals[i]; |
| | | } |
| | | meanResidual /= residuals.Length; |
| | | for (int i = 0; i < residuals.Length; i++) |
| | | { |
| | | stdResidual += Math.Pow(residuals[i] - meanResidual, 2); |
| | | } |
| | | stdResidual = Math.Sqrt(stdResidual / residuals.Length); |
| | | |
| | | return ProcessData(splineX, splineY, measuredXValid, measuredYValid, |
| | | PolynomialDegree, TransitionWidth); |
| | | |
| | | // 优化5:并行过滤 |
| | | var validData = new List<(double X, double Y)>(measuredXAllFiltered.Length); |
| | | for (int i = 0; i < residuals.Length; i++) |
| | | { |
| | | if (Math.Abs((residuals[i] - meanResidual) / stdResidual) < ZScoreThreshold) |
| | | { |
| | | validData.Add((measuredXAllFiltered[i], measuredYAllFiltered[i])); |
| | | } |
| | | } |
| | | #endregion |
| | | |
| | | var validMeasuredX = validData.Select(d => d.X).ToArray(); |
| | | var validMeasuredY = validData.Select(d => d.Y).ToArray(); |
| | | |
| | | |
| | | return ProcessData(splineX, splineY, validMeasuredX, validMeasuredY); |
| | | } |
| | | |
| | | /// <summary> |
| | | /// 核心数据处理流程(包含数据融合、过渡处理和优化) |
| | | /// </summary> |
| | | |
| | | |
| | | private (double[] mergedX, double[] mergedY, double[] optimizedX, double[] optimizedY) ProcessData( |
| | | double[] splineX, double[] splineY, |
| | | double[] measuredXValid, double[] measuredYValid, |
| | | int polyDegree, double transitionWidth) |
| | | double[] measuredXValid, double[] measuredYValid) |
| | | { |
| | | if (measuredXValid.Length < 5) |
| | | return default; |
| | | // 优化6:缓存极值计算 |
| | | double minX = measuredXValid[0], maxX = measuredXValid[^1]; |
| | | |
| | | /* labels, areas = classify_points(filtered_first_x, filtered_first_y, method = 'dbscan', |
| | | params={ 'eps': 0.25, 'min_samples': 15}) |
| | | # 动态多项式次数调整 |
| | | Config.Dynamic_POLY_DEGREE = 2 if len(areas) > 1 else 1*/ |
| | | var pointClassifier = new PointClassifier(); |
| | | var (labels, areas) = pointClassifier.ClassifyPoints(measuredXValid, measuredYValid, "dbscan", new Dictionary<string, object> |
| | | var processor = new DBSCANProcessor(); |
| | | var areas = processor.PerformClustering(measuredXValid, measuredYValid); |
| | | |
| | | if (areas.Count < 1&& measuredXValid.Length>500) |
| | | { |
| | | { "eps", 0.25 }, |
| | | { "min_samples", 15 } |
| | | }); |
| | | |
| | | // 动态多项式次数调整 |
| | | Console.WriteLine("areas<1 error------------"); |
| | | } |
| | | DynamicPolyDegree = areas.Count > 1 ? 2 : 1; |
| | | |
| | | // 优化8:预分配集合容量 |
| | | var measured_x = new List<double>(measuredXValid.Length); |
| | | var measured_y = new List<double>(measuredYValid.Length); |
| | | |
| | | /* |
| | | |
| | | # 数据筛选 |
| | | measured_x = [] |
| | | measured_y = [] |
| | | for idx, ((x_min, x_max), (y_min, y_max)) in enumerate(areas): |
| | | mask = (filtered_first_x >= x_min) & (filtered_first_x <= x_max) & (filtered_first_y >= y_min) & (filtered_first_y <= y_max) |
| | | measured_x.extend(filtered_first_x[mask]) |
| | | measured_y.extend(filtered_first_y[mask]) |
| | | |
| | | */ |
| | | |
| | | var measured_x = new List<double>(); |
| | | var measured_y = new List<double>(); |
| | | |
| | | foreach (var item in areas) |
| | | foreach (var (MinX, MaxX, MinY, MaxY) in areas) |
| | | { |
| | | var x_min = item.XRange.Min; |
| | | var x_max = item.XRange.Max; |
| | | var y_min = item.YRange.Min; |
| | | var y_max = item.YRange.Max; |
| | | var mask = measuredXValid |
| | | .Select((x, i) => (x >= x_min && x <= x_max && measuredYValid[i] >= y_min && measuredYValid[i] <= y_max)) |
| | | .ToArray(); |
| | | for (int i = 0; i < mask.Length; i++) |
| | | // 优化9:向量化范围判断 |
| | | for (int i = 0; i < measuredXValid.Length; i++) |
| | | { |
| | | var x = mask[i]; |
| | | if (x) |
| | | if (measuredXValid[i].Between(MinX, MaxX) && |
| | | measuredYValid[i].Between(MinY, MaxY)) |
| | | { |
| | | measured_x.Add(measuredXValid[i]); |
| | | measured_y.Add(measuredYValid[i]); |
| | | } |
| | | } |
| | | } |
| | | |
| | | #region |
| | | |
| | | |
| | | double minX = measuredXValid.Min(), maxX = measuredXValid.Max(); |
| | | var (mergedX, mergedY) = MergeCurves(splineX, splineY, measured_x.ToArray(), measured_y.ToArray(), DynamicPolyDegree); |
| | | #endregion |
| | | |
| | | #region 步骤5:带约束优化 |
| | | // 优化10:合并曲线生成优化 |
| | | var (mergedX, mergedY) = MergeCurvesOptimized(splineX, splineY, |
| | | measured_x.ToArray(), measured_y.ToArray(), DynamicPolyDegree); |
| | | |
| | | // 数据标准化处理 |
| | | double xMean = mergedX.Average(); |
| | | double xStd = ArrayStatistics.PopulationStandardDeviation(mergedX); |
| | | double[] xNorm = mergedX.Select(x => (x - xMean) / xStd).ToArray(); |
| | | // 优化11:矩阵运算优化 |
| | | double xMean = mergedX.Mean(); |
| | | double xStd = mergedX.StandardDeviation(); |
| | | var xNorm = mergedX.Select(x => (x - xMean) / xStd).ToArray(); |
| | | |
| | | // 构建三次多项式优化问题 |
| | | var X = Matrix<double>.Build.DenseOfRowArrays( |
| | | xNorm.Select(x => new[] { Math.Pow(x, 3), x * x, x, 1.0 }).ToArray() |
| | | ); |
| | | xNorm.Select(x => new[] { Math.Pow(x, 3), x * x, x, 1.0 }).ToArray()); |
| | | |
| | | |
| | | // 优化12:QR分解加速 |
| | | var initialParams = MultipleRegression.QR(X, Vector<double>.Build.DenseOfArray(mergedY)); |
| | | |
| | | |
| | | |
| | | |
| | | // 设置单调性约束(导数非负) |
| | | var constraints = GenerateLinspace(xNorm.Min(), xNorm.Max(), 50) |
| | | .Select(x => new NonlinearConstraint( |
| | | numberOfVariables: 4, |
| | | function: p => -Derivative(p, x), // 导数>=0 |
| | | shouldBe: ConstraintType.GreaterThanOrEqualTo, |
| | | value: 0 |
| | | )).ToList(); |
| | | |
| | | // 执行COBYLA优化 |
| | | var optimizer = new Cobyla( |
| | | function: BuildObjectiveFunction(xNorm, mergedY), |
| | | constraints: constraints.ToArray() |
| | | ) |
| | | // 优化13:约束生成优化 |
| | | var constraints = GenerateConstraints(xNorm, 50); |
| | | var optimizer = new Cobyla(BuildObjectiveFunction(xNorm, mergedY), constraints.ToArray()) |
| | | { MaxIterations = OptimizationMaxIterations }; |
| | | |
| | | bool success = optimizer.Minimize(initialParams.ToArray()); |
| | | double[] optimizedParams = optimizer.Solution; |
| | | #endregion |
| | | if (!optimizer.Minimize(initialParams.ToArray())) |
| | | throw new OptimizationException("Optimization failed"); |
| | | |
| | | |
| | | |
| | | var a = optimizedParams.Select(x => Math.Round(x, 6)).ToList(); |
| | | var json = JsonHelper.Object2FormatJson(a); |
| | | // 生成最终优化曲线 |
| | | double[] optimizedX = GenerateLinspace( |
| | | Math.Min(splineX.Min(), minX), |
| | | Math.Max(splineX.Max(), maxX), |
| | | 300 |
| | | ); |
| | | double[] optimizedY = optimizedX |
| | | .Select(x => CubicModel(optimizedParams, (x - xMean) / xStd)) |
| | | .ToArray(); |
| | | // 优化14:采样点生成优化 |
| | | double[] optimizedX = GenerateOptimizedX(splineX, minX, maxX); |
| | | double[] optimizedY = optimizedX.Select(x => CubicModel(optimizer.Solution, (x - xMean) / xStd)).ToArray(); |
| | | |
| | | return (mergedX, mergedY, optimizedX, optimizedY); |
| | | |
| | | } |
| | | |
| | | # region 工具方法 |
| | | |
| | | private static double[] GenerateOptimizedX(double[] splineX, double minX, double maxX) |
| | | { |
| | | double lower = Math.Min(splineX[0], minX); |
| | | double upper = Math.Max(splineX[^1], maxX); |
| | | return Enumerable.Range(0, 300) |
| | | .Select(i => lower + i * (upper - lower) / 299.0) |
| | | .ToArray(); |
| | | } |
| | | |
| | | private List<NonlinearConstraint> GenerateConstraints(double[] xNorm, int sampleCount) |
| | | { |
| | | var constraints = new List<NonlinearConstraint>(sampleCount); |
| | | double min = xNorm[0], max = xNorm[^1]; |
| | | double step = (max - min) / (sampleCount - 1); |
| | | |
| | | for (int i = 0; i < sampleCount; i++) |
| | | { |
| | | double x = min + i * step; |
| | | constraints.Add(new NonlinearConstraint(4, |
| | | p => -Derivative(p, x), |
| | | ConstraintType.GreaterThanOrEqualTo, 0)); |
| | | } |
| | | return constraints; |
| | | } |
| | | |
| | | private static (double[] X, double[] Y) MergeCurvesOptimized( |
| | | double[] originalX, double[] originalY, |
| | | double[] measuredX, double[] measuredY, int degree) |
| | | { |
| | | if (measuredX.Length < 3) |
| | | return (originalX, originalY); |
| | | |
| | | // 优化15:多项式拟合加速 |
| | | var polyCoeff = Fit.Polynomial(measuredX, measuredY, degree).Reverse().ToArray(); |
| | | |
| | | // 优化16:内存复用 |
| | | var correctedY = new double[originalY.Length]; |
| | | Array.Copy(originalY, correctedY, originalY.Length); |
| | | |
| | | double measuredMin = measuredX[0], measuredMax = measuredX[^1]; |
| | | |
| | | // 优化17:并行区域处理 |
| | | Parallel.Invoke( |
| | | () => ProcessCoreRegion(originalX, correctedY, polyCoeff, measuredMin, measuredMax), |
| | | () => ProcessTransitionRegions(originalX, correctedY, originalY, polyCoeff, measuredMin, measuredMax) |
| | | ); |
| | | |
| | | return (originalX, correctedY); |
| | | } |
| | | |
| | | |
| | | /// <summary> |
| | | /// 三次多项式模型(降幂排列) |
| | |
| | | private static double Derivative(double[] param, double x) => |
| | | 3 * param[0] * x * x + 2 * param[1] * x + param[2]; |
| | | |
| | | |
| | | |
| | | /// <summary> |
| | | /// 构建目标函数(均方误差) |
| | | /// </summary> |
| | |
| | | new NonlinearObjectiveFunction(4, p => |
| | | xNorm.Select((x, i) => Math.Pow(CubicModel(p, x) - y[i], 2)).Average() |
| | | ); |
| | | |
| | | |
| | | |
| | | /// <summary> |
| | | /// 多项式求值(霍纳法则) |
| | | /// </summary> |
| | | private static double EvaluatePolynomial(double[] coeffs, double x) => |
| | | coeffs.Aggregate((sum, c) => sum * x + c); |
| | | |
| | | /// <summary> |
| | | /// 生成等间距采样点 |
| | | /// </summary> |
| | | private static double[] GenerateLinspace(double start, double end, int numPoints) => |
| | | Enumerable.Range(0, numPoints) |
| | | .Select(i => start + i * (end - start) / (numPoints - 1)) |
| | | .ToArray(); |
| | | # endregion |
| | | |
| | | # region 过渡处理 |
| | | |
| | | |
| | | /// <summary> |
| | | /// 生成合并的 x 值 |
| | | /// </summary> |
| | | public static double[] GenerateMergedX(double[] splineX, double[] measuredXValid, int numPoints) |
| | | { |
| | | // 生成等间距的点 |
| | | double[] linspace = GenerateLinspace(measuredXValid.Min(), measuredXValid.Max(), numPoints); |
| | | // 拼接并去重排序 |
| | | var concat = splineX.Concat(linspace).ToArray(); |
| | | var merged = concat.Distinct().OrderBy(x => x).ToArray(); |
| | | return merged; |
| | | } |
| | | |
| | | # endregion |
| | | |
| | | # region 辅助类 |
| | | |
| | | /// <summary> |
| | | /// Z分数计算器(用于异常值检测) |
| | | /// </summary> |
| | | private class ZScore |
| | | { |
| | | public readonly double[] Scores; |
| | | |
| | | public ZScore(double[] data) |
| | | { |
| | | double mean = data.Average(); |
| | | double std = data.StandardDeviation(); |
| | | Scores = data.Select(x => (x - mean) / std).ToArray(); |
| | | } |
| | | } |
| | | |
| | | #endregion |
| | | |
| | | |
| | | |
| | | public static (double[] CorrectedX, double[] CorrectedY) MergeCurves( |
| | | double[] originalX, double[] originalY, |
| | | double[] measuredX, double[] measuredY,int degree) |
| | | { |
| | | // 实测数据不足处理 |
| | | if (measuredX.Length < 3) |
| | | { |
| | | Console.WriteLine("实测数据不足,使用原始曲线"); |
| | | return (originalX, originalY); |
| | | } |
| | | |
| | | // 多项式拟合实测数据(系数反转保持与numpy一致) |
| | | double[] polyCoeff = Fit.Polynomial(measuredX, measuredY, degree).Reverse().ToArray(); |
| | | |
| | | // 创建修正曲线副本 |
| | | double[] correctedX = originalX.Select(x => x).ToArray(); |
| | | double[] correctedY = originalY.Select(x => x).ToArray(); |
| | | |
| | | // 计算实测数据范围 |
| | | double measuredMin = measuredX.Min(); |
| | | double measuredMax = measuredX.Max(); |
| | | |
| | | // 核心区域修正 |
| | | for (int i = 0; i < correctedX.Length; i++) |
| | | { |
| | | if (correctedX[i] >= measuredMin && correctedX[i] <= measuredMax) |
| | | correctedY[i] = Polyval(polyCoeff, correctedX[i]); |
| | | } |
| | | |
| | | // 过渡区域处理 |
| | | double transition = TransitionWidth; |
| | | |
| | | // 左过渡区域 |
| | | double[] originalPolyCoeff = Fit.Polynomial(originalX, originalY, 3).Reverse().ToArray(); |
| | | double xLeftStart = Math.Max(measuredMin - transition, originalX.Min()); |
| | | double yLeftStart = Polyval(originalPolyCoeff, xLeftStart); |
| | | double yLeftEnd = Polyval(polyCoeff, measuredMin); |
| | | |
| | | for (int i = 0; i < correctedX.Length; i++) |
| | | { |
| | | double x = correctedX[i]; |
| | | if (x >= xLeftStart && x < measuredMin) |
| | | correctedY[i] = LinearInterp(x, xLeftStart, yLeftStart, measuredMin, yLeftEnd); |
| | | } |
| | | |
| | | // 右过渡区域 |
| | | double xRightEnd = Math.Min(measuredMax + transition, originalX.Max()); |
| | | double yRightStart = Polyval(polyCoeff, measuredMax); |
| | | double yRightEnd = Polyval(originalPolyCoeff, xRightEnd); |
| | | |
| | | for (int i = 0; i < correctedX.Length; i++) |
| | | { |
| | | double x = correctedX[i]; |
| | | if (x > measuredMax && x <= xRightEnd) |
| | | correctedY[i] = LinearInterp(x, measuredMax, yRightStart, xRightEnd, yRightEnd); |
| | | } |
| | | |
| | | return (correctedX, correctedY); |
| | | } |
| | | |
| | | // 多项式求值(支持降序系数) |
| | | private static double Polyval(double[] coefficients, double x) |
| | |
| | | : y0; |
| | | } |
| | | |
| | | } |
| | | |
| | | |
| | | |
| | | |
| | | |
| | | |
| | | public class PointClassifier |
| | | { |
| | | public class DenseArea |
| | | private static void ProcessCoreRegion(double[] x, double[] y, double[] coeff, double min, double max) |
| | | { |
| | | public (double Min, double Max) XRange { get; set; } |
| | | public (double Min, double Max) YRange { get; set; } |
| | | for (int i = 0; i < x.Length; i++) |
| | | { |
| | | if (x[i].Between(min, max)) |
| | | y[i] = Polyval(coeff, x[i]); |
| | | } |
| | | } |
| | | |
| | | public (int[] Labels, List<DenseArea> DenseAreas) ClassifyPoints( |
| | | double[] x, |
| | | double[] y, |
| | | string method = "dbscan", |
| | | Dictionary<string, object> parameters = null) |
| | | private static void ProcessTransitionRegions(double[] x, double[] y, double[] originalY, |
| | | double[] coeff, double measuredMin, double measuredMax) |
| | | { |
| | | // 左过渡处理 |
| | | double xLeftStart = Math.Max(measuredMin - TransitionWidth, x[0]); |
| | | double yLeftStart = Polyval(Fit.Polynomial(x, originalY, 3).Reverse().ToArray(), xLeftStart); |
| | | double yLeftEnd = Polyval(coeff, measuredMin); |
| | | |
| | | if (x.Length != y.Length) |
| | | throw new ArgumentException("x 和 y 的长度必须相同"); |
| | | // 右过渡处理 |
| | | double xRightEnd = Math.Min(measuredMax + TransitionWidth, x[^1]); |
| | | double yRightStart = Polyval(coeff, measuredMax); |
| | | double yRightEnd = Polyval(Fit.Polynomial(x, originalY, 3).Reverse().ToArray(), xRightEnd); |
| | | |
| | | for (int i = 0; i < x.Length; i++) |
| | | { |
| | | double xi = x[i]; |
| | | if (xi.Between(xLeftStart, measuredMin)) |
| | | y[i] = LinearInterp(xi, xLeftStart, yLeftStart, measuredMin, yLeftEnd); |
| | | else if (xi.Between(measuredMax, xRightEnd)) |
| | | y[i] = LinearInterp(xi, measuredMax, yRightStart, xRightEnd, yRightEnd); |
| | | } |
| | | } |
| | | |
| | | } |
| | | |
| | | public class DBSCANProcessor |
| | | { |
| | | public readonly struct PointData : IPointData |
| | | { |
| | | public Point Point { get; } |
| | | public PointData(double x, double y) => Point = new Point(x, y); |
| | | } |
| | | |
| | | private double _xMean; |
| | | private double _xStd; |
| | | private double _yMean; |
| | | private double _yStd; |
| | | |
| | | private double _minDensity = 0.05; |
| | | |
| | | public List<(double MinX, double MaxX, double MinY, double MaxY)> PerformClustering(double[] x, double[] y) |
| | | { |
| | | // 数据标准化 |
| | | double[,] data = new double[x.Length, 2]; |
| | | for (int i = 0; i < x.Length; i++) |
| | | var (ScaledData, Points, XMean, XStd, YMean, YStd) = OptimizedScaleData(x, y); |
| | | |
| | | _xMean = XMean; |
| | | _xStd = XStd; |
| | | _yMean = YMean; |
| | | _yStd = YStd; |
| | | |
| | | var minValidCount = Math.Min(5, x.Length * _minDensity); |
| | | |
| | | // 自动参数计算 |
| | | var minPts = CalculateMinPts(Points.Count); |
| | | var eps = EpsOptimizer.ComputeEps(ScaledData, minPts); |
| | | |
| | | |
| | | // 执行DBSCAN聚类 |
| | | var clusters = DbscanRBush.CalculateClusters( |
| | | Points, |
| | | epsilon: eps, |
| | | minimumPointsPerCluster: minPts); |
| | | |
| | | var list = new List<(double MinX, double MaxX, double MinY, double MaxY)>(); |
| | | if (clusters.Clusters.Any()) |
| | | { |
| | | data[i, 0] = x[i]; |
| | | data[i, 1] = y[i]; |
| | | } |
| | | |
| | | // 计算均值和标准差 |
| | | double[] mean = new double[2]; |
| | | double[] std = new double[2]; |
| | | for (int j = 0; j < 2; j++) |
| | | { |
| | | var jList = Enumerable.Range(0, data.GetLength(0)).Select(i => data[i, j]).ToArray(); |
| | | mean[j] = jList.Mean(); |
| | | std[j] = Math.Sqrt(jList.Variance()); |
| | | } |
| | | |
| | | // 标准化数据 |
| | | double[,] scaledData = new double[x.Length, 2]; |
| | | for (int i = 0; i < x.Length; i++) |
| | | { |
| | | scaledData[i, 0] = (data[i, 0] - mean[0]) / std[0]; |
| | | scaledData[i, 1] = (data[i, 1] - mean[1]) / std[1]; |
| | | } |
| | | |
| | | int[] labels = new int[x.Length]; |
| | | List<DenseArea> denseAreas = new List<DenseArea>(); |
| | | |
| | | // 方法选择 |
| | | if (method == "dbscan") |
| | | { |
| | | // DBSCAN参数 |
| | | double eps = parameters != null && parameters.ContainsKey("eps") ? (double)parameters["eps"] : 0.3; |
| | | int minSamples = parameters != null && parameters.ContainsKey("min_samples") |
| | | ? (int)parameters["min_samples"] |
| | | : (int)(x.Length * 0.02); |
| | | |
| | | // 计算距离矩阵 |
| | | double[,] distanceMatrix = DistanceMatrix(scaledData); |
| | | |
| | | // 执行 DBSCAN |
| | | var dbscan = new DBSCAN(eps, minSamples); |
| | | labels = dbscan.Run(scaledData); |
| | | } |
| | | else if (method == "kde") |
| | | { |
| | | // 核密度估计 |
| | | int bandwidth = parameters != null && parameters.ContainsKey("bandwidth") |
| | | ? (int)parameters["bandwidth"] |
| | | : 1; |
| | | double percentile = parameters != null && parameters.ContainsKey("percentile") |
| | | ? (double)parameters["percentile"] |
| | | : 70.0; |
| | | |
| | | // 计算密度 |
| | | double[] density = new double[x.Length]; |
| | | for (int i = 0; i < x.Length; i++) |
| | | foreach (var item in clusters.Clusters) |
| | | { |
| | | double sum = 0; |
| | | for (int j = 0; j < x.Length; j++) |
| | | { |
| | | double distance = Math.Sqrt( |
| | | Math.Pow(scaledData[i, 0] - scaledData[j, 0], 2) + |
| | | Math.Pow(scaledData[i, 1] - scaledData[j, 1], 2)); |
| | | sum += Math.Exp(-distance * distance / (2 * bandwidth * bandwidth)); |
| | | } |
| | | density[i] = sum / (x.Length * 2 * Math.PI * bandwidth * bandwidth); |
| | | if (item.Objects.Count < minValidCount) |
| | | continue; |
| | | list.Add(StandardizeData(item.Objects.ToList())); |
| | | } |
| | | list = list.OrderBy(x => x.MinX).ToList(); |
| | | } |
| | | |
| | | // 计算阈值 |
| | | double threshold = GetPercentile(density, percentile); |
| | | |
| | | // 标记密集区域 |
| | | labels = density.Select(d => d >= threshold ? 1 : 0).ToArray(); |
| | | } |
| | | else if (method == "grid") |
| | | { |
| | | // 动态网格划分 |
| | | int xBins = parameters != null && parameters.ContainsKey("x_bins") |
| | | ? (int)parameters["x_bins"] |
| | | : 30; |
| | | int yBins = parameters != null && parameters.ContainsKey("y_bins") |
| | | ? (int)parameters["y_bins"] |
| | | : 20; |
| | | double percentile = parameters != null && parameters.ContainsKey("percentile") |
| | | ? (double)parameters["percentile"] |
| | | : 75.0; |
| | | |
| | | // 创建网格 |
| | | double[] xBinsArray = CreateBins(x, xBins); |
| | | double[] yBinsArray = CreateBins(y, yBins); |
| | | |
| | | // 计算网格密度 |
| | | int[,] gridDensity = new int[xBins, yBins]; |
| | | for (int i = 0; i < x.Length; i++) |
| | | { |
| | | int xBin = Array.BinarySearch(xBinsArray, x[i]); |
| | | int yBin = Array.BinarySearch(yBinsArray, y[i]); |
| | | if (xBin >= 0 && xBin < xBins && yBin >= 0 && yBin < yBins) |
| | | gridDensity[xBin, yBin]++; |
| | | } |
| | | |
| | | // 计算阈值 |
| | | double threshold = GetPercentile(gridDensity.Cast<double>().ToArray(), percentile); |
| | | |
| | | // 标记密集网格 |
| | | for (int i = 0; i < x.Length; i++) |
| | | { |
| | | int xBin = Array.BinarySearch(xBinsArray, x[i]); |
| | | int yBin = Array.BinarySearch(yBinsArray, y[i]); |
| | | if (xBin >= 0 && xBin < xBins && yBin >= 0 && yBin < yBins) |
| | | labels[i] = gridDensity[xBin, yBin] > threshold ? 1 : 0; |
| | | else |
| | | labels[i] = 0; |
| | | } |
| | | } |
| | | else |
| | | { |
| | | throw new ArgumentException("不支持的检测方法"); |
| | | } |
| | | |
| | | // 提取密集区域 |
| | | var uniqueLabels = labels.Distinct(); |
| | | foreach (var label in uniqueLabels) |
| | | { |
| | | if (label == -1) continue; // 排除噪声点 |
| | | |
| | | bool[] mask = labels.Select(l => l == label).ToArray(); |
| | | if (mask.Count(m => m) > x.Length * 0.05) // 过滤小簇 |
| | | { |
| | | double[,] clusterPoints = new double[mask.Count(m => m), 2]; |
| | | int idx = 0; |
| | | for (int i = 0; i < mask.Length; i++) |
| | | { |
| | | if (mask[i]) |
| | | { |
| | | clusterPoints[idx, 0] = data[i, 0]; |
| | | clusterPoints[idx, 1] = data[i, 1]; |
| | | idx++; |
| | | } |
| | | } |
| | | |
| | | var xList = Enumerable.Range(0, clusterPoints.GetLength(0)).Select(i => clusterPoints[i, 0]).ToArray(); |
| | | var yList = Enumerable.Range(0, clusterPoints.GetLength(0)).Select(i => clusterPoints[i, 1]).ToArray(); |
| | | |
| | | |
| | | double xMin = xList.Min(); |
| | | double xMax = xList.Max(); |
| | | double yMin = yList.Min(); |
| | | double yMax = yList.Max(); |
| | | |
| | | denseAreas.Add(new DenseArea |
| | | { |
| | | XRange = (xMin, xMax), |
| | | YRange = (yMin, yMax) |
| | | }); |
| | | } |
| | | } |
| | | |
| | | return (labels, denseAreas); |
| | | return list; |
| | | } |
| | | |
| | | private double[] CreateBins(double[] data, int bins) |
| | | public (double[,] ScaledData, List<PointData> Points, double XMean, double XStd, double YMean, double YStd) OptimizedScaleData(double[] x, double[] y) |
| | | { |
| | | double min = data.Min(); |
| | | double max = data.Max(); |
| | | double step = (max - min) / bins; |
| | | return Enumerable.Range(0, bins + 1).Select(i => min + i * step).ToArray(); |
| | | } |
| | | int len = x.Length; |
| | | var scaled = new double[len, 2]; |
| | | var points = new List<PointData>(); |
| | | |
| | | private double GetPercentile(double[] data, double percentile) |
| | | { |
| | | Array.Sort(data); |
| | | int index = (int)Math.Ceiling(percentile / 100.0 * data.Length) - 1; |
| | | return data[index]; |
| | | } |
| | | |
| | | private double[,] DistanceMatrix(double[,] data) |
| | | { |
| | | int n = data.GetLength(0); |
| | | double[,] matrix = new double[n, n]; |
| | | for (int i = 0; i < n; i++) |
| | | // 计算 x 的均值和标准差 |
| | | _xMean = 0; |
| | | _xStd = 0; |
| | | for (int i = 0; i < len; i++) |
| | | { |
| | | for (int j = i + 1; j < n; j++) |
| | | { |
| | | double dx = data[i, 0] - data[j, 0]; |
| | | double dy = data[i, 1] - data[j, 1]; |
| | | matrix[i, j] = matrix[j, i] = Math.Sqrt(dx * dx + dy * dy); |
| | | } |
| | | _xMean += x[i]; |
| | | } |
| | | return matrix; |
| | | _xMean /= len; |
| | | |
| | | double xSumSq = 0; |
| | | for (int i = 0; i < len; i++) |
| | | { |
| | | xSumSq += (x[i] - _xMean) * (x[i] - _xMean); |
| | | } |
| | | _xStd = Math.Sqrt(xSumSq / len); |
| | | |
| | | // 计算 y 的均值和标准差 |
| | | _yMean = 0; |
| | | _yStd = 0; |
| | | for (int i = 0; i < len; i++) |
| | | { |
| | | _yMean += y[i]; |
| | | } |
| | | _yMean /= len; |
| | | |
| | | double ySumSq = 0; |
| | | for (int i = 0; i < len; i++) |
| | | { |
| | | ySumSq += (y[i] - _yMean) * (y[i] - _yMean); |
| | | } |
| | | _yStd = Math.Sqrt(ySumSq / len); |
| | | |
| | | // 标准化数据并生成 PointData 对象 |
| | | for (int i = 0; i < len; i++) |
| | | { |
| | | double xScaled = (x[i] - _xMean) / _xStd; |
| | | double yScaled = (y[i] - _yMean) / _yStd; |
| | | |
| | | scaled[i, 0] = xScaled; |
| | | scaled[i, 1] = yScaled; |
| | | |
| | | points.Add(new PointData(xScaled, yScaled)); |
| | | } |
| | | |
| | | return (scaled, points, _xMean, _xStd, _yMean, _yStd); |
| | | } |
| | | |
| | | private (double MinX, double MaxX, double MinY, double MaxY) StandardizeData(List<PointData> points) |
| | | { |
| | | int len = points.Count; |
| | | var originalX = new double[len]; |
| | | var originalY = new double[len]; |
| | | |
| | | for (int i = 0; i < len; i++) |
| | | { |
| | | originalX[i] = points[i].Point.X * _xStd + _xMean; |
| | | originalY[i] = points[i].Point.Y * _yStd + _yMean; |
| | | } |
| | | |
| | | var minX = originalX.Min(x => x); |
| | | var maxX = originalX.Max(x => x); |
| | | |
| | | var minY = originalY.Min(x => x); |
| | | var maxY = originalY.Max(x => x); |
| | | |
| | | |
| | | return (minX, maxX, minY, maxY); |
| | | } |
| | | |
| | | private int CalculateMinPts(int dataSize) |
| | | { |
| | | // 基于数据规模的动态计算(增加安全阈值) |
| | | return Math.Max(10, (int)(dataSize * 0.02)); |
| | | } |
| | | |
| | | } |
| | | |
| | | |
| | | public class DBSCAN |
| | | public static class EpsOptimizer |
| | | { |
| | | private readonly double eps; |
| | | private readonly int minPts; |
| | | |
| | | public DBSCAN(double eps, int minPts) |
| | | public static double ComputeEps(double[,] scaledData, int minSamples) |
| | | { |
| | | this.eps = eps; |
| | | this.minPts = minPts; |
| | | } |
| | | int k = Math.Max(1, minSamples); |
| | | var distances = new List<double>(); |
| | | |
| | | public int[] Run(double[,] points) |
| | | { |
| | | int n = points.GetLength(0); |
| | | int[] labels = new int[n]; |
| | | Array.Fill(labels, -2); // 初始化为未处理状态 |
| | | |
| | | int clusterId = -1; |
| | | |
| | | for (int i = 0; i < n; i++) |
| | | // 使用快速选择优化第k近邻计算 |
| | | for (int i = 0; i < scaledData.GetLength(0); i++) |
| | | { |
| | | if (labels[i] != -2) continue; |
| | | |
| | | var neighbors = GetNeighbors(points, i); |
| | | |
| | | if (neighbors.Count < minPts) |
| | | var tempDists = new List<double>(); |
| | | for (int j = 0; j < scaledData.GetLength(0); j++) |
| | | { |
| | | labels[i] = -1; // 标记为噪声点 |
| | | continue; |
| | | if (i == j) continue; |
| | | double dx = scaledData[i, 0] - scaledData[j, 0]; |
| | | double dy = scaledData[i, 1] - scaledData[j, 1]; |
| | | tempDists.Add(dx * dx + dy * dy); |
| | | } |
| | | |
| | | clusterId++; |
| | | ExpandCluster(points, i, neighbors, clusterId, labels); |
| | | if (tempDists.Count >= k) |
| | | { |
| | | double kthDistSq = QuickSelect(tempDists, k); |
| | | distances.Add(Math.Sqrt(kthDistSq)); |
| | | } |
| | | } |
| | | |
| | | return labels; |
| | | } |
| | | // 异常处理与边界检测 |
| | | if (distances.Count == 0) return 0.5; |
| | | distances.Sort(); |
| | | |
| | | private void ExpandCluster(double[,] points, int pointIndex, List<int> neighbors, int clusterId, int[] labels) |
| | | { |
| | | labels[pointIndex] = clusterId; |
| | | // 预计算前缀和以加速窗口平均值计算 |
| | | double[] prefixSum = new double[distances.Count + 1]; |
| | | for (int i = 0; i < distances.Count; i++) |
| | | prefixSum[i + 1] = prefixSum[i] + distances[i]; |
| | | |
| | | for (int i = 0; i < neighbors.Count; i++) |
| | | // 动态窗口曲率检测 |
| | | double maxCurvature = double.MinValue; |
| | | double eps = distances[1]; // 使用 C# 8 的索引语法替代 Last() |
| | | int windowSize = Math.Max(1, distances.Count / 20); |
| | | windowSize = Math.Clamp(windowSize, 1, 5); |
| | | |
| | | for (int i = windowSize; i < distances.Count - windowSize; i++) |
| | | { |
| | | int neighborIndex = neighbors[i]; |
| | | if (labels[neighborIndex] == -1) |
| | | int prevStart = i - windowSize; |
| | | double prevSum = prefixSum[prevStart + windowSize] - prefixSum[prevStart]; |
| | | double prevAvg = prevSum / windowSize; |
| | | |
| | | int nextStart = i; |
| | | double nextSum = prefixSum[nextStart + windowSize] - prefixSum[nextStart]; |
| | | double nextAvg = nextSum / windowSize; |
| | | |
| | | double curvature = nextAvg - prevAvg; |
| | | if (curvature > maxCurvature) |
| | | { |
| | | labels[neighborIndex] = clusterId; |
| | | } |
| | | else if (labels[neighborIndex] == -2) |
| | | { |
| | | labels[neighborIndex] = clusterId; |
| | | var newNeighbors = GetNeighbors(points, neighborIndex); |
| | | if (newNeighbors.Count >= minPts) |
| | | { |
| | | neighbors.AddRange(newNeighbors); |
| | | } |
| | | maxCurvature = curvature; |
| | | eps = distances[i]; |
| | | } |
| | | } |
| | | |
| | | return eps * 1.15; |
| | | } |
| | | |
| | | private List<int> GetNeighbors(double[,] points, int pointIndex) |
| | | // 快速选择算法实现 |
| | | private static double QuickSelect(List<double> list, int k) |
| | | { |
| | | List<int> neighbors = new List<int>(); |
| | | int n = points.GetLength(0); |
| | | for (int i = 0; i < n; i++) |
| | | int left = 0; |
| | | int right = list.Count - 1; |
| | | var rand = new Random(); |
| | | while (left <= right) |
| | | { |
| | | double distance = EuclideanDistance(points, pointIndex, i); |
| | | if (distance < eps) |
| | | int pivotIndex = Partition(list, left, right, rand.Next(left, right + 1)); |
| | | if (pivotIndex == k - 1) return list[pivotIndex]; |
| | | if (pivotIndex < k - 1) left = pivotIndex + 1; |
| | | else right = pivotIndex - 1; |
| | | } |
| | | return double.NaN; // 理论不会执行到此 |
| | | } |
| | | |
| | | private static int Partition(List<double> list, int left, int right, int pivotIndex) |
| | | { |
| | | double pivotValue = list[pivotIndex]; |
| | | Swap(list, pivotIndex, right); |
| | | int storeIndex = left; |
| | | for (int i = left; i < right; i++) |
| | | { |
| | | if (list[i] < pivotValue) |
| | | { |
| | | neighbors.Add(i); |
| | | Swap(list, i, storeIndex); |
| | | storeIndex++; |
| | | } |
| | | } |
| | | return neighbors; |
| | | Swap(list, storeIndex, right); |
| | | return storeIndex; |
| | | } |
| | | |
| | | private double EuclideanDistance(double[,] points, int pointIndex1, int pointIndex2) |
| | | private static void Swap(List<double> list, int i, int j) |
| | | { |
| | | double dx = points[pointIndex1, 0] - points[pointIndex2, 0]; |
| | | double dy = points[pointIndex1, 1] - points[pointIndex2, 1]; |
| | | return Math.Sqrt(dx * dx + dy * dy); |
| | | (list[i], list[j]) = (list[j], list[i]); // 使用元组交换 |
| | | } |
| | | } |
| | | |
| | | } |
| | | public static class ArrayExtensions |
| | | { |
| | | public static double Mean(this double[] array) => ArrayStatistics.Mean(array); |
| | | public static double StandardDeviation(this double[] array) => Math.Sqrt(ArrayStatistics.Variance(array)); |
| | | |
| | | public static bool Between(this double value, double min, double max) => |
| | | value >= min && value <= max; |
| | | } |
| | | } |