using Accord.Math; using Accord.Math.Optimization; using DevExpress.Data.Extensions; using MathNet.Numerics; using MathNet.Numerics.LinearAlgebra; using MathNet.Numerics.LinearRegression; using MathNet.Numerics.Statistics; namespace IStation.Test { /// /// 泵曲线数据融合校正器(用于合并样条曲线和实测数据并进行优化) /// 增加了数据聚合和动态多项式拟合阶数的功能 /// public class PumpCurveDataFusionCorrectorHelper2 { # region 配置参数 // 异常值检测阈值(基于标准正态分布,2.5对应98.76%置信区间) public const double ZScoreThreshold = 2; // 过渡区域宽度(单位与输入数据相同,控制曲线衔接平滑度) public const double TransitionWidth = 500; // 多项式拟合阶数(三次平衡拟合能力与过拟合风险) public const int PolynomialDegree = 3; // 优化器最大迭代次数(Cobyla算法收敛阈值) private const int OptimizationMaxIterations = 1000; //动态多项式拟合阶数(用于动态调整) public int DynamicPolyDegree = 2; # endregion /// /// 主校正方法(入口函数) /// /// 样条曲线X坐标 /// 样条曲线Y坐标 /// 原始实测X坐标 /// 原始实测Y坐标 /// /// mergedX: 融合后的X坐标 /// mergedY: 初步融合的Y坐标 /// optimizedX: 优化后的X坐标采样 /// optimizedY: 优化后的Y坐标结果 /// public (double[] mergedX, double[] mergedY, double[] optimizedX, double[] optimizedY) Corrent( double[] splineX, double[] splineY, double[] measuredXAll, double[] measuredYAll) { if (measuredXAll.Length != measuredYAll.Length) { return default; } if (measuredXAll.Length>10000) { } var minX = measuredXAll[0]; var maxX = measuredXAll[^1]; var fv = 0.15; var threshold = minX + fv * (maxX - minX); var startIndex = measuredXAll.FindIndex(x => x > threshold); var measuredXAllFiltered = measuredXAll.Skip(startIndex).ToArray(); var measuredYAllFiltered = measuredYAll.Skip(startIndex).ToArray(); if (measuredXAllFiltered.Length < 5) { Console.WriteLine("异常值过滤后数据过小"); return default; } #region 步骤1:稳健回归与异常值过滤(最小二乘法) // 执行简单线性回归获取基准线 (double intercept, double slope) = SimpleRegression.Fit(measuredXAllFiltered, measuredYAllFiltered); // 计算预测值和残差 double[] predictedY = measuredXAllFiltered.Select(x => slope * x + intercept).ToArray(); double[] residuals = measuredYAllFiltered.Zip(predictedY, (m, p) => m - p).ToArray(); // 基于Z-Score的异常值过滤 var zScoreCalculator = new ZScore(residuals); bool[] validMask = zScoreCalculator.Scores .Select(score => Math.Abs(score) < ZScoreThreshold).ToArray(); // 过滤得到有效实测数据 var validData = measuredXAllFiltered.Zip(measuredYAllFiltered, validMask) .Where(t => t.Third) .Select(t => (t.First, t.Second)).ToList(); double[] measuredXValid = validData.Select(t => t.First).ToArray(); double[] measuredYValid = validData.Select(t => t.Second).ToArray(); #endregion if (measuredXValid.Length < 5) { Console.WriteLine("异常值过滤后数据过小"); return default; } return ProcessData(splineX, splineY, measuredXValid, measuredYValid, PolynomialDegree, TransitionWidth); } /// /// 核心数据处理流程(包含数据融合、过渡处理和优化) /// private (double[] mergedX, double[] mergedY, double[] optimizedX, double[] optimizedY) ProcessData( double[] splineX, double[] splineY, double[] measuredXValid, double[] measuredYValid, int polyDegree, double transitionWidth) { /* labels, areas = classify_points(filtered_first_x, filtered_first_y, method = 'dbscan', params={ 'eps': 0.25, 'min_samples': 15}) # 动态多项式次数调整 Config.Dynamic_POLY_DEGREE = 2 if len(areas) > 1 else 1*/ int min_samples = (int)(measuredXValid.Length * 0.02); min_samples = min_samples < 5 ? 5 : min_samples; var pointClassifier = new PointClassifier(); var (labels, areas) = pointClassifier.ClassifyPoints(measuredXValid, measuredYValid, "dbscan", new Dictionary { { "eps", 0.3 }, { "min_samples",(int)min_samples} }); // 动态多项式次数调整 DynamicPolyDegree = areas.Count > 1 ? 2 : 1; if (areas.Count<1) { throw new Exception("areas.Count<1"); } var measured_x = new List(); var measured_y = new List(); foreach (var item in areas) { var x_min = item.XRange.Min; var x_max = item.XRange.Max; var y_min = item.YRange.Min; var y_max = item.YRange.Max; var mask = measuredXValid .Select((x, i) => (x >= x_min && x <= x_max && measuredYValid[i] >= y_min && measuredYValid[i] <= y_max)) .ToArray(); for (int i = 0; i < mask.Length; i++) { var x = mask[i]; if (x) { measured_x.Add(measuredXValid[i]); measured_y.Add(measuredYValid[i]); } } } #region double minX = measuredXValid.Min(), maxX = measuredXValid.Max(); var (mergedX, mergedY) = MergeCurves(splineX, splineY, measured_x.ToArray(), measured_y.ToArray(), DynamicPolyDegree); #endregion #region 步骤5:带约束优化 // 数据标准化处理 double xMean = mergedX.Average(); double xStd = ArrayStatistics.PopulationStandardDeviation(mergedX); double[] xNorm = mergedX.Select(x => (x - xMean) / xStd).ToArray(); // 构建三次多项式优化问题 var X = Matrix.Build.DenseOfRowArrays( xNorm.Select(x => new[] { Math.Pow(x, 3), x * x, x, 1.0 }).ToArray() ); var initialParams = MultipleRegression.QR(X, Vector.Build.DenseOfArray(mergedY)); // 设置单调性约束(导数非负) var constraints = GenerateLinspace(xNorm.Min(), xNorm.Max(), 50) .Select(x => new NonlinearConstraint( numberOfVariables: 4, function: p => -Derivative(p, x), // 导数>=0 shouldBe: ConstraintType.GreaterThanOrEqualTo, value: 0 )).ToList(); // 执行COBYLA优化 var optimizer = new Cobyla( function: BuildObjectiveFunction(xNorm, mergedY), constraints: constraints.ToArray() ) { MaxIterations = OptimizationMaxIterations }; bool success = optimizer.Minimize(initialParams.ToArray()); double[] optimizedParams = optimizer.Solution; #endregion var a = optimizedParams.Select(x => Math.Round(x, 6)).ToList(); var json = JsonHelper.Object2FormatJson(a); // 生成最终优化曲线 double[] optimizedX = GenerateLinspace( Math.Min(splineX.Min(), minX), Math.Max(splineX.Max(), maxX), 300 ); double[] optimizedY = optimizedX .Select(x => CubicModel(optimizedParams, (x - xMean) / xStd)) .ToArray(); return (mergedX, mergedY, optimizedX, optimizedY); } # region 工具方法 /// /// 三次多项式模型(降幂排列) /// private static double CubicModel(double[] param, double x) => param[0] * x * x * x + param[1] * x * x + param[2] * x + param[3]; /// /// 多项式导数(用于单调性约束) /// private static double Derivative(double[] param, double x) => 3 * param[0] * x * x + 2 * param[1] * x + param[2]; /// /// 构建目标函数(均方误差) /// private NonlinearObjectiveFunction BuildObjectiveFunction(double[] xNorm, double[] y) => new NonlinearObjectiveFunction(4, p => xNorm.Select((x, i) => Math.Pow(CubicModel(p, x) - y[i], 2)).Average() ); /// /// 多项式求值(霍纳法则) /// private static double EvaluatePolynomial(double[] coeffs, double x) => coeffs.Aggregate((sum, c) => sum * x + c); /// /// 生成等间距采样点 /// private static double[] GenerateLinspace(double start, double end, int numPoints) => Enumerable.Range(0, numPoints) .Select(i => start + i * (end - start) / (numPoints - 1)) .ToArray(); # endregion # region 过渡处理 /// /// 生成合并的 x 值 /// public static double[] GenerateMergedX(double[] splineX, double[] measuredXValid, int numPoints) { // 生成等间距的点 double[] linspace = GenerateLinspace(measuredXValid.Min(), measuredXValid.Max(), numPoints); // 拼接并去重排序 var concat = splineX.Concat(linspace).ToArray(); var merged = concat.Distinct().OrderBy(x => x).ToArray(); return merged; } # endregion # region 辅助类 /// /// Z分数计算器(用于异常值检测) /// private class ZScore { public readonly double[] Scores; public ZScore(double[] data) { double mean = data.Average(); double std = data.StandardDeviation(); Scores = data.Select(x => (x - mean) / std).ToArray(); } } #endregion public static (double[] CorrectedX, double[] CorrectedY) MergeCurves( double[] originalX, double[] originalY, double[] measuredX, double[] measuredY,int degree) { // 实测数据不足处理 if (measuredX.Length < 3) { Console.WriteLine("实测数据不足,使用原始曲线"); return (originalX, originalY); } // 多项式拟合实测数据(系数反转保持与numpy一致) double[] polyCoeff = Fit.Polynomial(measuredX, measuredY, degree).Reverse().ToArray(); // 创建修正曲线副本 double[] correctedX = originalX.Select(x => x).ToArray(); double[] correctedY = originalY.Select(x => x).ToArray(); // 计算实测数据范围 double measuredMin = measuredX.Min(); double measuredMax = measuredX.Max(); // 核心区域修正 for (int i = 0; i < correctedX.Length; i++) { if (correctedX[i] >= measuredMin && correctedX[i] <= measuredMax) correctedY[i] = Polyval(polyCoeff, correctedX[i]); } // 过渡区域处理 double transition = TransitionWidth; // 左过渡区域 double[] originalPolyCoeff = Fit.Polynomial(originalX, originalY, 3).Reverse().ToArray(); double xLeftStart = Math.Max(measuredMin - transition, originalX.Min()); double yLeftStart = Polyval(originalPolyCoeff, xLeftStart); double yLeftEnd = Polyval(polyCoeff, measuredMin); for (int i = 0; i < correctedX.Length; i++) { double x = correctedX[i]; if (x >= xLeftStart && x < measuredMin) correctedY[i] = LinearInterp(x, xLeftStart, yLeftStart, measuredMin, yLeftEnd); } // 右过渡区域 double xRightEnd = Math.Min(measuredMax + transition, originalX.Max()); double yRightStart = Polyval(polyCoeff, measuredMax); double yRightEnd = Polyval(originalPolyCoeff, xRightEnd); for (int i = 0; i < correctedX.Length; i++) { double x = correctedX[i]; if (x > measuredMax && x <= xRightEnd) correctedY[i] = LinearInterp(x, measuredMax, yRightStart, xRightEnd, yRightEnd); } return (correctedX, correctedY); } // 多项式求值(支持降序系数) private static double Polyval(double[] coefficients, double x) { double result = 0; int degree = coefficients.Length - 1; for (int i = 0; i < coefficients.Length; i++) result += coefficients[i] * Math.Pow(x, degree - i); return result; } // 线性插值 private static double LinearInterp(double x, double x0, double y0, double x1, double y1) { return x1 != x0 ? y0 + (y1 - y0) * (x - x0) / (x1 - x0) : y0; } } public class PointClassifier { public class DenseArea { public (double Min, double Max) XRange { get; set; } public (double Min, double Max) YRange { get; set; } } public (int[] Labels, List DenseAreas) ClassifyPoints( double[] x, double[] y, string method = "dbscan", Dictionary parameters = null) { if (x.Length != y.Length) throw new ArgumentException("x 和 y 的长度必须相同"); // 数据标准化 double[,] data = new double[x.Length, 2]; for (int i = 0; i < x.Length; i++) { data[i, 0] = x[i]; data[i, 1] = y[i]; } // 计算均值和标准差 double[] mean = new double[2]; double[] std = new double[2]; for (int j = 0; j < 2; j++) { var jList = Enumerable.Range(0, data.GetLength(0)).Select(i => data[i, j]).ToArray(); mean[j] = jList.Mean(); std[j] = Math.Sqrt(jList.Variance()); } // 标准化数据 double[,] scaledData = new double[x.Length, 2]; for (int i = 0; i < x.Length; i++) { scaledData[i, 0] = (data[i, 0] - mean[0]) / std[0]; scaledData[i, 1] = (data[i, 1] - mean[1]) / std[1]; } int[] labels = new int[x.Length]; List denseAreas = new List(); // 方法选择 if (method == "dbscan") { int min_samples = (int)(x.Length * 0.02); min_samples = min_samples < 5 ? 5 : min_samples; // DBSCAN参数 double eps = parameters != null && parameters.ContainsKey("eps") ? (double)parameters["eps"] : 0.3; int minSamples = parameters != null && parameters.ContainsKey("min_samples") ? (int)parameters["min_samples"] : min_samples; // 计算距离矩阵 double[,] distanceMatrix = DistanceMatrix(scaledData); // 执行 DBSCAN var dbscan = new DBSCAN(eps, minSamples); labels = dbscan.Run(scaledData); } else if (method == "kde") { // 核密度估计 int bandwidth = parameters != null && parameters.ContainsKey("bandwidth") ? (int)parameters["bandwidth"] : 1; double percentile = parameters != null && parameters.ContainsKey("percentile") ? (double)parameters["percentile"] : 70.0; // 计算密度 double[] density = new double[x.Length]; for (int i = 0; i < x.Length; i++) { double sum = 0; for (int j = 0; j < x.Length; j++) { double distance = Math.Sqrt( Math.Pow(scaledData[i, 0] - scaledData[j, 0], 2) + Math.Pow(scaledData[i, 1] - scaledData[j, 1], 2)); sum += Math.Exp(-distance * distance / (2 * bandwidth * bandwidth)); } density[i] = sum / (x.Length * 2 * Math.PI * bandwidth * bandwidth); } // 计算阈值 double threshold = GetPercentile(density, percentile); // 标记密集区域 labels = density.Select(d => d >= threshold ? 1 : 0).ToArray(); } else if (method == "grid") { // 动态网格划分 int xBins = parameters != null && parameters.ContainsKey("x_bins") ? (int)parameters["x_bins"] : 30; int yBins = parameters != null && parameters.ContainsKey("y_bins") ? (int)parameters["y_bins"] : 20; double percentile = parameters != null && parameters.ContainsKey("percentile") ? (double)parameters["percentile"] : 75.0; // 创建网格 double[] xBinsArray = CreateBins(x, xBins); double[] yBinsArray = CreateBins(y, yBins); // 计算网格密度 int[,] gridDensity = new int[xBins, yBins]; for (int i = 0; i < x.Length; i++) { int xBin = Array.BinarySearch(xBinsArray, x[i]); int yBin = Array.BinarySearch(yBinsArray, y[i]); if (xBin >= 0 && xBin < xBins && yBin >= 0 && yBin < yBins) gridDensity[xBin, yBin]++; } // 计算阈值 double threshold = GetPercentile(gridDensity.Cast().ToArray(), percentile); // 标记密集网格 for (int i = 0; i < x.Length; i++) { int xBin = Array.BinarySearch(xBinsArray, x[i]); int yBin = Array.BinarySearch(yBinsArray, y[i]); if (xBin >= 0 && xBin < xBins && yBin >= 0 && yBin < yBins) labels[i] = gridDensity[xBin, yBin] > threshold ? 1 : 0; else labels[i] = 0; } } else { throw new ArgumentException("不支持的检测方法"); } // 提取密集区域 var uniqueLabels = labels.Distinct(); foreach (var label in uniqueLabels) { if (label == -1) continue; // 排除噪声点 bool[] mask = labels.Select(l => l == label).ToArray(); if (mask.Count(m => m) > x.Length * 0.05) // 过滤小簇 { double[,] clusterPoints = new double[mask.Count(m => m), 2]; int idx = 0; for (int i = 0; i < mask.Length; i++) { if (mask[i]) { clusterPoints[idx, 0] = data[i, 0]; clusterPoints[idx, 1] = data[i, 1]; idx++; } } var xList = Enumerable.Range(0, clusterPoints.GetLength(0)).Select(i => clusterPoints[i, 0]).ToArray(); var yList = Enumerable.Range(0, clusterPoints.GetLength(0)).Select(i => clusterPoints[i, 1]).ToArray(); double xMin = xList.Min(); double xMax = xList.Max(); double yMin = yList.Min(); double yMax = yList.Max(); denseAreas.Add(new DenseArea { XRange = (xMin, xMax), YRange = (yMin, yMax) }); } } return (labels, denseAreas); } private double[] CreateBins(double[] data, int bins) { double min = data.Min(); double max = data.Max(); double step = (max - min) / bins; return Enumerable.Range(0, bins + 1).Select(i => min + i * step).ToArray(); } private double GetPercentile(double[] data, double percentile) { Array.Sort(data); int index = (int)Math.Ceiling(percentile / 100.0 * data.Length) - 1; return data[index]; } private double[,] DistanceMatrix(double[,] data) { int n = data.GetLength(0); double[,] matrix = new double[n, n]; for (int i = 0; i < n; i++) { for (int j = i + 1; j < n; j++) { double dx = data[i, 0] - data[j, 0]; double dy = data[i, 1] - data[j, 1]; matrix[i, j] = matrix[j, i] = Math.Sqrt(dx * dx + dy * dy); } } return matrix; } } public class DBSCAN { private readonly double eps; private readonly int minPts; public DBSCAN(double eps, int minPts) { this.eps = eps; this.minPts = minPts; } public int[] Run(double[,] points) { int n = points.GetLength(0); int[] labels = new int[n]; Array.Fill(labels, -2); // 初始化为未处理状态 int clusterId = -1; for (int i = 0; i < n; i++) { if (labels[i] != -2) continue; var neighbors = GetNeighbors(points, i); if (neighbors.Count < minPts) { labels[i] = -1; // 标记为噪声点 continue; } clusterId++; ExpandCluster(points, i, neighbors, clusterId, labels); } return labels; } private void ExpandCluster(double[,] points, int pointIndex, List neighbors, int clusterId, int[] labels) { labels[pointIndex] = clusterId; for (int i = 0; i < neighbors.Count; i++) { int neighborIndex = neighbors[i]; if (labels[neighborIndex] == -1) { labels[neighborIndex] = clusterId; } else if (labels[neighborIndex] == -2) { labels[neighborIndex] = clusterId; var newNeighbors = GetNeighbors(points, neighborIndex); if (newNeighbors.Count >= minPts) { neighbors.AddRange(newNeighbors); } } } } private List GetNeighbors(double[,] points, int pointIndex) { List neighbors = new List(); int n = points.GetLength(0); for (int i = 0; i < n; i++) { double distance = EuclideanDistance(points, pointIndex, i); if (distance < eps) { neighbors.Add(i); } } return neighbors; } private double EuclideanDistance(double[,] points, int pointIndex1, int pointIndex2) { double dx = points[pointIndex1, 0] - points[pointIndex2, 0]; double dy = points[pointIndex1, 1] - points[pointIndex2, 1]; return Math.Sqrt(dx * dx + dy * dy); } } }