using Accord.Math;
|
using Accord.Math.Optimization;
|
using DevExpress.Data.Extensions;
|
using MathNet.Numerics;
|
using MathNet.Numerics.LinearAlgebra;
|
using MathNet.Numerics.LinearRegression;
|
using MathNet.Numerics.Statistics;
|
|
namespace IStation.Test
|
{
|
|
/// <summary>
|
/// 泵曲线数据融合校正器(用于合并样条曲线和实测数据并进行优化)
|
/// 增加了数据聚合和动态多项式拟合阶数的功能
|
/// </summary>
|
public class PumpCurveDataFusionCorrectorHelper2
|
{
|
# region 配置参数
|
// 异常值检测阈值(基于标准正态分布,2.5对应98.76%置信区间)
|
public const double ZScoreThreshold = 2;
|
// 过渡区域宽度(单位与输入数据相同,控制曲线衔接平滑度)
|
public const double TransitionWidth = 500;
|
// 多项式拟合阶数(三次平衡拟合能力与过拟合风险)
|
public const int PolynomialDegree = 3;
|
// 优化器最大迭代次数(Cobyla算法收敛阈值)
|
private const int OptimizationMaxIterations = 1000;
|
|
//动态多项式拟合阶数(用于动态调整)
|
public int DynamicPolyDegree = 2;
|
|
# endregion
|
|
/// <summary>
|
/// 主校正方法(入口函数)
|
/// </summary>
|
/// <param name="splineX">样条曲线X坐标</param>
|
/// <param name="splineY">样条曲线Y坐标</param>
|
/// <param name="measuredXAllFiltered">原始实测X坐标</param>
|
/// <param name="measuredYAllFiltered">原始实测Y坐标</param>
|
/// <returns>
|
/// mergedX: 融合后的X坐标
|
/// mergedY: 初步融合的Y坐标
|
/// optimizedX: 优化后的X坐标采样
|
/// optimizedY: 优化后的Y坐标结果
|
/// </returns>
|
public (double[] mergedX, double[] mergedY, double[] optimizedX, double[] optimizedY) Corrent(
|
double[] splineX, double[] splineY,
|
double[] measuredXAll, double[] measuredYAll)
|
{
|
|
if (measuredXAll.Length != measuredYAll.Length)
|
{
|
return default;
|
}
|
|
if (measuredXAll.Length>10000)
|
{
|
|
}
|
|
var minX = measuredXAll[0];
|
var maxX = measuredXAll[^1];
|
|
var fv = 0.15;
|
var threshold = minX + fv * (maxX - minX);
|
var startIndex = measuredXAll.FindIndex(x => x > threshold);
|
|
var measuredXAllFiltered = measuredXAll.Skip(startIndex).ToArray();
|
var measuredYAllFiltered = measuredYAll.Skip(startIndex).ToArray();
|
|
|
|
if (measuredXAllFiltered.Length < 5)
|
{
|
Console.WriteLine("异常值过滤后数据过小");
|
return default;
|
}
|
|
#region 步骤1:稳健回归与异常值过滤(最小二乘法)
|
// 执行简单线性回归获取基准线
|
(double intercept, double slope) = SimpleRegression.Fit(measuredXAllFiltered, measuredYAllFiltered);
|
|
// 计算预测值和残差
|
double[] predictedY = measuredXAllFiltered.Select(x => slope * x + intercept).ToArray();
|
double[] residuals = measuredYAllFiltered.Zip(predictedY, (m, p) => m - p).ToArray();
|
|
// 基于Z-Score的异常值过滤
|
var zScoreCalculator = new ZScore(residuals);
|
bool[] validMask = zScoreCalculator.Scores
|
.Select(score => Math.Abs(score) < ZScoreThreshold).ToArray();
|
|
// 过滤得到有效实测数据
|
var validData = measuredXAllFiltered.Zip(measuredYAllFiltered, validMask)
|
.Where(t => t.Third)
|
.Select(t => (t.First, t.Second)).ToList();
|
double[] measuredXValid = validData.Select(t => t.First).ToArray();
|
double[] measuredYValid = validData.Select(t => t.Second).ToArray();
|
#endregion
|
|
|
|
if (measuredXValid.Length < 5)
|
{
|
Console.WriteLine("异常值过滤后数据过小");
|
return default;
|
}
|
|
return ProcessData(splineX, splineY, measuredXValid, measuredYValid,
|
PolynomialDegree, TransitionWidth);
|
}
|
|
/// <summary>
|
/// 核心数据处理流程(包含数据融合、过渡处理和优化)
|
/// </summary>
|
private (double[] mergedX, double[] mergedY, double[] optimizedX, double[] optimizedY) ProcessData(
|
double[] splineX, double[] splineY,
|
double[] measuredXValid, double[] measuredYValid,
|
int polyDegree, double transitionWidth)
|
{
|
|
/* labels, areas = classify_points(filtered_first_x, filtered_first_y, method = 'dbscan',
|
params={ 'eps': 0.25, 'min_samples': 15})
|
# 动态多项式次数调整
|
Config.Dynamic_POLY_DEGREE = 2 if len(areas) > 1 else 1*/
|
int min_samples = (int)(measuredXValid.Length * 0.02);
|
min_samples = min_samples < 5 ? 5 : min_samples;
|
var pointClassifier = new PointClassifier();
|
var (labels, areas) = pointClassifier.ClassifyPoints(measuredXValid, measuredYValid, "dbscan", new Dictionary<string, object>
|
{
|
{ "eps", 0.3 },
|
{ "min_samples",(int)min_samples}
|
});
|
|
// 动态多项式次数调整
|
DynamicPolyDegree = areas.Count > 1 ? 2 : 1;
|
|
|
if (areas.Count<1)
|
{
|
throw new Exception("areas.Count<1");
|
}
|
|
var measured_x = new List<double>();
|
var measured_y = new List<double>();
|
|
foreach (var item in areas)
|
{
|
var x_min = item.XRange.Min;
|
var x_max = item.XRange.Max;
|
var y_min = item.YRange.Min;
|
var y_max = item.YRange.Max;
|
var mask = measuredXValid
|
.Select((x, i) => (x >= x_min && x <= x_max && measuredYValid[i] >= y_min && measuredYValid[i] <= y_max))
|
.ToArray();
|
for (int i = 0; i < mask.Length; i++)
|
{
|
var x = mask[i];
|
if (x)
|
{
|
measured_x.Add(measuredXValid[i]);
|
measured_y.Add(measuredYValid[i]);
|
}
|
}
|
}
|
|
#region
|
|
|
double minX = measuredXValid.Min(), maxX = measuredXValid.Max();
|
var (mergedX, mergedY) = MergeCurves(splineX, splineY, measured_x.ToArray(), measured_y.ToArray(), DynamicPolyDegree);
|
#endregion
|
|
#region 步骤5:带约束优化
|
|
// 数据标准化处理
|
double xMean = mergedX.Average();
|
double xStd = ArrayStatistics.PopulationStandardDeviation(mergedX);
|
double[] xNorm = mergedX.Select(x => (x - xMean) / xStd).ToArray();
|
|
// 构建三次多项式优化问题
|
var X = Matrix<double>.Build.DenseOfRowArrays(
|
xNorm.Select(x => new[] { Math.Pow(x, 3), x * x, x, 1.0 }).ToArray()
|
);
|
var initialParams = MultipleRegression.QR(X, Vector<double>.Build.DenseOfArray(mergedY));
|
|
|
|
|
// 设置单调性约束(导数非负)
|
var constraints = GenerateLinspace(xNorm.Min(), xNorm.Max(), 50)
|
.Select(x => new NonlinearConstraint(
|
numberOfVariables: 4,
|
function: p => -Derivative(p, x), // 导数>=0
|
shouldBe: ConstraintType.GreaterThanOrEqualTo,
|
value: 0
|
)).ToList();
|
|
// 执行COBYLA优化
|
var optimizer = new Cobyla(
|
function: BuildObjectiveFunction(xNorm, mergedY),
|
constraints: constraints.ToArray()
|
)
|
{ MaxIterations = OptimizationMaxIterations };
|
|
bool success = optimizer.Minimize(initialParams.ToArray());
|
double[] optimizedParams = optimizer.Solution;
|
#endregion
|
|
|
|
var a = optimizedParams.Select(x => Math.Round(x, 6)).ToList();
|
var json = JsonHelper.Object2FormatJson(a);
|
// 生成最终优化曲线
|
double[] optimizedX = GenerateLinspace(
|
Math.Min(splineX.Min(), minX),
|
Math.Max(splineX.Max(), maxX),
|
300
|
);
|
double[] optimizedY = optimizedX
|
.Select(x => CubicModel(optimizedParams, (x - xMean) / xStd))
|
.ToArray();
|
|
return (mergedX, mergedY, optimizedX, optimizedY);
|
}
|
|
# region 工具方法
|
|
/// <summary>
|
/// 三次多项式模型(降幂排列)
|
/// </summary>
|
private static double CubicModel(double[] param, double x) =>
|
param[0] * x * x * x + param[1] * x * x + param[2] * x + param[3];
|
|
/// <summary>
|
/// 多项式导数(用于单调性约束)
|
/// </summary>
|
private static double Derivative(double[] param, double x) =>
|
3 * param[0] * x * x + 2 * param[1] * x + param[2];
|
|
|
/// <summary>
|
/// 构建目标函数(均方误差)
|
/// </summary>
|
private NonlinearObjectiveFunction BuildObjectiveFunction(double[] xNorm, double[] y) =>
|
new NonlinearObjectiveFunction(4, p =>
|
xNorm.Select((x, i) => Math.Pow(CubicModel(p, x) - y[i], 2)).Average()
|
);
|
|
|
|
/// <summary>
|
/// 多项式求值(霍纳法则)
|
/// </summary>
|
private static double EvaluatePolynomial(double[] coeffs, double x) =>
|
coeffs.Aggregate((sum, c) => sum * x + c);
|
|
/// <summary>
|
/// 生成等间距采样点
|
/// </summary>
|
private static double[] GenerateLinspace(double start, double end, int numPoints) =>
|
Enumerable.Range(0, numPoints)
|
.Select(i => start + i * (end - start) / (numPoints - 1))
|
.ToArray();
|
# endregion
|
|
# region 过渡处理
|
|
|
/// <summary>
|
/// 生成合并的 x 值
|
/// </summary>
|
public static double[] GenerateMergedX(double[] splineX, double[] measuredXValid, int numPoints)
|
{
|
// 生成等间距的点
|
double[] linspace = GenerateLinspace(measuredXValid.Min(), measuredXValid.Max(), numPoints);
|
// 拼接并去重排序
|
var concat = splineX.Concat(linspace).ToArray();
|
var merged = concat.Distinct().OrderBy(x => x).ToArray();
|
return merged;
|
}
|
|
# endregion
|
|
# region 辅助类
|
|
/// <summary>
|
/// Z分数计算器(用于异常值检测)
|
/// </summary>
|
private class ZScore
|
{
|
public readonly double[] Scores;
|
|
public ZScore(double[] data)
|
{
|
double mean = data.Average();
|
double std = data.StandardDeviation();
|
Scores = data.Select(x => (x - mean) / std).ToArray();
|
}
|
}
|
|
#endregion
|
|
|
|
public static (double[] CorrectedX, double[] CorrectedY) MergeCurves(
|
double[] originalX, double[] originalY,
|
double[] measuredX, double[] measuredY,int degree)
|
{
|
// 实测数据不足处理
|
if (measuredX.Length < 3)
|
{
|
Console.WriteLine("实测数据不足,使用原始曲线");
|
return (originalX, originalY);
|
}
|
|
// 多项式拟合实测数据(系数反转保持与numpy一致)
|
double[] polyCoeff = Fit.Polynomial(measuredX, measuredY, degree).Reverse().ToArray();
|
|
// 创建修正曲线副本
|
double[] correctedX = originalX.Select(x => x).ToArray();
|
double[] correctedY = originalY.Select(x => x).ToArray();
|
|
// 计算实测数据范围
|
double measuredMin = measuredX.Min();
|
double measuredMax = measuredX.Max();
|
|
// 核心区域修正
|
for (int i = 0; i < correctedX.Length; i++)
|
{
|
if (correctedX[i] >= measuredMin && correctedX[i] <= measuredMax)
|
correctedY[i] = Polyval(polyCoeff, correctedX[i]);
|
}
|
|
// 过渡区域处理
|
double transition = TransitionWidth;
|
|
// 左过渡区域
|
double[] originalPolyCoeff = Fit.Polynomial(originalX, originalY, 3).Reverse().ToArray();
|
double xLeftStart = Math.Max(measuredMin - transition, originalX.Min());
|
double yLeftStart = Polyval(originalPolyCoeff, xLeftStart);
|
double yLeftEnd = Polyval(polyCoeff, measuredMin);
|
|
for (int i = 0; i < correctedX.Length; i++)
|
{
|
double x = correctedX[i];
|
if (x >= xLeftStart && x < measuredMin)
|
correctedY[i] = LinearInterp(x, xLeftStart, yLeftStart, measuredMin, yLeftEnd);
|
}
|
|
// 右过渡区域
|
double xRightEnd = Math.Min(measuredMax + transition, originalX.Max());
|
double yRightStart = Polyval(polyCoeff, measuredMax);
|
double yRightEnd = Polyval(originalPolyCoeff, xRightEnd);
|
|
for (int i = 0; i < correctedX.Length; i++)
|
{
|
double x = correctedX[i];
|
if (x > measuredMax && x <= xRightEnd)
|
correctedY[i] = LinearInterp(x, measuredMax, yRightStart, xRightEnd, yRightEnd);
|
}
|
|
return (correctedX, correctedY);
|
}
|
|
// 多项式求值(支持降序系数)
|
private static double Polyval(double[] coefficients, double x)
|
{
|
double result = 0;
|
int degree = coefficients.Length - 1;
|
for (int i = 0; i < coefficients.Length; i++)
|
result += coefficients[i] * Math.Pow(x, degree - i);
|
return result;
|
}
|
|
// 线性插值
|
private static double LinearInterp(double x, double x0, double y0, double x1, double y1)
|
{
|
return x1 != x0
|
? y0 + (y1 - y0) * (x - x0) / (x1 - x0)
|
: y0;
|
}
|
|
}
|
|
|
|
|
|
|
public class PointClassifier
|
{
|
public class DenseArea
|
{
|
public (double Min, double Max) XRange { get; set; }
|
public (double Min, double Max) YRange { get; set; }
|
}
|
|
public (int[] Labels, List<DenseArea> DenseAreas) ClassifyPoints(
|
double[] x,
|
double[] y,
|
string method = "dbscan",
|
Dictionary<string, object> parameters = null)
|
{
|
|
if (x.Length != y.Length)
|
throw new ArgumentException("x 和 y 的长度必须相同");
|
|
// 数据标准化
|
double[,] data = new double[x.Length, 2];
|
for (int i = 0; i < x.Length; i++)
|
{
|
data[i, 0] = x[i];
|
data[i, 1] = y[i];
|
}
|
|
// 计算均值和标准差
|
double[] mean = new double[2];
|
double[] std = new double[2];
|
for (int j = 0; j < 2; j++)
|
{
|
var jList = Enumerable.Range(0, data.GetLength(0)).Select(i => data[i, j]).ToArray();
|
mean[j] = jList.Mean();
|
std[j] = Math.Sqrt(jList.Variance());
|
}
|
|
// 标准化数据
|
double[,] scaledData = new double[x.Length, 2];
|
for (int i = 0; i < x.Length; i++)
|
{
|
scaledData[i, 0] = (data[i, 0] - mean[0]) / std[0];
|
scaledData[i, 1] = (data[i, 1] - mean[1]) / std[1];
|
}
|
|
int[] labels = new int[x.Length];
|
List<DenseArea> denseAreas = new List<DenseArea>();
|
|
// 方法选择
|
if (method == "dbscan")
|
{
|
int min_samples = (int)(x.Length * 0.02);
|
min_samples = min_samples < 5 ? 5 : min_samples;
|
// DBSCAN参数
|
double eps = parameters != null && parameters.ContainsKey("eps") ? (double)parameters["eps"] : 0.3;
|
int minSamples = parameters != null && parameters.ContainsKey("min_samples")
|
? (int)parameters["min_samples"]
|
: min_samples;
|
|
// 计算距离矩阵
|
double[,] distanceMatrix = DistanceMatrix(scaledData);
|
|
// 执行 DBSCAN
|
var dbscan = new DBSCAN(eps, minSamples);
|
labels = dbscan.Run(scaledData);
|
}
|
else if (method == "kde")
|
{
|
// 核密度估计
|
int bandwidth = parameters != null && parameters.ContainsKey("bandwidth")
|
? (int)parameters["bandwidth"]
|
: 1;
|
double percentile = parameters != null && parameters.ContainsKey("percentile")
|
? (double)parameters["percentile"]
|
: 70.0;
|
|
// 计算密度
|
double[] density = new double[x.Length];
|
for (int i = 0; i < x.Length; i++)
|
{
|
double sum = 0;
|
for (int j = 0; j < x.Length; j++)
|
{
|
double distance = Math.Sqrt(
|
Math.Pow(scaledData[i, 0] - scaledData[j, 0], 2) +
|
Math.Pow(scaledData[i, 1] - scaledData[j, 1], 2));
|
sum += Math.Exp(-distance * distance / (2 * bandwidth * bandwidth));
|
}
|
density[i] = sum / (x.Length * 2 * Math.PI * bandwidth * bandwidth);
|
}
|
|
// 计算阈值
|
double threshold = GetPercentile(density, percentile);
|
|
// 标记密集区域
|
labels = density.Select(d => d >= threshold ? 1 : 0).ToArray();
|
}
|
else if (method == "grid")
|
{
|
// 动态网格划分
|
int xBins = parameters != null && parameters.ContainsKey("x_bins")
|
? (int)parameters["x_bins"]
|
: 30;
|
int yBins = parameters != null && parameters.ContainsKey("y_bins")
|
? (int)parameters["y_bins"]
|
: 20;
|
double percentile = parameters != null && parameters.ContainsKey("percentile")
|
? (double)parameters["percentile"]
|
: 75.0;
|
|
// 创建网格
|
double[] xBinsArray = CreateBins(x, xBins);
|
double[] yBinsArray = CreateBins(y, yBins);
|
|
// 计算网格密度
|
int[,] gridDensity = new int[xBins, yBins];
|
for (int i = 0; i < x.Length; i++)
|
{
|
int xBin = Array.BinarySearch(xBinsArray, x[i]);
|
int yBin = Array.BinarySearch(yBinsArray, y[i]);
|
if (xBin >= 0 && xBin < xBins && yBin >= 0 && yBin < yBins)
|
gridDensity[xBin, yBin]++;
|
}
|
|
// 计算阈值
|
double threshold = GetPercentile(gridDensity.Cast<double>().ToArray(), percentile);
|
|
// 标记密集网格
|
for (int i = 0; i < x.Length; i++)
|
{
|
int xBin = Array.BinarySearch(xBinsArray, x[i]);
|
int yBin = Array.BinarySearch(yBinsArray, y[i]);
|
if (xBin >= 0 && xBin < xBins && yBin >= 0 && yBin < yBins)
|
labels[i] = gridDensity[xBin, yBin] > threshold ? 1 : 0;
|
else
|
labels[i] = 0;
|
}
|
}
|
else
|
{
|
throw new ArgumentException("不支持的检测方法");
|
}
|
|
// 提取密集区域
|
var uniqueLabels = labels.Distinct();
|
foreach (var label in uniqueLabels)
|
{
|
if (label == -1) continue; // 排除噪声点
|
|
bool[] mask = labels.Select(l => l == label).ToArray();
|
if (mask.Count(m => m) > x.Length * 0.05) // 过滤小簇
|
{
|
double[,] clusterPoints = new double[mask.Count(m => m), 2];
|
int idx = 0;
|
for (int i = 0; i < mask.Length; i++)
|
{
|
if (mask[i])
|
{
|
clusterPoints[idx, 0] = data[i, 0];
|
clusterPoints[idx, 1] = data[i, 1];
|
idx++;
|
}
|
}
|
|
var xList = Enumerable.Range(0, clusterPoints.GetLength(0)).Select(i => clusterPoints[i, 0]).ToArray();
|
var yList = Enumerable.Range(0, clusterPoints.GetLength(0)).Select(i => clusterPoints[i, 1]).ToArray();
|
|
|
double xMin = xList.Min();
|
double xMax = xList.Max();
|
double yMin = yList.Min();
|
double yMax = yList.Max();
|
|
denseAreas.Add(new DenseArea
|
{
|
XRange = (xMin, xMax),
|
YRange = (yMin, yMax)
|
});
|
}
|
}
|
|
return (labels, denseAreas);
|
}
|
|
private double[] CreateBins(double[] data, int bins)
|
{
|
double min = data.Min();
|
double max = data.Max();
|
double step = (max - min) / bins;
|
return Enumerable.Range(0, bins + 1).Select(i => min + i * step).ToArray();
|
}
|
|
private double GetPercentile(double[] data, double percentile)
|
{
|
Array.Sort(data);
|
int index = (int)Math.Ceiling(percentile / 100.0 * data.Length) - 1;
|
return data[index];
|
}
|
|
private double[,] DistanceMatrix(double[,] data)
|
{
|
int n = data.GetLength(0);
|
double[,] matrix = new double[n, n];
|
for (int i = 0; i < n; i++)
|
{
|
for (int j = i + 1; j < n; j++)
|
{
|
double dx = data[i, 0] - data[j, 0];
|
double dy = data[i, 1] - data[j, 1];
|
matrix[i, j] = matrix[j, i] = Math.Sqrt(dx * dx + dy * dy);
|
}
|
}
|
return matrix;
|
}
|
|
}
|
|
|
public class DBSCAN
|
{
|
private readonly double eps;
|
private readonly int minPts;
|
|
public DBSCAN(double eps, int minPts)
|
{
|
this.eps = eps;
|
this.minPts = minPts;
|
}
|
|
public int[] Run(double[,] points)
|
{
|
int n = points.GetLength(0);
|
int[] labels = new int[n];
|
Array.Fill(labels, -2); // 初始化为未处理状态
|
|
int clusterId = -1;
|
|
for (int i = 0; i < n; i++)
|
{
|
if (labels[i] != -2) continue;
|
|
var neighbors = GetNeighbors(points, i);
|
|
if (neighbors.Count < minPts)
|
{
|
labels[i] = -1; // 标记为噪声点
|
continue;
|
}
|
|
clusterId++;
|
ExpandCluster(points, i, neighbors, clusterId, labels);
|
}
|
|
return labels;
|
}
|
|
private void ExpandCluster(double[,] points, int pointIndex, List<int> neighbors, int clusterId, int[] labels)
|
{
|
labels[pointIndex] = clusterId;
|
|
for (int i = 0; i < neighbors.Count; i++)
|
{
|
int neighborIndex = neighbors[i];
|
if (labels[neighborIndex] == -1)
|
{
|
labels[neighborIndex] = clusterId;
|
}
|
else if (labels[neighborIndex] == -2)
|
{
|
labels[neighborIndex] = clusterId;
|
var newNeighbors = GetNeighbors(points, neighborIndex);
|
if (newNeighbors.Count >= minPts)
|
{
|
neighbors.AddRange(newNeighbors);
|
}
|
}
|
}
|
}
|
|
private List<int> GetNeighbors(double[,] points, int pointIndex)
|
{
|
List<int> neighbors = new List<int>();
|
int n = points.GetLength(0);
|
for (int i = 0; i < n; i++)
|
{
|
double distance = EuclideanDistance(points, pointIndex, i);
|
if (distance < eps)
|
{
|
neighbors.Add(i);
|
}
|
}
|
return neighbors;
|
}
|
|
private double EuclideanDistance(double[,] points, int pointIndex1, int pointIndex2)
|
{
|
double dx = points[pointIndex1, 0] - points[pointIndex2, 0];
|
double dy = points[pointIndex1, 1] - points[pointIndex2, 1];
|
return Math.Sqrt(dx * dx + dy * dy);
|
}
|
}
|
|
}
|