using Accord.Math;
using Accord.Math.Optimization;
using DevExpress.Data.Extensions;
using MathNet.Numerics;
using MathNet.Numerics.LinearAlgebra;
using MathNet.Numerics.LinearRegression;
using MathNet.Numerics.Statistics;
namespace IStation.Test
{
///
/// 泵曲线数据融合校正器(用于合并样条曲线和实测数据并进行优化)
/// 增加了数据聚合和动态多项式拟合阶数的功能
///
public class PumpCurveDataFusionCorrectorHelper2
{
# region 配置参数
// 异常值检测阈值(基于标准正态分布,2.5对应98.76%置信区间)
public const double ZScoreThreshold = 2;
// 过渡区域宽度(单位与输入数据相同,控制曲线衔接平滑度)
public const double TransitionWidth = 500;
// 多项式拟合阶数(三次平衡拟合能力与过拟合风险)
public const int PolynomialDegree = 3;
// 优化器最大迭代次数(Cobyla算法收敛阈值)
private const int OptimizationMaxIterations = 1000;
//动态多项式拟合阶数(用于动态调整)
public int DynamicPolyDegree = 2;
# endregion
///
/// 主校正方法(入口函数)
///
/// 样条曲线X坐标
/// 样条曲线Y坐标
/// 原始实测X坐标
/// 原始实测Y坐标
///
/// mergedX: 融合后的X坐标
/// mergedY: 初步融合的Y坐标
/// optimizedX: 优化后的X坐标采样
/// optimizedY: 优化后的Y坐标结果
///
public (double[] mergedX, double[] mergedY, double[] optimizedX, double[] optimizedY) Corrent(
double[] splineX, double[] splineY,
double[] measuredXAll, double[] measuredYAll)
{
if (measuredXAll.Length != measuredYAll.Length)
{
return default;
}
if (measuredXAll.Length>10000)
{
}
var minX = measuredXAll[0];
var maxX = measuredXAll[^1];
var fv = 0.15;
var threshold = minX + fv * (maxX - minX);
var startIndex = measuredXAll.FindIndex(x => x > threshold);
var measuredXAllFiltered = measuredXAll.Skip(startIndex).ToArray();
var measuredYAllFiltered = measuredYAll.Skip(startIndex).ToArray();
if (measuredXAllFiltered.Length < 5)
{
Console.WriteLine("异常值过滤后数据过小");
return default;
}
#region 步骤1:稳健回归与异常值过滤(最小二乘法)
// 执行简单线性回归获取基准线
(double intercept, double slope) = SimpleRegression.Fit(measuredXAllFiltered, measuredYAllFiltered);
// 计算预测值和残差
double[] predictedY = measuredXAllFiltered.Select(x => slope * x + intercept).ToArray();
double[] residuals = measuredYAllFiltered.Zip(predictedY, (m, p) => m - p).ToArray();
// 基于Z-Score的异常值过滤
var zScoreCalculator = new ZScore(residuals);
bool[] validMask = zScoreCalculator.Scores
.Select(score => Math.Abs(score) < ZScoreThreshold).ToArray();
// 过滤得到有效实测数据
var validData = measuredXAllFiltered.Zip(measuredYAllFiltered, validMask)
.Where(t => t.Third)
.Select(t => (t.First, t.Second)).ToList();
double[] measuredXValid = validData.Select(t => t.First).ToArray();
double[] measuredYValid = validData.Select(t => t.Second).ToArray();
#endregion
if (measuredXValid.Length < 5)
{
Console.WriteLine("异常值过滤后数据过小");
return default;
}
return ProcessData(splineX, splineY, measuredXValid, measuredYValid,
PolynomialDegree, TransitionWidth);
}
///
/// 核心数据处理流程(包含数据融合、过渡处理和优化)
///
private (double[] mergedX, double[] mergedY, double[] optimizedX, double[] optimizedY) ProcessData(
double[] splineX, double[] splineY,
double[] measuredXValid, double[] measuredYValid,
int polyDegree, double transitionWidth)
{
/* labels, areas = classify_points(filtered_first_x, filtered_first_y, method = 'dbscan',
params={ 'eps': 0.25, 'min_samples': 15})
# 动态多项式次数调整
Config.Dynamic_POLY_DEGREE = 2 if len(areas) > 1 else 1*/
int min_samples = (int)(measuredXValid.Length * 0.02);
min_samples = min_samples < 5 ? 5 : min_samples;
var pointClassifier = new PointClassifier();
var (labels, areas) = pointClassifier.ClassifyPoints(measuredXValid, measuredYValid, "dbscan", new Dictionary
{
{ "eps", 0.3 },
{ "min_samples",(int)min_samples}
});
// 动态多项式次数调整
DynamicPolyDegree = areas.Count > 1 ? 2 : 1;
if (areas.Count<1)
{
throw new Exception("areas.Count<1");
}
var measured_x = new List();
var measured_y = new List();
foreach (var item in areas)
{
var x_min = item.XRange.Min;
var x_max = item.XRange.Max;
var y_min = item.YRange.Min;
var y_max = item.YRange.Max;
var mask = measuredXValid
.Select((x, i) => (x >= x_min && x <= x_max && measuredYValid[i] >= y_min && measuredYValid[i] <= y_max))
.ToArray();
for (int i = 0; i < mask.Length; i++)
{
var x = mask[i];
if (x)
{
measured_x.Add(measuredXValid[i]);
measured_y.Add(measuredYValid[i]);
}
}
}
#region
double minX = measuredXValid.Min(), maxX = measuredXValid.Max();
var (mergedX, mergedY) = MergeCurves(splineX, splineY, measured_x.ToArray(), measured_y.ToArray(), DynamicPolyDegree);
#endregion
#region 步骤5:带约束优化
// 数据标准化处理
double xMean = mergedX.Average();
double xStd = ArrayStatistics.PopulationStandardDeviation(mergedX);
double[] xNorm = mergedX.Select(x => (x - xMean) / xStd).ToArray();
// 构建三次多项式优化问题
var X = Matrix.Build.DenseOfRowArrays(
xNorm.Select(x => new[] { Math.Pow(x, 3), x * x, x, 1.0 }).ToArray()
);
var initialParams = MultipleRegression.QR(X, Vector.Build.DenseOfArray(mergedY));
// 设置单调性约束(导数非负)
var constraints = GenerateLinspace(xNorm.Min(), xNorm.Max(), 50)
.Select(x => new NonlinearConstraint(
numberOfVariables: 4,
function: p => -Derivative(p, x), // 导数>=0
shouldBe: ConstraintType.GreaterThanOrEqualTo,
value: 0
)).ToList();
// 执行COBYLA优化
var optimizer = new Cobyla(
function: BuildObjectiveFunction(xNorm, mergedY),
constraints: constraints.ToArray()
)
{ MaxIterations = OptimizationMaxIterations };
bool success = optimizer.Minimize(initialParams.ToArray());
double[] optimizedParams = optimizer.Solution;
#endregion
var a = optimizedParams.Select(x => Math.Round(x, 6)).ToList();
var json = JsonHelper.Object2FormatJson(a);
// 生成最终优化曲线
double[] optimizedX = GenerateLinspace(
Math.Min(splineX.Min(), minX),
Math.Max(splineX.Max(), maxX),
300
);
double[] optimizedY = optimizedX
.Select(x => CubicModel(optimizedParams, (x - xMean) / xStd))
.ToArray();
return (mergedX, mergedY, optimizedX, optimizedY);
}
# region 工具方法
///
/// 三次多项式模型(降幂排列)
///
private static double CubicModel(double[] param, double x) =>
param[0] * x * x * x + param[1] * x * x + param[2] * x + param[3];
///
/// 多项式导数(用于单调性约束)
///
private static double Derivative(double[] param, double x) =>
3 * param[0] * x * x + 2 * param[1] * x + param[2];
///
/// 构建目标函数(均方误差)
///
private NonlinearObjectiveFunction BuildObjectiveFunction(double[] xNorm, double[] y) =>
new NonlinearObjectiveFunction(4, p =>
xNorm.Select((x, i) => Math.Pow(CubicModel(p, x) - y[i], 2)).Average()
);
///
/// 多项式求值(霍纳法则)
///
private static double EvaluatePolynomial(double[] coeffs, double x) =>
coeffs.Aggregate((sum, c) => sum * x + c);
///
/// 生成等间距采样点
///
private static double[] GenerateLinspace(double start, double end, int numPoints) =>
Enumerable.Range(0, numPoints)
.Select(i => start + i * (end - start) / (numPoints - 1))
.ToArray();
# endregion
# region 过渡处理
///
/// 生成合并的 x 值
///
public static double[] GenerateMergedX(double[] splineX, double[] measuredXValid, int numPoints)
{
// 生成等间距的点
double[] linspace = GenerateLinspace(measuredXValid.Min(), measuredXValid.Max(), numPoints);
// 拼接并去重排序
var concat = splineX.Concat(linspace).ToArray();
var merged = concat.Distinct().OrderBy(x => x).ToArray();
return merged;
}
# endregion
# region 辅助类
///
/// Z分数计算器(用于异常值检测)
///
private class ZScore
{
public readonly double[] Scores;
public ZScore(double[] data)
{
double mean = data.Average();
double std = data.StandardDeviation();
Scores = data.Select(x => (x - mean) / std).ToArray();
}
}
#endregion
public static (double[] CorrectedX, double[] CorrectedY) MergeCurves(
double[] originalX, double[] originalY,
double[] measuredX, double[] measuredY,int degree)
{
// 实测数据不足处理
if (measuredX.Length < 3)
{
Console.WriteLine("实测数据不足,使用原始曲线");
return (originalX, originalY);
}
// 多项式拟合实测数据(系数反转保持与numpy一致)
double[] polyCoeff = Fit.Polynomial(measuredX, measuredY, degree).Reverse().ToArray();
// 创建修正曲线副本
double[] correctedX = originalX.Select(x => x).ToArray();
double[] correctedY = originalY.Select(x => x).ToArray();
// 计算实测数据范围
double measuredMin = measuredX.Min();
double measuredMax = measuredX.Max();
// 核心区域修正
for (int i = 0; i < correctedX.Length; i++)
{
if (correctedX[i] >= measuredMin && correctedX[i] <= measuredMax)
correctedY[i] = Polyval(polyCoeff, correctedX[i]);
}
// 过渡区域处理
double transition = TransitionWidth;
// 左过渡区域
double[] originalPolyCoeff = Fit.Polynomial(originalX, originalY, 3).Reverse().ToArray();
double xLeftStart = Math.Max(measuredMin - transition, originalX.Min());
double yLeftStart = Polyval(originalPolyCoeff, xLeftStart);
double yLeftEnd = Polyval(polyCoeff, measuredMin);
for (int i = 0; i < correctedX.Length; i++)
{
double x = correctedX[i];
if (x >= xLeftStart && x < measuredMin)
correctedY[i] = LinearInterp(x, xLeftStart, yLeftStart, measuredMin, yLeftEnd);
}
// 右过渡区域
double xRightEnd = Math.Min(measuredMax + transition, originalX.Max());
double yRightStart = Polyval(polyCoeff, measuredMax);
double yRightEnd = Polyval(originalPolyCoeff, xRightEnd);
for (int i = 0; i < correctedX.Length; i++)
{
double x = correctedX[i];
if (x > measuredMax && x <= xRightEnd)
correctedY[i] = LinearInterp(x, measuredMax, yRightStart, xRightEnd, yRightEnd);
}
return (correctedX, correctedY);
}
// 多项式求值(支持降序系数)
private static double Polyval(double[] coefficients, double x)
{
double result = 0;
int degree = coefficients.Length - 1;
for (int i = 0; i < coefficients.Length; i++)
result += coefficients[i] * Math.Pow(x, degree - i);
return result;
}
// 线性插值
private static double LinearInterp(double x, double x0, double y0, double x1, double y1)
{
return x1 != x0
? y0 + (y1 - y0) * (x - x0) / (x1 - x0)
: y0;
}
}
public class PointClassifier
{
public class DenseArea
{
public (double Min, double Max) XRange { get; set; }
public (double Min, double Max) YRange { get; set; }
}
public (int[] Labels, List DenseAreas) ClassifyPoints(
double[] x,
double[] y,
string method = "dbscan",
Dictionary parameters = null)
{
if (x.Length != y.Length)
throw new ArgumentException("x 和 y 的长度必须相同");
// 数据标准化
double[,] data = new double[x.Length, 2];
for (int i = 0; i < x.Length; i++)
{
data[i, 0] = x[i];
data[i, 1] = y[i];
}
// 计算均值和标准差
double[] mean = new double[2];
double[] std = new double[2];
for (int j = 0; j < 2; j++)
{
var jList = Enumerable.Range(0, data.GetLength(0)).Select(i => data[i, j]).ToArray();
mean[j] = jList.Mean();
std[j] = Math.Sqrt(jList.Variance());
}
// 标准化数据
double[,] scaledData = new double[x.Length, 2];
for (int i = 0; i < x.Length; i++)
{
scaledData[i, 0] = (data[i, 0] - mean[0]) / std[0];
scaledData[i, 1] = (data[i, 1] - mean[1]) / std[1];
}
int[] labels = new int[x.Length];
List denseAreas = new List();
// 方法选择
if (method == "dbscan")
{
int min_samples = (int)(x.Length * 0.02);
min_samples = min_samples < 5 ? 5 : min_samples;
// DBSCAN参数
double eps = parameters != null && parameters.ContainsKey("eps") ? (double)parameters["eps"] : 0.3;
int minSamples = parameters != null && parameters.ContainsKey("min_samples")
? (int)parameters["min_samples"]
: min_samples;
// 计算距离矩阵
double[,] distanceMatrix = DistanceMatrix(scaledData);
// 执行 DBSCAN
var dbscan = new DBSCAN(eps, minSamples);
labels = dbscan.Run(scaledData);
}
else if (method == "kde")
{
// 核密度估计
int bandwidth = parameters != null && parameters.ContainsKey("bandwidth")
? (int)parameters["bandwidth"]
: 1;
double percentile = parameters != null && parameters.ContainsKey("percentile")
? (double)parameters["percentile"]
: 70.0;
// 计算密度
double[] density = new double[x.Length];
for (int i = 0; i < x.Length; i++)
{
double sum = 0;
for (int j = 0; j < x.Length; j++)
{
double distance = Math.Sqrt(
Math.Pow(scaledData[i, 0] - scaledData[j, 0], 2) +
Math.Pow(scaledData[i, 1] - scaledData[j, 1], 2));
sum += Math.Exp(-distance * distance / (2 * bandwidth * bandwidth));
}
density[i] = sum / (x.Length * 2 * Math.PI * bandwidth * bandwidth);
}
// 计算阈值
double threshold = GetPercentile(density, percentile);
// 标记密集区域
labels = density.Select(d => d >= threshold ? 1 : 0).ToArray();
}
else if (method == "grid")
{
// 动态网格划分
int xBins = parameters != null && parameters.ContainsKey("x_bins")
? (int)parameters["x_bins"]
: 30;
int yBins = parameters != null && parameters.ContainsKey("y_bins")
? (int)parameters["y_bins"]
: 20;
double percentile = parameters != null && parameters.ContainsKey("percentile")
? (double)parameters["percentile"]
: 75.0;
// 创建网格
double[] xBinsArray = CreateBins(x, xBins);
double[] yBinsArray = CreateBins(y, yBins);
// 计算网格密度
int[,] gridDensity = new int[xBins, yBins];
for (int i = 0; i < x.Length; i++)
{
int xBin = Array.BinarySearch(xBinsArray, x[i]);
int yBin = Array.BinarySearch(yBinsArray, y[i]);
if (xBin >= 0 && xBin < xBins && yBin >= 0 && yBin < yBins)
gridDensity[xBin, yBin]++;
}
// 计算阈值
double threshold = GetPercentile(gridDensity.Cast().ToArray(), percentile);
// 标记密集网格
for (int i = 0; i < x.Length; i++)
{
int xBin = Array.BinarySearch(xBinsArray, x[i]);
int yBin = Array.BinarySearch(yBinsArray, y[i]);
if (xBin >= 0 && xBin < xBins && yBin >= 0 && yBin < yBins)
labels[i] = gridDensity[xBin, yBin] > threshold ? 1 : 0;
else
labels[i] = 0;
}
}
else
{
throw new ArgumentException("不支持的检测方法");
}
// 提取密集区域
var uniqueLabels = labels.Distinct();
foreach (var label in uniqueLabels)
{
if (label == -1) continue; // 排除噪声点
bool[] mask = labels.Select(l => l == label).ToArray();
if (mask.Count(m => m) > x.Length * 0.05) // 过滤小簇
{
double[,] clusterPoints = new double[mask.Count(m => m), 2];
int idx = 0;
for (int i = 0; i < mask.Length; i++)
{
if (mask[i])
{
clusterPoints[idx, 0] = data[i, 0];
clusterPoints[idx, 1] = data[i, 1];
idx++;
}
}
var xList = Enumerable.Range(0, clusterPoints.GetLength(0)).Select(i => clusterPoints[i, 0]).ToArray();
var yList = Enumerable.Range(0, clusterPoints.GetLength(0)).Select(i => clusterPoints[i, 1]).ToArray();
double xMin = xList.Min();
double xMax = xList.Max();
double yMin = yList.Min();
double yMax = yList.Max();
denseAreas.Add(new DenseArea
{
XRange = (xMin, xMax),
YRange = (yMin, yMax)
});
}
}
return (labels, denseAreas);
}
private double[] CreateBins(double[] data, int bins)
{
double min = data.Min();
double max = data.Max();
double step = (max - min) / bins;
return Enumerable.Range(0, bins + 1).Select(i => min + i * step).ToArray();
}
private double GetPercentile(double[] data, double percentile)
{
Array.Sort(data);
int index = (int)Math.Ceiling(percentile / 100.0 * data.Length) - 1;
return data[index];
}
private double[,] DistanceMatrix(double[,] data)
{
int n = data.GetLength(0);
double[,] matrix = new double[n, n];
for (int i = 0; i < n; i++)
{
for (int j = i + 1; j < n; j++)
{
double dx = data[i, 0] - data[j, 0];
double dy = data[i, 1] - data[j, 1];
matrix[i, j] = matrix[j, i] = Math.Sqrt(dx * dx + dy * dy);
}
}
return matrix;
}
}
public class DBSCAN
{
private readonly double eps;
private readonly int minPts;
public DBSCAN(double eps, int minPts)
{
this.eps = eps;
this.minPts = minPts;
}
public int[] Run(double[,] points)
{
int n = points.GetLength(0);
int[] labels = new int[n];
Array.Fill(labels, -2); // 初始化为未处理状态
int clusterId = -1;
for (int i = 0; i < n; i++)
{
if (labels[i] != -2) continue;
var neighbors = GetNeighbors(points, i);
if (neighbors.Count < minPts)
{
labels[i] = -1; // 标记为噪声点
continue;
}
clusterId++;
ExpandCluster(points, i, neighbors, clusterId, labels);
}
return labels;
}
private void ExpandCluster(double[,] points, int pointIndex, List neighbors, int clusterId, int[] labels)
{
labels[pointIndex] = clusterId;
for (int i = 0; i < neighbors.Count; i++)
{
int neighborIndex = neighbors[i];
if (labels[neighborIndex] == -1)
{
labels[neighborIndex] = clusterId;
}
else if (labels[neighborIndex] == -2)
{
labels[neighborIndex] = clusterId;
var newNeighbors = GetNeighbors(points, neighborIndex);
if (newNeighbors.Count >= minPts)
{
neighbors.AddRange(newNeighbors);
}
}
}
}
private List GetNeighbors(double[,] points, int pointIndex)
{
List neighbors = new List();
int n = points.GetLength(0);
for (int i = 0; i < n; i++)
{
double distance = EuclideanDistance(points, pointIndex, i);
if (distance < eps)
{
neighbors.Add(i);
}
}
return neighbors;
}
private double EuclideanDistance(double[,] points, int pointIndex1, int pointIndex2)
{
double dx = points[pointIndex1, 0] - points[pointIndex2, 0];
double dy = points[pointIndex1, 1] - points[pointIndex2, 1];
return Math.Sqrt(dx * dx + dy * dy);
}
}
}