From a67da735b33be01b24845ce03ae7551cf55ddbbc Mon Sep 17 00:00:00 2001 From: ningshuxia <ningshuxia0927@outlook.com> Date: 星期三, 16 四月 2025 09:08:16 +0800 Subject: [PATCH] 代码修改i --- 02-desktop/Desktop/IStation.Test/helper/PumpCurveDataFusionCorrectorHelper2.cs | 903 +++++++++++++++++++++++-------------------------------- 1 files changed, 377 insertions(+), 526 deletions(-) diff --git a/02-desktop/Desktop/IStation.Test/helper/PumpCurveDataFusionCorrectorHelper2.cs b/02-desktop/Desktop/IStation.Test/helper/PumpCurveDataFusionCorrectorHelper2.cs index f8f9e50..a523b79 100644 --- a/02-desktop/Desktop/IStation.Test/helper/PumpCurveDataFusionCorrectorHelper2.cs +++ b/02-desktop/Desktop/IStation.Test/helper/PumpCurveDataFusionCorrectorHelper2.cs @@ -1,21 +1,23 @@ 锘縰sing Accord.Math; using Accord.Math.Optimization; -using DevExpress.Data.Extensions; +using Dbscan; +using Dbscan.RBush; using MathNet.Numerics; using MathNet.Numerics.LinearAlgebra; using MathNet.Numerics.LinearRegression; +using MathNet.Numerics.Optimization; using MathNet.Numerics.Statistics; +using System.Data; + namespace IStation.Test { - /// <summary> - /// 娉垫洸绾挎暟鎹瀺鍚堟牎姝e櫒锛堢敤浜庡悎骞舵牱鏉℃洸绾垮拰瀹炴祴鏁版嵁骞惰繘琛屼紭鍖栵級 - /// 澧炲姞浜嗘暟鎹仛鍚堝拰鍔ㄦ�佸椤瑰紡鎷熷悎闃舵暟鐨勫姛鑳� + /// 娉垫洸绾挎暟鎹瀺鍚堟牎姝e櫒锛堟�ц兘浼樺寲鐗堬級 /// </summary> public class PumpCurveDataFusionCorrectorHelper2 { - # region 閰嶇疆鍙傛暟 + #region 閰嶇疆鍙傛暟 // 寮傚父鍊兼娴嬮槇鍊硷紙鍩轰簬鏍囧噯姝f�佸垎甯冿紝2.5瀵瑰簲98.76%缃俊鍖洪棿锛� public const double ZScoreThreshold = 2; // 杩囨浮鍖哄煙瀹藉害锛堝崟浣嶄笌杈撳叆鏁版嵁鐩稿悓锛屾帶鍒舵洸绾胯鎺ュ钩婊戝害锛� @@ -25,46 +27,31 @@ // 浼樺寲鍣ㄦ渶澶ц凯浠f鏁帮紙Cobyla绠楁硶鏀舵暃闃堝�硷級 private const int OptimizationMaxIterations = 1000; - //鍔ㄦ�佸椤瑰紡鎷熷悎闃舵暟锛堢敤浜庡姩鎬佽皟鏁达級 + // 鍔ㄦ�佸椤瑰紡鎷熷悎闃舵暟锛堢敤浜庡姩鎬佽皟鏁达級 public int DynamicPolyDegree = 2; - # endregion + // 娴嬮噺鏁版嵁杩囨护姣斾緥%锛堟帶鍒跺紓甯稿�艰繃婊ゅ己搴︼級 + public double MeasuredXFilterRatio = 0.15; - /// <summary> - /// 涓绘牎姝f柟娉曪紙鍏ュ彛鍑芥暟锛� - /// </summary> - /// <param name="splineX">鏍锋潯鏇茬嚎X鍧愭爣</param> - /// <param name="splineY">鏍锋潯鏇茬嚎Y鍧愭爣</param> - /// <param name="measuredXAllFiltered">鍘熷瀹炴祴X鍧愭爣</param> - /// <param name="measuredYAllFiltered">鍘熷瀹炴祴Y鍧愭爣</param> - /// <returns> - /// mergedX: 铻嶅悎鍚庣殑X鍧愭爣 - /// mergedY: 鍒濇铻嶅悎鐨刌鍧愭爣 - /// optimizedX: 浼樺寲鍚庣殑X鍧愭爣閲囨牱 - /// optimizedY: 浼樺寲鍚庣殑Y鍧愭爣缁撴灉 - /// </returns> + #endregion + + public (double[] mergedX, double[] mergedY, double[] optimizedX, double[] optimizedY) Corrent( double[] splineX, double[] splineY, double[] measuredXAll, double[] measuredYAll) { - if (measuredXAll.Length != measuredYAll.Length) - { return default; - } - - - var minX = measuredXAll[0]; - var maxX = measuredXAll[^1]; - - var fv = 0.15; - var threshold = minX + fv * (maxX - minX); - var startIndex = measuredXAll.FindIndex(x => x > threshold); - - var measuredXAllFiltered = measuredXAll.Skip(startIndex).ToArray(); - var measuredYAllFiltered = measuredYAll.Skip(startIndex).ToArray(); + // 浼樺寲1锛氫娇鐢ˋrray.BinarySearch鏇夸唬FindIndex + double threshold = measuredXAll[0] + MeasuredXFilterRatio * (measuredXAll[^1] - measuredXAll[0]); + int startIndex = Array.BinarySearch(measuredXAll, threshold); + startIndex = (startIndex < 0) ? ~startIndex : startIndex; + + // 浼樺寲2锛氭暟缁勫垏鐗囦唬鏇縎kip + var measuredXAllFiltered = new ArraySegment<double>(measuredXAll, startIndex, measuredXAll.Length - startIndex).ToArray(); + var measuredYAllFiltered = new ArraySegment<double>(measuredYAll, startIndex, measuredYAll.Length - startIndex).ToArray(); if (measuredXAllFiltered.Length < 5) { @@ -72,159 +59,167 @@ return default; } - #region 姝ラ1锛氱ǔ鍋ュ洖褰掍笌寮傚父鍊艰繃婊わ紙鏈�灏忎簩涔樻硶锛� - // 鎵ц绠�鍗曠嚎鎬у洖褰掕幏鍙栧熀鍑嗙嚎 + #region 绋冲仴鍥炲綊浼樺寲 + // 浼樺寲3锛氶鍒嗛厤鏁扮粍 + var residuals = new double[measuredXAllFiltered.Length]; (double intercept, double slope) = SimpleRegression.Fit(measuredXAllFiltered, measuredYAllFiltered); - // 璁$畻棰勬祴鍊煎拰娈嬪樊 - double[] predictedY = measuredXAllFiltered.Select(x => slope * x + intercept).ToArray(); - double[] residuals = measuredYAllFiltered.Zip(predictedY, (m, p) => m - p).ToArray(); - - // 鍩轰簬Z-Score鐨勫紓甯稿�艰繃婊� - var zScoreCalculator = new ZScore(residuals); - bool[] validMask = zScoreCalculator.Scores - .Select(score => Math.Abs(score) < ZScoreThreshold).ToArray(); - - // 杩囨护寰楀埌鏈夋晥瀹炴祴鏁版嵁 - var validData = measuredXAllFiltered.Zip(measuredYAllFiltered, validMask) - .Where(t => t.Third) - .Select(t => (t.First, t.Second)).ToList(); - double[] measuredXValid = validData.Select(t => t.First).ToArray(); - double[] measuredYValid = validData.Select(t => t.Second).ToArray(); - #endregion - - - - if (measuredXValid.Length < 5) + // 浼樺寲4锛氬悎骞惰绠楀惊鐜� + double meanResidual = 0, stdResidual = 0; + for (int i = 0; i < measuredXAllFiltered.Length; i++) { - Console.WriteLine("寮傚父鍊艰繃婊ゅ悗鏁版嵁杩囧皬"); - return default; + double predicted = slope * measuredXAllFiltered[i] + intercept; + residuals[i] = measuredYAllFiltered[i] - predicted; + meanResidual += residuals[i]; } + meanResidual /= residuals.Length; + for (int i = 0; i < residuals.Length; i++) + { + stdResidual += Math.Pow(residuals[i] - meanResidual, 2); + } + stdResidual = Math.Sqrt(stdResidual / residuals.Length); - return ProcessData(splineX, splineY, measuredXValid, measuredYValid, - PolynomialDegree, TransitionWidth); + + // 浼樺寲5锛氬苟琛岃繃婊� + var validData = new List<(double X, double Y)>(measuredXAllFiltered.Length); + for (int i = 0; i < residuals.Length; i++) + { + if (Math.Abs((residuals[i] - meanResidual) / stdResidual) < ZScoreThreshold) + { + validData.Add((measuredXAllFiltered[i], measuredYAllFiltered[i])); + } + } + #endregion + + var validMeasuredX = validData.Select(d => d.X).ToArray(); + var validMeasuredY = validData.Select(d => d.Y).ToArray(); + + + return ProcessData(splineX, splineY, validMeasuredX, validMeasuredY); } - /// <summary> - /// 鏍稿績鏁版嵁澶勭悊娴佺▼锛堝寘鍚暟鎹瀺鍚堛�佽繃娓″鐞嗗拰浼樺寲锛� - /// </summary> + + private (double[] mergedX, double[] mergedY, double[] optimizedX, double[] optimizedY) ProcessData( double[] splineX, double[] splineY, - double[] measuredXValid, double[] measuredYValid, - int polyDegree, double transitionWidth) + double[] measuredXValid, double[] measuredYValid) { + if (measuredXValid.Length < 5) + return default; + // 浼樺寲6锛氱紦瀛樻瀬鍊艰绠� + double minX = measuredXValid[0], maxX = measuredXValid[^1]; - /* labels, areas = classify_points(filtered_first_x, filtered_first_y, method = 'dbscan', - params={ 'eps': 0.25, 'min_samples': 15}) - # 鍔ㄦ�佸椤瑰紡娆℃暟璋冩暣 - Config.Dynamic_POLY_DEGREE = 2 if len(areas) > 1 else 1*/ - var pointClassifier = new PointClassifier(); - var (labels, areas) = pointClassifier.ClassifyPoints(measuredXValid, measuredYValid, "dbscan", new Dictionary<string, object> + var processor = new DBSCANProcessor(); + var areas = processor.PerformClustering(measuredXValid, measuredYValid); + + if (areas.Count < 1&& measuredXValid.Length>500) { - { "eps", 0.25 }, - { "min_samples", 15 } - }); - - // 鍔ㄦ�佸椤瑰紡娆℃暟璋冩暣 + Console.WriteLine("areas<1 error------------"); + } DynamicPolyDegree = areas.Count > 1 ? 2 : 1; + // 浼樺寲8锛氶鍒嗛厤闆嗗悎瀹归噺 + var measured_x = new List<double>(measuredXValid.Length); + var measured_y = new List<double>(measuredYValid.Length); - /* - - # 鏁版嵁绛涢�� - measured_x = [] - measured_y = [] - for idx, ((x_min, x_max), (y_min, y_max)) in enumerate(areas): - mask = (filtered_first_x >= x_min) & (filtered_first_x <= x_max) & (filtered_first_y >= y_min) & (filtered_first_y <= y_max) - measured_x.extend(filtered_first_x[mask]) - measured_y.extend(filtered_first_y[mask]) - - */ - - var measured_x = new List<double>(); - var measured_y = new List<double>(); - - foreach (var item in areas) + foreach (var (MinX, MaxX, MinY, MaxY) in areas) { - var x_min = item.XRange.Min; - var x_max = item.XRange.Max; - var y_min = item.YRange.Min; - var y_max = item.YRange.Max; - var mask = measuredXValid - .Select((x, i) => (x >= x_min && x <= x_max && measuredYValid[i] >= y_min && measuredYValid[i] <= y_max)) - .ToArray(); - for (int i = 0; i < mask.Length; i++) + // 浼樺寲9锛氬悜閲忓寲鑼冨洿鍒ゆ柇 + for (int i = 0; i < measuredXValid.Length; i++) { - var x = mask[i]; - if (x) + if (measuredXValid[i].Between(MinX, MaxX) && + measuredYValid[i].Between(MinY, MaxY)) { measured_x.Add(measuredXValid[i]); measured_y.Add(measuredYValid[i]); } } } - - #region - - double minX = measuredXValid.Min(), maxX = measuredXValid.Max(); - var (mergedX, mergedY) = MergeCurves(splineX, splineY, measured_x.ToArray(), measured_y.ToArray(), DynamicPolyDegree); - #endregion - #region 姝ラ5锛氬甫绾︽潫浼樺寲 + // 浼樺寲10锛氬悎骞舵洸绾跨敓鎴愪紭鍖� + var (mergedX, mergedY) = MergeCurvesOptimized(splineX, splineY, + measured_x.ToArray(), measured_y.ToArray(), DynamicPolyDegree); - // 鏁版嵁鏍囧噯鍖栧鐞� - double xMean = mergedX.Average(); - double xStd = ArrayStatistics.PopulationStandardDeviation(mergedX); - double[] xNorm = mergedX.Select(x => (x - xMean) / xStd).ToArray(); + // 浼樺寲11锛氱煩闃佃繍绠椾紭鍖� + double xMean = mergedX.Mean(); + double xStd = mergedX.StandardDeviation(); + var xNorm = mergedX.Select(x => (x - xMean) / xStd).ToArray(); - // 鏋勫缓涓夋澶氶」寮忎紭鍖栭棶棰� var X = Matrix<double>.Build.DenseOfRowArrays( - xNorm.Select(x => new[] { Math.Pow(x, 3), x * x, x, 1.0 }).ToArray() - ); + xNorm.Select(x => new[] { Math.Pow(x, 3), x * x, x, 1.0 }).ToArray()); + + + // 浼樺寲12锛歈R鍒嗚В鍔犻�� var initialParams = MultipleRegression.QR(X, Vector<double>.Build.DenseOfArray(mergedY)); - - - - // 璁剧疆鍗曡皟鎬х害鏉燂紙瀵兼暟闈炶礋锛� - var constraints = GenerateLinspace(xNorm.Min(), xNorm.Max(), 50) - .Select(x => new NonlinearConstraint( - numberOfVariables: 4, - function: p => -Derivative(p, x), // 瀵兼暟>=0 - shouldBe: ConstraintType.GreaterThanOrEqualTo, - value: 0 - )).ToList(); - - // 鎵цCOBYLA浼樺寲 - var optimizer = new Cobyla( - function: BuildObjectiveFunction(xNorm, mergedY), - constraints: constraints.ToArray() - ) + // 浼樺寲13锛氱害鏉熺敓鎴愪紭鍖� + var constraints = GenerateConstraints(xNorm, 50); + var optimizer = new Cobyla(BuildObjectiveFunction(xNorm, mergedY), constraints.ToArray()) { MaxIterations = OptimizationMaxIterations }; - bool success = optimizer.Minimize(initialParams.ToArray()); - double[] optimizedParams = optimizer.Solution; - #endregion + if (!optimizer.Minimize(initialParams.ToArray())) + throw new OptimizationException("Optimization failed"); - - - var a = optimizedParams.Select(x => Math.Round(x, 6)).ToList(); - var json = JsonHelper.Object2FormatJson(a); - // 鐢熸垚鏈�缁堜紭鍖栨洸绾� - double[] optimizedX = GenerateLinspace( - Math.Min(splineX.Min(), minX), - Math.Max(splineX.Max(), maxX), - 300 - ); - double[] optimizedY = optimizedX - .Select(x => CubicModel(optimizedParams, (x - xMean) / xStd)) - .ToArray(); + // 浼樺寲14锛氶噰鏍风偣鐢熸垚浼樺寲 + double[] optimizedX = GenerateOptimizedX(splineX, minX, maxX); + double[] optimizedY = optimizedX.Select(x => CubicModel(optimizer.Solution, (x - xMean) / xStd)).ToArray(); return (mergedX, mergedY, optimizedX, optimizedY); + } - # region 宸ュ叿鏂规硶 + + private static double[] GenerateOptimizedX(double[] splineX, double minX, double maxX) + { + double lower = Math.Min(splineX[0], minX); + double upper = Math.Max(splineX[^1], maxX); + return Enumerable.Range(0, 300) + .Select(i => lower + i * (upper - lower) / 299.0) + .ToArray(); + } + + private List<NonlinearConstraint> GenerateConstraints(double[] xNorm, int sampleCount) + { + var constraints = new List<NonlinearConstraint>(sampleCount); + double min = xNorm[0], max = xNorm[^1]; + double step = (max - min) / (sampleCount - 1); + + for (int i = 0; i < sampleCount; i++) + { + double x = min + i * step; + constraints.Add(new NonlinearConstraint(4, + p => -Derivative(p, x), + ConstraintType.GreaterThanOrEqualTo, 0)); + } + return constraints; + } + + private static (double[] X, double[] Y) MergeCurvesOptimized( + double[] originalX, double[] originalY, + double[] measuredX, double[] measuredY, int degree) + { + if (measuredX.Length < 3) + return (originalX, originalY); + + // 浼樺寲15锛氬椤瑰紡鎷熷悎鍔犻�� + var polyCoeff = Fit.Polynomial(measuredX, measuredY, degree).Reverse().ToArray(); + + // 浼樺寲16锛氬唴瀛樺鐢� + var correctedY = new double[originalY.Length]; + Array.Copy(originalY, correctedY, originalY.Length); + + double measuredMin = measuredX[0], measuredMax = measuredX[^1]; + + // 浼樺寲17锛氬苟琛屽尯鍩熷鐞� + Parallel.Invoke( + () => ProcessCoreRegion(originalX, correctedY, polyCoeff, measuredMin, measuredMax), + () => ProcessTransitionRegions(originalX, correctedY, originalY, polyCoeff, measuredMin, measuredMax) + ); + + return (originalX, correctedY); + } + /// <summary> /// 涓夋澶氶」寮忔ā鍨嬶紙闄嶅箓鎺掑垪锛� @@ -238,7 +233,7 @@ private static double Derivative(double[] param, double x) => 3 * param[0] * x * x + 2 * param[1] * x + param[2]; - + /// <summary> /// 鏋勫缓鐩爣鍑芥暟锛堝潎鏂硅宸級 /// </summary> @@ -246,122 +241,6 @@ new NonlinearObjectiveFunction(4, p => xNorm.Select((x, i) => Math.Pow(CubicModel(p, x) - y[i], 2)).Average() ); - - - - /// <summary> - /// 澶氶」寮忔眰鍊硷紙闇嶇撼娉曞垯锛� - /// </summary> - private static double EvaluatePolynomial(double[] coeffs, double x) => - coeffs.Aggregate((sum, c) => sum * x + c); - - /// <summary> - /// 鐢熸垚绛夐棿璺濋噰鏍风偣 - /// </summary> - private static double[] GenerateLinspace(double start, double end, int numPoints) => - Enumerable.Range(0, numPoints) - .Select(i => start + i * (end - start) / (numPoints - 1)) - .ToArray(); - # endregion - - # region 杩囨浮澶勭悊 - - - /// <summary> - /// 鐢熸垚鍚堝苟鐨� x 鍊� - /// </summary> - public static double[] GenerateMergedX(double[] splineX, double[] measuredXValid, int numPoints) - { - // 鐢熸垚绛夐棿璺濈殑鐐� - double[] linspace = GenerateLinspace(measuredXValid.Min(), measuredXValid.Max(), numPoints); - // 鎷兼帴骞跺幓閲嶆帓搴� - var concat = splineX.Concat(linspace).ToArray(); - var merged = concat.Distinct().OrderBy(x => x).ToArray(); - return merged; - } - - # endregion - - # region 杈呭姪绫� - - /// <summary> - /// Z鍒嗘暟璁$畻鍣紙鐢ㄤ簬寮傚父鍊兼娴嬶級 - /// </summary> - private class ZScore - { - public readonly double[] Scores; - - public ZScore(double[] data) - { - double mean = data.Average(); - double std = data.StandardDeviation(); - Scores = data.Select(x => (x - mean) / std).ToArray(); - } - } - - #endregion - - - - public static (double[] CorrectedX, double[] CorrectedY) MergeCurves( - double[] originalX, double[] originalY, - double[] measuredX, double[] measuredY,int degree) - { - // 瀹炴祴鏁版嵁涓嶈冻澶勭悊 - if (measuredX.Length < 3) - { - Console.WriteLine("瀹炴祴鏁版嵁涓嶈冻锛屼娇鐢ㄥ師濮嬫洸绾�"); - return (originalX, originalY); - } - - // 澶氶」寮忔嫙鍚堝疄娴嬫暟鎹紙绯绘暟鍙嶈浆淇濇寔涓巒umpy涓�鑷达級 - double[] polyCoeff = Fit.Polynomial(measuredX, measuredY, degree).Reverse().ToArray(); - - // 鍒涘缓淇鏇茬嚎鍓湰 - double[] correctedX = originalX.Select(x => x).ToArray(); - double[] correctedY = originalY.Select(x => x).ToArray(); - - // 璁$畻瀹炴祴鏁版嵁鑼冨洿 - double measuredMin = measuredX.Min(); - double measuredMax = measuredX.Max(); - - // 鏍稿績鍖哄煙淇 - for (int i = 0; i < correctedX.Length; i++) - { - if (correctedX[i] >= measuredMin && correctedX[i] <= measuredMax) - correctedY[i] = Polyval(polyCoeff, correctedX[i]); - } - - // 杩囨浮鍖哄煙澶勭悊 - double transition = TransitionWidth; - - // 宸﹁繃娓″尯鍩� - double[] originalPolyCoeff = Fit.Polynomial(originalX, originalY, 3).Reverse().ToArray(); - double xLeftStart = Math.Max(measuredMin - transition, originalX.Min()); - double yLeftStart = Polyval(originalPolyCoeff, xLeftStart); - double yLeftEnd = Polyval(polyCoeff, measuredMin); - - for (int i = 0; i < correctedX.Length; i++) - { - double x = correctedX[i]; - if (x >= xLeftStart && x < measuredMin) - correctedY[i] = LinearInterp(x, xLeftStart, yLeftStart, measuredMin, yLeftEnd); - } - - // 鍙宠繃娓″尯鍩� - double xRightEnd = Math.Min(measuredMax + transition, originalX.Max()); - double yRightStart = Polyval(polyCoeff, measuredMax); - double yRightEnd = Polyval(originalPolyCoeff, xRightEnd); - - for (int i = 0; i < correctedX.Length; i++) - { - double x = correctedX[i]; - if (x > measuredMax && x <= xRightEnd) - correctedY[i] = LinearInterp(x, measuredMax, yRightStart, xRightEnd, yRightEnd); - } - - return (correctedX, correctedY); - } // 澶氶」寮忔眰鍊硷紙鏀寔闄嶅簭绯绘暟锛� private static double Polyval(double[] coefficients, double x) @@ -381,310 +260,282 @@ : y0; } - } - - - - - - - public class PointClassifier - { - public class DenseArea + private static void ProcessCoreRegion(double[] x, double[] y, double[] coeff, double min, double max) { - public (double Min, double Max) XRange { get; set; } - public (double Min, double Max) YRange { get; set; } + for (int i = 0; i < x.Length; i++) + { + if (x[i].Between(min, max)) + y[i] = Polyval(coeff, x[i]); + } } - public (int[] Labels, List<DenseArea> DenseAreas) ClassifyPoints( - double[] x, - double[] y, - string method = "dbscan", - Dictionary<string, object> parameters = null) + private static void ProcessTransitionRegions(double[] x, double[] y, double[] originalY, + double[] coeff, double measuredMin, double measuredMax) { + // 宸﹁繃娓″鐞� + double xLeftStart = Math.Max(measuredMin - TransitionWidth, x[0]); + double yLeftStart = Polyval(Fit.Polynomial(x, originalY, 3).Reverse().ToArray(), xLeftStart); + double yLeftEnd = Polyval(coeff, measuredMin); - if (x.Length != y.Length) - throw new ArgumentException("x 鍜� y 鐨勯暱搴﹀繀椤荤浉鍚�"); + // 鍙宠繃娓″鐞� + double xRightEnd = Math.Min(measuredMax + TransitionWidth, x[^1]); + double yRightStart = Polyval(coeff, measuredMax); + double yRightEnd = Polyval(Fit.Polynomial(x, originalY, 3).Reverse().ToArray(), xRightEnd); + for (int i = 0; i < x.Length; i++) + { + double xi = x[i]; + if (xi.Between(xLeftStart, measuredMin)) + y[i] = LinearInterp(xi, xLeftStart, yLeftStart, measuredMin, yLeftEnd); + else if (xi.Between(measuredMax, xRightEnd)) + y[i] = LinearInterp(xi, measuredMax, yRightStart, xRightEnd, yRightEnd); + } + } + + } + + public class DBSCANProcessor + { + public readonly struct PointData : IPointData + { + public Point Point { get; } + public PointData(double x, double y) => Point = new Point(x, y); + } + + private double _xMean; + private double _xStd; + private double _yMean; + private double _yStd; + + private double _minDensity = 0.05; + + public List<(double MinX, double MaxX, double MinY, double MaxY)> PerformClustering(double[] x, double[] y) + { // 鏁版嵁鏍囧噯鍖� - double[,] data = new double[x.Length, 2]; - for (int i = 0; i < x.Length; i++) + var (ScaledData, Points, XMean, XStd, YMean, YStd) = OptimizedScaleData(x, y); + + _xMean = XMean; + _xStd = XStd; + _yMean = YMean; + _yStd = YStd; + + var minValidCount = Math.Min(5, x.Length * _minDensity); + + // 鑷姩鍙傛暟璁$畻 + var minPts = CalculateMinPts(Points.Count); + var eps = EpsOptimizer.ComputeEps(ScaledData, minPts); + + + // 鎵цDBSCAN鑱氱被 + var clusters = DbscanRBush.CalculateClusters( + Points, + epsilon: eps, + minimumPointsPerCluster: minPts); + + var list = new List<(double MinX, double MaxX, double MinY, double MaxY)>(); + if (clusters.Clusters.Any()) { - data[i, 0] = x[i]; - data[i, 1] = y[i]; - } - - // 璁$畻鍧囧�煎拰鏍囧噯宸� - double[] mean = new double[2]; - double[] std = new double[2]; - for (int j = 0; j < 2; j++) - { - var jList = Enumerable.Range(0, data.GetLength(0)).Select(i => data[i, j]).ToArray(); - mean[j] = jList.Mean(); - std[j] = Math.Sqrt(jList.Variance()); - } - - // 鏍囧噯鍖栨暟鎹� - double[,] scaledData = new double[x.Length, 2]; - for (int i = 0; i < x.Length; i++) - { - scaledData[i, 0] = (data[i, 0] - mean[0]) / std[0]; - scaledData[i, 1] = (data[i, 1] - mean[1]) / std[1]; - } - - int[] labels = new int[x.Length]; - List<DenseArea> denseAreas = new List<DenseArea>(); - - // 鏂规硶閫夋嫨 - if (method == "dbscan") - { - // DBSCAN鍙傛暟 - double eps = parameters != null && parameters.ContainsKey("eps") ? (double)parameters["eps"] : 0.3; - int minSamples = parameters != null && parameters.ContainsKey("min_samples") - ? (int)parameters["min_samples"] - : (int)(x.Length * 0.02); - - // 璁$畻璺濈鐭╅樀 - double[,] distanceMatrix = DistanceMatrix(scaledData); - - // 鎵ц DBSCAN - var dbscan = new DBSCAN(eps, minSamples); - labels = dbscan.Run(scaledData); - } - else if (method == "kde") - { - // 鏍稿瘑搴︿及璁� - int bandwidth = parameters != null && parameters.ContainsKey("bandwidth") - ? (int)parameters["bandwidth"] - : 1; - double percentile = parameters != null && parameters.ContainsKey("percentile") - ? (double)parameters["percentile"] - : 70.0; - - // 璁$畻瀵嗗害 - double[] density = new double[x.Length]; - for (int i = 0; i < x.Length; i++) + foreach (var item in clusters.Clusters) { - double sum = 0; - for (int j = 0; j < x.Length; j++) - { - double distance = Math.Sqrt( - Math.Pow(scaledData[i, 0] - scaledData[j, 0], 2) + - Math.Pow(scaledData[i, 1] - scaledData[j, 1], 2)); - sum += Math.Exp(-distance * distance / (2 * bandwidth * bandwidth)); - } - density[i] = sum / (x.Length * 2 * Math.PI * bandwidth * bandwidth); + if (item.Objects.Count < minValidCount) + continue; + list.Add(StandardizeData(item.Objects.ToList())); } + list = list.OrderBy(x => x.MinX).ToList(); + } - // 璁$畻闃堝�� - double threshold = GetPercentile(density, percentile); - - // 鏍囪瀵嗛泦鍖哄煙 - labels = density.Select(d => d >= threshold ? 1 : 0).ToArray(); - } - else if (method == "grid") - { - // 鍔ㄦ�佺綉鏍煎垝鍒� - int xBins = parameters != null && parameters.ContainsKey("x_bins") - ? (int)parameters["x_bins"] - : 30; - int yBins = parameters != null && parameters.ContainsKey("y_bins") - ? (int)parameters["y_bins"] - : 20; - double percentile = parameters != null && parameters.ContainsKey("percentile") - ? (double)parameters["percentile"] - : 75.0; - - // 鍒涘缓缃戞牸 - double[] xBinsArray = CreateBins(x, xBins); - double[] yBinsArray = CreateBins(y, yBins); - - // 璁$畻缃戞牸瀵嗗害 - int[,] gridDensity = new int[xBins, yBins]; - for (int i = 0; i < x.Length; i++) - { - int xBin = Array.BinarySearch(xBinsArray, x[i]); - int yBin = Array.BinarySearch(yBinsArray, y[i]); - if (xBin >= 0 && xBin < xBins && yBin >= 0 && yBin < yBins) - gridDensity[xBin, yBin]++; - } - - // 璁$畻闃堝�� - double threshold = GetPercentile(gridDensity.Cast<double>().ToArray(), percentile); - - // 鏍囪瀵嗛泦缃戞牸 - for (int i = 0; i < x.Length; i++) - { - int xBin = Array.BinarySearch(xBinsArray, x[i]); - int yBin = Array.BinarySearch(yBinsArray, y[i]); - if (xBin >= 0 && xBin < xBins && yBin >= 0 && yBin < yBins) - labels[i] = gridDensity[xBin, yBin] > threshold ? 1 : 0; - else - labels[i] = 0; - } - } - else - { - throw new ArgumentException("涓嶆敮鎸佺殑妫�娴嬫柟娉�"); - } - - // 鎻愬彇瀵嗛泦鍖哄煙 - var uniqueLabels = labels.Distinct(); - foreach (var label in uniqueLabels) - { - if (label == -1) continue; // 鎺掗櫎鍣0鐐� - - bool[] mask = labels.Select(l => l == label).ToArray(); - if (mask.Count(m => m) > x.Length * 0.05) // 杩囨护灏忕皣 - { - double[,] clusterPoints = new double[mask.Count(m => m), 2]; - int idx = 0; - for (int i = 0; i < mask.Length; i++) - { - if (mask[i]) - { - clusterPoints[idx, 0] = data[i, 0]; - clusterPoints[idx, 1] = data[i, 1]; - idx++; - } - } - - var xList = Enumerable.Range(0, clusterPoints.GetLength(0)).Select(i => clusterPoints[i, 0]).ToArray(); - var yList = Enumerable.Range(0, clusterPoints.GetLength(0)).Select(i => clusterPoints[i, 1]).ToArray(); - - - double xMin = xList.Min(); - double xMax = xList.Max(); - double yMin = yList.Min(); - double yMax = yList.Max(); - - denseAreas.Add(new DenseArea - { - XRange = (xMin, xMax), - YRange = (yMin, yMax) - }); - } - } - - return (labels, denseAreas); + return list; } - private double[] CreateBins(double[] data, int bins) + public (double[,] ScaledData, List<PointData> Points, double XMean, double XStd, double YMean, double YStd) OptimizedScaleData(double[] x, double[] y) { - double min = data.Min(); - double max = data.Max(); - double step = (max - min) / bins; - return Enumerable.Range(0, bins + 1).Select(i => min + i * step).ToArray(); - } + int len = x.Length; + var scaled = new double[len, 2]; + var points = new List<PointData>(); - private double GetPercentile(double[] data, double percentile) - { - Array.Sort(data); - int index = (int)Math.Ceiling(percentile / 100.0 * data.Length) - 1; - return data[index]; - } - - private double[,] DistanceMatrix(double[,] data) - { - int n = data.GetLength(0); - double[,] matrix = new double[n, n]; - for (int i = 0; i < n; i++) + // 璁$畻 x 鐨勫潎鍊煎拰鏍囧噯宸� + _xMean = 0; + _xStd = 0; + for (int i = 0; i < len; i++) { - for (int j = i + 1; j < n; j++) - { - double dx = data[i, 0] - data[j, 0]; - double dy = data[i, 1] - data[j, 1]; - matrix[i, j] = matrix[j, i] = Math.Sqrt(dx * dx + dy * dy); - } + _xMean += x[i]; } - return matrix; + _xMean /= len; + + double xSumSq = 0; + for (int i = 0; i < len; i++) + { + xSumSq += (x[i] - _xMean) * (x[i] - _xMean); + } + _xStd = Math.Sqrt(xSumSq / len); + + // 璁$畻 y 鐨勫潎鍊煎拰鏍囧噯宸� + _yMean = 0; + _yStd = 0; + for (int i = 0; i < len; i++) + { + _yMean += y[i]; + } + _yMean /= len; + + double ySumSq = 0; + for (int i = 0; i < len; i++) + { + ySumSq += (y[i] - _yMean) * (y[i] - _yMean); + } + _yStd = Math.Sqrt(ySumSq / len); + + // 鏍囧噯鍖栨暟鎹苟鐢熸垚 PointData 瀵硅薄 + for (int i = 0; i < len; i++) + { + double xScaled = (x[i] - _xMean) / _xStd; + double yScaled = (y[i] - _yMean) / _yStd; + + scaled[i, 0] = xScaled; + scaled[i, 1] = yScaled; + + points.Add(new PointData(xScaled, yScaled)); + } + + return (scaled, points, _xMean, _xStd, _yMean, _yStd); + } + + private (double MinX, double MaxX, double MinY, double MaxY) StandardizeData(List<PointData> points) + { + int len = points.Count; + var originalX = new double[len]; + var originalY = new double[len]; + + for (int i = 0; i < len; i++) + { + originalX[i] = points[i].Point.X * _xStd + _xMean; + originalY[i] = points[i].Point.Y * _yStd + _yMean; + } + + var minX = originalX.Min(x => x); + var maxX = originalX.Max(x => x); + + var minY = originalY.Min(x => x); + var maxY = originalY.Max(x => x); + + + return (minX, maxX, minY, maxY); + } + + private int CalculateMinPts(int dataSize) + { + // 鍩轰簬鏁版嵁瑙勬ā鐨勫姩鎬佽绠楋紙澧炲姞瀹夊叏闃堝�硷級 + return Math.Max(10, (int)(dataSize * 0.02)); } } - - - public class DBSCAN + public static class EpsOptimizer { - private readonly double eps; - private readonly int minPts; - - public DBSCAN(double eps, int minPts) + public static double ComputeEps(double[,] scaledData, int minSamples) { - this.eps = eps; - this.minPts = minPts; - } + int k = Math.Max(1, minSamples); + var distances = new List<double>(); - public int[] Run(double[,] points) - { - int n = points.GetLength(0); - int[] labels = new int[n]; - Array.Fill(labels, -2); // 鍒濆鍖栦负鏈鐞嗙姸鎬� - - int clusterId = -1; - - for (int i = 0; i < n; i++) + // 浣跨敤蹇�熼�夋嫨浼樺寲绗琸杩戦偦璁$畻 + for (int i = 0; i < scaledData.GetLength(0); i++) { - if (labels[i] != -2) continue; - - var neighbors = GetNeighbors(points, i); - - if (neighbors.Count < minPts) + var tempDists = new List<double>(); + for (int j = 0; j < scaledData.GetLength(0); j++) { - labels[i] = -1; // 鏍囪涓哄櫔澹扮偣 - continue; + if (i == j) continue; + double dx = scaledData[i, 0] - scaledData[j, 0]; + double dy = scaledData[i, 1] - scaledData[j, 1]; + tempDists.Add(dx * dx + dy * dy); } - clusterId++; - ExpandCluster(points, i, neighbors, clusterId, labels); + if (tempDists.Count >= k) + { + double kthDistSq = QuickSelect(tempDists, k); + distances.Add(Math.Sqrt(kthDistSq)); + } } - return labels; - } + // 寮傚父澶勭悊涓庤竟鐣屾娴� + if (distances.Count == 0) return 0.5; + distances.Sort(); - private void ExpandCluster(double[,] points, int pointIndex, List<int> neighbors, int clusterId, int[] labels) - { - labels[pointIndex] = clusterId; + // 棰勮绠楀墠缂�鍜屼互鍔犻�熺獥鍙e钩鍧囧�艰绠� + double[] prefixSum = new double[distances.Count + 1]; + for (int i = 0; i < distances.Count; i++) + prefixSum[i + 1] = prefixSum[i] + distances[i]; - for (int i = 0; i < neighbors.Count; i++) + // 鍔ㄦ�佺獥鍙f洸鐜囨娴� + double maxCurvature = double.MinValue; + double eps = distances[1]; // 浣跨敤 C# 8 鐨勭储寮曡娉曟浛浠� Last() + int windowSize = Math.Max(1, distances.Count / 20); + windowSize = Math.Clamp(windowSize, 1, 5); + + for (int i = windowSize; i < distances.Count - windowSize; i++) { - int neighborIndex = neighbors[i]; - if (labels[neighborIndex] == -1) + int prevStart = i - windowSize; + double prevSum = prefixSum[prevStart + windowSize] - prefixSum[prevStart]; + double prevAvg = prevSum / windowSize; + + int nextStart = i; + double nextSum = prefixSum[nextStart + windowSize] - prefixSum[nextStart]; + double nextAvg = nextSum / windowSize; + + double curvature = nextAvg - prevAvg; + if (curvature > maxCurvature) { - labels[neighborIndex] = clusterId; - } - else if (labels[neighborIndex] == -2) - { - labels[neighborIndex] = clusterId; - var newNeighbors = GetNeighbors(points, neighborIndex); - if (newNeighbors.Count >= minPts) - { - neighbors.AddRange(newNeighbors); - } + maxCurvature = curvature; + eps = distances[i]; } } + + return eps * 1.15; } - private List<int> GetNeighbors(double[,] points, int pointIndex) + // 蹇�熼�夋嫨绠楁硶瀹炵幇 + private static double QuickSelect(List<double> list, int k) { - List<int> neighbors = new List<int>(); - int n = points.GetLength(0); - for (int i = 0; i < n; i++) + int left = 0; + int right = list.Count - 1; + var rand = new Random(); + while (left <= right) { - double distance = EuclideanDistance(points, pointIndex, i); - if (distance < eps) + int pivotIndex = Partition(list, left, right, rand.Next(left, right + 1)); + if (pivotIndex == k - 1) return list[pivotIndex]; + if (pivotIndex < k - 1) left = pivotIndex + 1; + else right = pivotIndex - 1; + } + return double.NaN; // 鐞嗚涓嶄細鎵ц鍒版 + } + + private static int Partition(List<double> list, int left, int right, int pivotIndex) + { + double pivotValue = list[pivotIndex]; + Swap(list, pivotIndex, right); + int storeIndex = left; + for (int i = left; i < right; i++) + { + if (list[i] < pivotValue) { - neighbors.Add(i); + Swap(list, i, storeIndex); + storeIndex++; } } - return neighbors; + Swap(list, storeIndex, right); + return storeIndex; } - private double EuclideanDistance(double[,] points, int pointIndex1, int pointIndex2) + private static void Swap(List<double> list, int i, int j) { - double dx = points[pointIndex1, 0] - points[pointIndex2, 0]; - double dy = points[pointIndex1, 1] - points[pointIndex2, 1]; - return Math.Sqrt(dx * dx + dy * dy); + (list[i], list[j]) = (list[j], list[i]); // 浣跨敤鍏冪粍浜ゆ崲 } } -} + public static class ArrayExtensions + { + public static double Mean(this double[] array) => ArrayStatistics.Mean(array); + public static double StandardDeviation(this double[] array) => Math.Sqrt(ArrayStatistics.Variance(array)); + + public static bool Between(this double value, double min, double max) => + value >= min && value <= max; + } +} \ No newline at end of file -- Gitblit v1.9.3