From bb3caa0cd41bd56f496125934491759b4d865733 Mon Sep 17 00:00:00 2001
From: rp <rp@outlook.com>
Date: 星期三, 09 四月 2025 21:58:26 +0800
Subject: [PATCH] 修复备份

---
 xg_优化备份.py |  611 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 611 insertions(+), 0 deletions(-)

diff --git "a/xg_\344\274\230\345\214\226\345\244\207\344\273\275.py" "b/xg_\344\274\230\345\214\226\345\244\207\344\273\275.py"
new file mode 100644
index 0000000..7e8061c
--- /dev/null
+++ "b/xg_\344\274\230\345\214\226\345\244\207\344\273\275.py"
@@ -0,0 +1,611 @@
+import os
+import pickle
+import pandas as pd
+import numpy as np
+import tkinter as tk
+import tkinter.font as tkfont
+from tkinter import ttk
+from datetime import timedelta
+from time import time
+import matplotlib.pyplot as plt
+from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg, NavigationToolbar2Tk
+from xgboost import XGBRegressor
+from lunardate import LunarDate
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import mean_squared_error, mean_absolute_error
+import matplotlib
+
+# Configure matplotlib so that Chinese text renders (CJK-capable sans-serif fonts, ASCII minus sign)
+matplotlib.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'SimSun', 'Arial Unicode MS']
+matplotlib.rcParams['axes.unicode_minus'] = False
+matplotlib.rcParams['font.family'] = 'sans-serif'
+
+# Module-level model cache and feature names (feature_columns is only a placeholder here)
+cached_model = None
+last_training_time = None
+feature_columns = None
+
+# -------------------------------
+# Data loading and preprocessing
+# -------------------------------
+def load_data(upstream_file, downstream_file, qinglong_lake_file=None):
+    """Load the salinity CSVs and return one cleaned, time-sorted frame.
+
+    Each CSV is read as three columns (DateTime, TagName, Value). Returns a
+    DataFrame with DateTime, upstream, downstream and their *_smooth columns
+    (plus the qinglong_lake pair when that file is given), or None when a
+    file is missing.
+    """
+    try:
+        upstream_df = pd.read_csv(upstream_file)
+        downstream_df = pd.read_csv(downstream_file)
+        if qinglong_lake_file:
+            qinglong_lake_df = pd.read_csv(qinglong_lake_file)
+    except FileNotFoundError:
+        print("鏂囦欢鏈壘鍒帮紝璇锋鏌ヨ矾寰�")
+        return None
+    # The raw columns are assumed to be, in order, ['DateTime', 'TagName', 'Value']
+    upstream_df.columns = ['DateTime', 'TagName', 'Value']
+    downstream_df.columns = ['DateTime', 'TagName', 'Value']
+    if qinglong_lake_file:
+        qinglong_lake_df.columns = ['DateTime', 'TagName', 'Value']
+    # Parse timestamps and coerce values to numbers (unparseable values become NaN)
+    upstream_df['DateTime'] = pd.to_datetime(upstream_df['DateTime'])
+    downstream_df['DateTime'] = pd.to_datetime(downstream_df['DateTime'])
+    if qinglong_lake_file:
+        qinglong_lake_df['DateTime'] = pd.to_datetime(qinglong_lake_df['DateTime'])
+    upstream_df['Value'] = pd.to_numeric(upstream_df['Value'], errors='coerce')
+    downstream_df['Value'] = pd.to_numeric(downstream_df['Value'], errors='coerce')
+    if qinglong_lake_file:
+        qinglong_lake_df['Value'] = pd.to_numeric(qinglong_lake_df['Value'], errors='coerce')
+    # Drop readings below 5; .copy() so the in-place edits below act on independent frames
+    upstream_df = upstream_df[upstream_df['Value'] >= 5].copy()
+    downstream_df = downstream_df[downstream_df['Value'] >= 5].copy()
+    if qinglong_lake_file:
+        qinglong_lake_df = qinglong_lake_df[qinglong_lake_df['Value'] >= 5].copy()
+    # Replace 0 with NaN (a guard: the >= 5 filter already removed zeros), then blank values outside mean +/- 3*std
+    for df in [upstream_df, downstream_df]:
+        df.loc[df['Value'] == 0, 'Value'] = np.nan
+        mean_val, std_val = df['Value'].mean(), df['Value'].std()
+        lower_bound, upper_bound = mean_val - 3 * std_val, mean_val + 3 * std_val
+        df.loc[(df['Value'] < lower_bound) | (df['Value'] > upper_bound), 'Value'] = np.nan
+    if qinglong_lake_file:
+        qinglong_lake_df.loc[qinglong_lake_df['Value'] == 0, 'Value'] = np.nan
+        mean_val, std_val = qinglong_lake_df['Value'].mean(), qinglong_lake_df['Value'].std()
+        lower_bound, upper_bound = mean_val - 3 * std_val, mean_val + 3 * std_val
+        qinglong_lake_df.loc[(qinglong_lake_df['Value'] < lower_bound) | (qinglong_lake_df['Value'] > upper_bound), 'Value'] = np.nan
+    # Rename the Value column per source and keep only the columns that are needed
+    upstream_df = upstream_df.rename(columns={'Value': 'upstream'})[['DateTime', 'upstream']]
+    downstream_df = downstream_df.rename(columns={'Value': 'downstream'})[['DateTime', 'downstream']]
+    if qinglong_lake_file:
+        qinglong_lake_df = qinglong_lake_df.rename(columns={'Value': 'qinglong_lake'})[['DateTime', 'qinglong_lake']]
+    # Merge: upstream/downstream on shared timestamps, lake data left-joined onto them
+    merged_df = pd.merge(upstream_df, downstream_df, on='DateTime', how='inner')
+    if qinglong_lake_file:
+        merged_df = pd.merge(merged_df, qinglong_lake_df, on='DateTime', how='left')
+
+    print(f"鍚堝苟鍓嶆暟鎹鏁�: {len(merged_df)}")
+    merged_df = merged_df.sort_values('DateTime').set_index('DateTime')  # sort first: time interpolation and rolling windows below need chronological rows
+
+    # Fill gaps: linear interpolation first, then time-based interpolation, then forward/backward fill
+    merged_df['upstream'] = merged_df['upstream'].interpolate(method='linear', limit=4)
+    merged_df['downstream'] = merged_df['downstream'].interpolate(method='linear', limit=4)
+    if qinglong_lake_file:
+        merged_df['qinglong_lake'] = merged_df['qinglong_lake'].interpolate(method='linear', limit=4)
+    merged_df['upstream'] = merged_df['upstream'].interpolate(method='time', limit=24)
+    merged_df['downstream'] = merged_df['downstream'].interpolate(method='time', limit=24)
+    if qinglong_lake_file:
+        merged_df['qinglong_lake'] = merged_df['qinglong_lake'].interpolate(method='time', limit=24)
+    merged_df['upstream'] = merged_df['upstream'].ffill().bfill()  # fillna(method=...) is deprecated in pandas 2.1+
+    merged_df['downstream'] = merged_df['downstream'].ffill().bfill()
+    if qinglong_lake_file:
+        merged_df['qinglong_lake'] = merged_df['qinglong_lake'].ffill().bfill()
+
+    # Smoothing: centered moving average over a 24-row window
+    merged_df['upstream_smooth'] = merged_df['upstream'].rolling(window=24, min_periods=1, center=True).mean()
+    merged_df['downstream_smooth'] = merged_df['downstream'].rolling(window=24, min_periods=1, center=True).mean()
+    if qinglong_lake_file:
+        merged_df['qinglong_lake_smooth'] = merged_df['qinglong_lake'].rolling(window=24, min_periods=1, center=True).mean()
+    # Rows with upstream < 50 are smoothed with a wider (48-row) window over just those rows
+    low_sal_mask = merged_df['upstream'] < 50
+    if low_sal_mask.any():
+        merged_df.loc[low_sal_mask, 'upstream_smooth'] = merged_df.loc[low_sal_mask, 'upstream']\
+            .rolling(window=48, min_periods=1, center=True).mean()
+
+    merged_df = merged_df.dropna()
+    merged_df = merged_df[merged_df['upstream'].apply(np.isfinite)]
+    merged_df = merged_df[merged_df['downstream'].apply(np.isfinite)]
+    if qinglong_lake_file:
+        merged_df = merged_df[merged_df['qinglong_lake'].apply(np.isfinite)]
+    merged_df = merged_df.reset_index()  # rows are already chronological, so the new index is 0..n-1 in time order
+    print(f"娓呮礂鍚庢暟鎹鏁�: {len(merged_df)}")
+    print(f"涓婃父鐩愬害鑼冨洿: {merged_df['upstream'].min()} - {merged_df['upstream'].max()}")
+    print(f"涓嬫父鐩愬害鑼冨洿: {merged_df['downstream'].min()} - {merged_df['downstream'].max()}")
+    if qinglong_lake_file:
+        print(f"闈掗緳婀栫洂搴﹁寖鍥�: {merged_df['qinglong_lake'].min()} - {merged_df['qinglong_lake'].max()}")
+    return merged_df
+
+# -------------------------------
+# Add lunar-calendar (tide) features
+# -------------------------------
+def add_lunar_features(df):  # adds lunar_day, lunar_phase_sin/cos and is_high_tide columns to df and returns it
+    lunar_day, lunar_phase_sin, lunar_phase_cos, is_high_tide = [], [], [], []
+    for dt in df['DateTime']:
+        ld = LunarDate.fromSolarDate(dt.year, dt.month, dt.day)  # lunar-calendar date for this row's calendar day
+        lunar_day.append(ld.day)
+        lunar_phase_sin.append(np.sin(2 * np.pi * ld.day / 15))  # lunar day-of-month encoded cyclically with period 15
+        lunar_phase_cos.append(np.cos(2 * np.pi * ld.day / 15))
+        is_high_tide.append(1 if (ld.day <= 5 or (ld.day >= 16 and ld.day <= 20)) else 0)  # 1 for lunar days 1-5 and 16-20
+    df['lunar_day'] = lunar_day
+    df['lunar_phase_sin'] = lunar_phase_sin
+    df['lunar_phase_cos'] = lunar_phase_cos
+    df['is_high_tide'] = is_high_tide
+    return df
+
+# -------------------------------
+# Generate delay (lag) features in bulk (vectorized, using shift)
+# -------------------------------
+def batch_create_delay_features(df, delay_hours):  # adds lagged upstream/downstream columns to df and returns it
+    for delay in delay_hours:
+        df[f'upstream_delay_{delay}h'] = df['upstream'].shift(delay)  # shift counts rows; NOTE(review): equals hours only if rows are hourly - confirm
+        df[f'downstream_delay_{delay}h'] = df['downstream'].shift(delay)
+    return df
+
+# -------------------------------
+# Vectorized construction of training samples (optimized feature engineering)
+# -------------------------------
+def create_features_vectorized(df, look_back=96, forecast_horizon=5):
+    """
+    Slice the history window, the downstream window and the labels in bulk with numpy's
+    sliding_window_view; the other features (time, lunar, statistics, delays) are read per row and concatenated.
+    """
+    # total_samples is the number of positions that have a full history window and a full label window:
+    total_samples = len(df) - look_back - forecast_horizon + 1
+    if total_samples <= 0:
+        print("鏁版嵁涓嶈冻浠ュ垱寤虹壒寰�")
+        return np.array([]), np.array([])  # empty X and y signal "not enough data" to the caller
+
+    # History window: look_back consecutive upstream values, built with sliding_window_view
+    upstream_array = df['upstream'].values  # shape (n,)
+    # Sliding windows, result shape (n - look_back + 1, look_back)
+    from numpy.lib.stride_tricks import sliding_window_view
+    window_up = sliding_window_view(upstream_array, window_shape=look_back)[:total_samples, :]
+    
+    # Most recent 24 downstream values: sliding windows of size 24
+    downstream_array = df['downstream'].values
+    window_down_full = sliding_window_view(downstream_array, window_shape=24)
+    # Per-sample equivalent for the downstream window: df['downstream'].iloc[i+look_back-24:i+look_back]
+    # so the window index is i+look_back-24, for i from 0 to total_samples-1
+    window_down = window_down_full[look_back-24 : look_back-24 + total_samples, :]
+
+    # Time, lunar and other per-row features come from df.iloc[look_back: len(df)-forecast_horizon+1]
+    sample_df = df.iloc[look_back: len(df)-forecast_horizon+1].copy()  # NOTE(review): these rows coincide with each sample's first label row - confirm intended
+    basic_time = sample_df['DateTime'].dt.hour.values.reshape(-1, 1) / 24.0
+    weekday = sample_df['DateTime'].dt.dayofweek.values.reshape(-1, 1) / 7.0
+    month = sample_df['DateTime'].dt.month.values.reshape(-1, 1) / 12.0
+    basic_time_feats = np.hstack([basic_time, weekday, month])
+    
+    lunar_feats = sample_df[['lunar_phase_sin','lunar_phase_cos','is_high_tide']].values
+    # Statistics features: expected to be precomputed rolling columns; read the matching rows
+    try:
+        stats_up = sample_df[['mean_1d_up','mean_3d_up','std_1d_up','max_1d_up','min_1d_up']].values
+        stats_down = sample_df[['mean_1d_down','mean_3d_down','std_1d_down','max_1d_down','min_1d_down']].values
+    except KeyError as e:
+        print(f"缁熻鐗瑰緛鍒椾笉瀛樺湪: {e}锛岃纭繚鍏堣绠楃粺璁$壒寰�")
+        return np.array([]), np.array([])
+    
+    # Delay features: every column whose name starts with "upstream_delay_" or "downstream_delay_"
+    delay_cols = [col for col in sample_df.columns if col.startswith('upstream_delay_') or col.startswith('downstream_delay_')]
+    delay_feats = sample_df[delay_cols].values
+
+    # Concatenate everything: history window (window_up), downstream window (window_down), then the other features
+    X = np.hstack([window_up, window_down, basic_time_feats, lunar_feats, stats_up, stats_down, delay_feats])
+    
+    # Labels: sliding windows of forecast_horizon consecutive downstream values
+    label_full = sliding_window_view(downstream_array, window_shape=forecast_horizon)
+    # Label rows run from index look_back to look_back + total_samples
+    y = label_full[look_back: look_back + total_samples, :]
+    global feature_columns
+    feature_columns = ["combined_vector_features"] 
+    print(f"鍚戦噺鍖栫壒寰佸伐绋嬪畬鎴愶紝鏈夋晥鏍锋湰鏁�: {X.shape[0]}")
+    return X, y
+
+# -------------------------------
+# Read the model accuracy metrics
+# -------------------------------
+def get_model_metrics():
+    """Return the accuracy metrics stored in the model cache file, or None when unavailable."""
+    model_cache_file = 'salinity_model.pkl'
+    if os.path.exists(model_cache_file):
+        try:
+            with open(model_cache_file, 'rb') as f:
+                model_data = pickle.load(f)  # NOTE: pickle runs code on load; only open trusted cache files
+                return {
+                    'rmse': model_data.get('rmse', None),  # either value is None when the cache lacks that key
+                    'mae': model_data.get('mae', None)
+                }
+        except Exception as e:
+            print(f"鑾峰彇妯″瀷鎸囨爣澶辫触: {e}")
+    return None  # no cache file, or it could not be read
+
+# -------------------------------
+# 妯″瀷璁粌涓庨娴嬶紝灞曠ず楠岃瘉鍑嗙‘搴︼紙RMSE, MAE锛�
+# -------------------------------
+def train_and_predict(df, start_time, force_retrain=False):
+    """Train (or reuse) the XGBoost model and forecast from ``start_time``.
+
+    Rows of ``df`` before ``start_time`` are the training/history data. The
+    cached model (memory or ``salinity_model.pkl``) is reused unless
+    ``force_retrain`` is set or it was trained before the latest history row.
+    Returns ``(future_dates, predictions, model, metrics)``; all four are
+    ``None`` when training or prediction cannot be completed.
+    """
+    global cached_model, last_training_time
+    model_cache_file = 'salinity_model.pkl'
+    model_needs_training = True
+
+    if os.path.exists(model_cache_file) and force_retrain:
+        try:
+            os.remove(model_cache_file)
+            print("宸插垹闄ゆ棫妯″瀷缂撳瓨锛堝己鍒堕噸鏂拌缁冿級")
+        except Exception as e:
+            print("鍒犻櫎缂撳瓨寮傚父:", e)
+
+    train_df = df[df['DateTime'] < start_time].copy()
+    if not force_retrain and cached_model is not None and last_training_time is not None:
+        if last_training_time >= train_df['DateTime'].max():
+            model_needs_training = False
+            print(f"浣跨敤缂撳瓨妯″瀷锛岃缁冩椂闂�: {last_training_time}")
+    elif not force_retrain and os.path.exists(model_cache_file):
+        try:
+            with open(model_cache_file, 'rb') as f:
+                model_data = pickle.load(f)  # NOTE: pickle runs code on load; only open trusted cache files
+                cached_model = model_data['model']
+                last_training_time = model_data['training_time']
+                if last_training_time >= train_df['DateTime'].max():
+                    model_needs_training = False
+                    print(f"浠庢枃浠跺姞杞芥ā鍨嬶紝璁粌鏃堕棿: {last_training_time}")
+        except Exception as e:
+            print("鍔犺浇妯″瀷澶辫触:", e)
+
+    if model_needs_training:
+        print("寮�濮嬭缁冩柊妯″瀷...")
+        if len(train_df) < 100:
+            print("璁粌鏁版嵁涓嶈冻")
+            return None, None, None, None
+
+        start_train = time()
+        X, y = create_features_vectorized(train_df, look_back=96, forecast_horizon=5)
+        if len(X) == 0 or len(y) == 0:
+            print("鏍锋湰鐢熸垚涓嶈冻锛岃缁冪粓姝�")
+            return None, None, None, None
+        print(f"璁粌鏍锋湰鏁伴噺: {X.shape[0]}")
+        X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
+        model = XGBRegressor(
+            n_estimators=300,
+            learning_rate=0.03,
+            max_depth=5,
+            min_child_weight=2,
+            subsample=0.85,
+            colsample_bytree=0.85,
+            gamma=0.1,
+            reg_alpha=0.2,
+            reg_lambda=1.5,
+            n_jobs=-1,
+            random_state=42,
+            eval_metric='rmse',  # set here: xgboost 2.x no longer accepts these two in fit()
+            early_stopping_rounds=20
+        )
+        try:
+            model.fit(X_train, y_train,
+                      eval_set=[(X_val, y_val)],
+                      verbose=False)
+            # Compute RMSE and MAE on the validation split
+            y_val_pred = model.predict(X_val)
+            rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))
+            mae = mean_absolute_error(y_val, y_val_pred)
+            print(f"楠岃瘉闆� RMSE: {rmse:.4f}, MAE: {mae:.4f}")
+            last_training_time = start_time
+            cached_model = model
+            with open(model_cache_file, 'wb') as f:
+                pickle.dump({
+                    'model': model,
+                    'training_time': last_training_time,
+                    'feature_columns': feature_columns,
+                    'rmse': rmse,
+                    'mae': mae
+                }, f)
+            print(f"妯″瀷璁粌瀹屾垚锛岃�楁椂: {time() - start_train:.2f}绉�")
+        except Exception as e:
+            print("妯″瀷璁粌寮傚父:", e)
+            return None, None, None, None
+    else:
+        model = cached_model
+
+    # Prediction: build a single sample laid out exactly like the training features
+    try:
+        # Use only rows before start_time (train_df) so the forecast never sees
+        # data from the period it is predicting
+        n = len(train_df)
+        if n < 96 + 5:
+            print("棰勬祴鏁版嵁涓嶈冻")
+            return None, None, None, None
+
+        # Latest upstream (96 rows) and downstream (24 rows) history windows
+        upstream_array = train_df['upstream'].values
+        window_up = np.lib.stride_tricks.sliding_window_view(upstream_array, window_shape=96)[-1, :]
+        downstream_array = train_df['downstream'].values
+        window_down = np.lib.stride_tricks.sliding_window_view(downstream_array, window_shape=24)[-1, :]
+
+        # Time and lunar features are taken at the forecast start time
+        hour_norm = start_time.hour / 24.0
+        weekday_norm = start_time.dayofweek / 7.0
+        month_norm = start_time.month / 12.0
+        basic_time_feats = np.array([hour_norm, weekday_norm, month_norm]).reshape(1, -1)
+        ld = LunarDate.fromSolarDate(start_time.year, start_time.month, start_time.day)
+        lunar_feats = np.array([np.sin(2*np.pi*ld.day/15),
+                                np.cos(2*np.pi*ld.day/15),
+                                1 if (ld.day <=5 or (ld.day >=16 and ld.day<=20)) else 0]).reshape(1, -1)
+
+        # Statistics features over the latest 24 / 72 history rows
+        try:
+            # Prefer the statistics columns already present in the DataFrame
+            stats_up = train_df[['mean_1d_up','mean_3d_up','std_1d_up','max_1d_up','min_1d_up']].iloc[-1:].values
+            stats_down = train_df[['mean_1d_down','mean_3d_down','std_1d_down','max_1d_down','min_1d_down']].iloc[-1:].values
+        except KeyError:
+            # Columns are missing: compute the statistics directly from the tail
+            recent_up = train_df['upstream'].values[-24:]
+            stats_up = np.array([np.mean(recent_up),
+                                np.mean(train_df['upstream'].values[-72:]),
+                                np.std(recent_up),
+                                np.max(recent_up),
+                                np.min(recent_up)]).reshape(1, -1)
+            recent_down = train_df['downstream'].values[-24:]
+            stats_down = np.array([np.mean(recent_down),
+                                    np.mean(train_df['downstream'].values[-72:]),
+                                    np.std(recent_down),
+                                    np.max(recent_down),
+                                    np.min(recent_down)]).reshape(1, -1)
+
+        # Delay features: read straight from the last history row
+        delay_cols = [col for col in train_df.columns if col.startswith('upstream_delay_') or col.startswith('downstream_delay_')]
+        delay_feats = train_df[delay_cols].iloc[-1:].values  # shape (1, ?)
+
+        # Concatenate all prediction features in the same order as in training
+        X_pred = np.hstack([window_up.reshape(1, -1), 
+                            window_down.reshape(1, -1),
+                            basic_time_feats, lunar_feats, stats_up, stats_down, delay_feats])
+        if np.isnan(X_pred).any() or np.isinf(X_pred).any():
+            X_pred = np.nan_to_num(X_pred, nan=0.0, posinf=1e6, neginf=-1e6)
+        predictions = model.predict(X_pred)
+        # Date labels for 5 days; NOTE(review): training labels are the next 5 rows of downstream - confirm the row spacing is daily
+        future_dates = [start_time + timedelta(days=i) for i in range(5)]
+        print("棰勬祴瀹屾垚")
+        
+        # Read the validation metrics back from the model cache file
+        metrics = get_model_metrics()
+        
+        return future_dates, predictions.flatten(), model, metrics
+    except Exception as e:
+        print("棰勬祴杩囩▼寮傚父:", e)
+        return None, None, None, None
+
+# -------------------------------
+# GUI section
+# -------------------------------
+def run_gui():  # builds the Tk window, wires the callbacks and blocks in mainloop(); reads the module-level df
+    def configure_gui_fonts():  # point Tk's default/text/fixed fonts at the first family that configures without error
+        font_names = ['寰蒋闆呴粦', 'Microsoft YaHei', 'SimSun', 'SimHei']
+        for font_name in font_names:
+            try:
+                default_font = tkfont.nametofont("TkDefaultFont")
+                default_font.configure(family=font_name)
+                text_font = tkfont.nametofont("TkTextFont")
+                text_font.configure(family=font_name)
+                fixed_font = tkfont.nametofont("TkFixedFont")
+                fixed_font.configure(family=font_name)
+                return True
+            except Exception as e:
+                continue  # try the next candidate family
+        return False
+
+    def on_predict():  # button callback: run train_and_predict for the entered start time and redraw the chart
+        try:
+            predict_start = time()
+            status_label.config(text="棰勬祴涓�...")
+            root.update()  # repaint now so the status text is visible during the long-running call
+            start_time_dt = pd.to_datetime(entry.get())
+            force_retrain = retrain_var.get()
+            future_dates, predictions, model, metrics = train_and_predict(df, start_time_dt, force_retrain)
+            if future_dates is None or predictions is None:
+                status_label.config(text="棰勬祴澶辫触")
+                return
+
+            # Show the model accuracy metrics when they are available
+            if metrics:
+                metrics_text = f"妯″瀷鍑嗙‘搴� - RMSE: {metrics['rmse']:.4f}, MAE: {metrics['mae']:.4f}"
+                metrics_label.config(text=metrics_text)
+
+            ax.clear()
+            # Plot history: the 120 days ending at the start time (capped at the last data timestamp)
+            history_end = min(start_time_dt, df['DateTime'].max())
+            history_start = history_end - timedelta(days=120)
+            hist_data = df[(df['DateTime'] >= history_start) & (df['DateTime'] <= history_end)]
+            ax.plot(hist_data['DateTime'], hist_data['downstream'], label='涓�鍙栨按(涓嬫父)鐩愬害', color='blue', linewidth=1.5)
+            ax.plot(hist_data['DateTime'], hist_data['upstream_smooth'], label='闈掗緳娓�(涓婃父)鐩愬害', color='purple', linewidth=1.5, alpha=0.7)
+            if 'qinglong_lake_smooth' in hist_data.columns:
+                ax.plot(hist_data['DateTime'], hist_data['qinglong_lake_smooth'], label='闈掗緳婀栫洂搴�', color='green', linewidth=1.5, alpha=0.7)
+            ax.plot(future_dates, predictions, marker='o', linestyle='--', label='棰勬祴鐩愬害', color='red', linewidth=2)
+            actual_data = df[(df['DateTime'] >= start_time_dt) & (df['DateTime'] <= future_dates[-1])]  # observed values inside the forecast span, if any
+            if not actual_data.empty:
+                ax.plot(actual_data['DateTime'], actual_data['downstream'], marker='s', linestyle='-', label='瀹為檯鐩愬害', color='orange', linewidth=2)
+            std_dev = hist_data['downstream'].std() * 0.5  # band half-width: half the std of the plotted downstream history
+            ax.fill_between(future_dates, predictions - std_dev, predictions + std_dev, color='red', alpha=0.2)
+            ax.set_xlabel('鏃ユ湡')
+            ax.set_ylabel('鐩愬害')
+            ax.set_title(f"浠� {start_time_dt.strftime('%Y-%m-%d %H:%M:%S')} 寮�濮嬬殑鐩愬害棰勬祴")
+            ax.legend(loc='upper left')
+            fig.tight_layout()
+            canvas.draw()
+            predict_time = time() - predict_start
+            status_label.config(text=f"棰勬祴瀹屾垚 (鑰楁椂: {predict_time:.2f}绉�)")
+            result_text = "棰勬祴缁撴灉:\n"
+            for i, (date, pred) in enumerate(zip(future_dates, predictions)):
+                result_text += f"绗� {i+1} 澶� ({date.strftime('%Y-%m-%d')}): {pred:.2f}\n"
+            result_label.config(text=result_text)
+        except Exception as e:
+            status_label.config(text=f"閿欒: {str(e)}")  # any failure is reported in the status label
+
+    def on_scroll(event):  # mouse-wheel zoom that keeps the point under the cursor fixed
+        xlim = ax.get_xlim()
+        ylim = ax.get_ylim()
+        zoom_factor = 1.1
+        x_data = event.xdata if event.xdata is not None else (xlim[0]+xlim[1])/2  # fall back to the axes centre
+        y_data = event.ydata if event.ydata is not None else (ylim[0]+ylim[1])/2
+        x_rel = (x_data - xlim[0]) / (xlim[1] - xlim[0])  # cursor position as a fraction of the current range
+        y_rel = (y_data - ylim[0]) / (ylim[1] - ylim[0])
+        if event.step > 0:  # positive step: zoom in (shrink both ranges)
+            new_width = (xlim[1]-xlim[0]) / zoom_factor
+            new_height = (ylim[1]-ylim[0]) / zoom_factor
+            x0 = x_data - x_rel * new_width
+            y0 = y_data - y_rel * new_height
+            ax.set_xlim([x0, x0+new_width])
+            ax.set_ylim([y0, y0+new_height])
+        else:  # otherwise zoom out (grow both ranges)
+            new_width = (xlim[1]-xlim[0]) * zoom_factor
+            new_height = (ylim[1]-ylim[0]) * zoom_factor
+            x0 = x_data - x_rel * new_width
+            y0 = y_data - y_rel * new_height
+            ax.set_xlim([x0, x0+new_width])
+            ax.set_ylim([y0, y0+new_height])
+        canvas.draw_idle()
+
+    def update_cursor(event):  # show the "fleur" cursor while the pointer is over the axes
+        if event.inaxes == ax:
+            canvas.get_tk_widget().config(cursor="fleur")
+        else:
+            canvas.get_tk_widget().config(cursor="")
+
+    def reset_view():  # redraw the default history chart and report it in the status label
+        display_history()
+        status_label.config(text="鍥捐〃瑙嗗浘宸查噸缃�")
+
+    root = tk.Tk()
+    root.title("闈掗緳娓�-闄堣鐩愬害棰勬祴绯荤粺")
+    try:
+        configure_gui_fonts()
+    except Exception as e:
+        print("瀛椾綋閰嶇疆寮傚父:", e)
+    input_frame = ttk.Frame(root, padding="10")  # top row: start-time entry, predict button, status text
+    input_frame.pack(fill=tk.X)
+    control_frame = ttk.Frame(root, padding="5")  # second row: retrain checkbox, legend text, reset button
+    control_frame.pack(fill=tk.X)
+    result_frame = ttk.Frame(root, padding="10")  # chart canvas and the textual results
+    result_frame.pack(fill=tk.BOTH, expand=True)
+    ttk.Label(input_frame, text="杈撳叆寮�濮嬫椂闂� (YYYY-MM-DD HH:MM:SS)").pack(side=tk.LEFT)
+    entry = ttk.Entry(input_frame, width=25)
+    entry.pack(side=tk.LEFT, padx=5)
+    predict_button = ttk.Button(input_frame, text="棰勬祴", command=on_predict)
+    predict_button.pack(side=tk.LEFT)
+    status_label = ttk.Label(input_frame, text="鎻愮ず: 绗竴娆¤繍琛岃鍕鹃��'寮哄埗閲嶆柊璁粌妯″瀷'")
+    status_label.pack(side=tk.LEFT, padx=10)
+    retrain_var = tk.BooleanVar(value=False)
+    ttk.Checkbutton(control_frame, text="寮哄埗閲嶆柊璁粌妯″瀷", variable=retrain_var).pack(side=tk.LEFT)
+    legend_label = ttk.Label(control_frame, text="鍥句緥: 绱壊=闈掗緳娓笂娓告暟鎹�, 钃濊壊=涓�鍙栨按涓嬫父鏁版嵁, 绾㈣壊=棰勬祴鍊�, 缁胯壊=瀹為檯鍊�")
+    legend_label.pack(side=tk.LEFT, padx=10)
+    reset_button = ttk.Button(control_frame, text="閲嶇疆瑙嗗浘", command=reset_view)
+    reset_button.pack(side=tk.LEFT, padx=5)
+    
+    # Label that shows the model accuracy metrics read from the cache file
+    metrics_frame = ttk.Frame(root, padding="5")
+    metrics_frame.pack(fill=tk.X)
+    model_metrics = get_model_metrics()
+    metrics_text = "妯″瀷鍑嗙‘搴�: 鏈煡" if not model_metrics else f"妯″瀷鍑嗙‘搴� - RMSE: {model_metrics['rmse']:.4f}, MAE: {model_metrics['mae']:.4f}"
+    metrics_label = ttk.Label(metrics_frame, text=metrics_text)
+    metrics_label.pack(side=tk.LEFT, padx=10)
+    
+    result_label = ttk.Label(result_frame, text="", justify=tk.LEFT)
+    result_label.pack(side=tk.RIGHT, fill=tk.Y)
+    fig, ax = plt.subplots(figsize=(10,5), dpi=100)
+    canvas = FigureCanvasTkAgg(fig, master=result_frame)  # embed the matplotlib figure in the Tk frame
+    canvas.get_tk_widget().pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
+    toolbar_frame = ttk.Frame(result_frame)
+    toolbar_frame.pack(side=tk.BOTTOM, fill=tk.X)
+    toolbar = NavigationToolbar2Tk(canvas, toolbar_frame)
+    toolbar.update()
+    canvas.mpl_connect('scroll_event', on_scroll)
+    canvas.mpl_connect('motion_notify_event', update_cursor)
+    
+    def display_history():  # draw downstream and smoothed upstream for the last 60 days of data
+        ax.clear()
+        end_date = df['DateTime'].max()
+        start_date = max(df['DateTime'].min(), end_date - timedelta(days=60))
+        hist_data = df[(df['DateTime']>=start_date)&(df['DateTime']<=end_date)]
+        ax.plot(hist_data['DateTime'], hist_data['downstream'], label='涓�鍙栨按(涓嬫父)鐩愬害', color='blue', linewidth=1.5)
+        ax.plot(hist_data['DateTime'], hist_data['upstream_smooth'], label='闈掗緳娓�(涓婃父)鐩愬害', color='purple', linewidth=1.5, alpha=0.7)
+        ax.set_xlabel('鏃ユ湡')
+        ax.set_ylabel('鐩愬害')
+        ax.set_title('鍘嗗彶鐩愬害鏁版嵁瀵规瘮')
+        ax.legend()
+        fig.tight_layout()
+        canvas.draw()
+    
+    display_history()  # initial chart before any prediction is requested
+    root.mainloop()
+
+# -------------------------------
+# Main entry point: load the data, add features, generate delay features, then start the GUI
+# -------------------------------
+def save_processed_data(df, filename='processed_data.pkl'):  # pickle df to filename; returns True on success, False on error
+    try:
+        df.to_pickle(filename)
+        print(f"宸蹭繚瀛樺鐞嗗悗鐨勬暟鎹埌 {filename}")
+        return True
+    except Exception as e:
+        print(f"淇濆瓨鏁版嵁澶辫触: {e}")  # the failure is printed, not raised
+        return False
+
+def load_processed_data(filename='processed_data.pkl'):  # returns the pickled DataFrame, or None when missing or unreadable
+    try:
+        if os.path.exists(filename):
+            df = pd.read_pickle(filename)  # NOTE: read_pickle unpickles the file; only load trusted files
+            print(f"宸蹭粠 {filename} 鍔犺浇澶勭悊鍚庣殑鏁版嵁")
+            return df
+        else:
+            print(f"鎵句笉鍒板鐞嗗悗鐨勬暟鎹枃浠� {filename}")
+            return None
+    except Exception as e:
+        print(f"鍔犺浇鏁版嵁澶辫触: {e}")  # the failure is printed, not raised
+        return None
+
+# Try to load the preprocessed data; rebuild it from the raw CSVs when it is not available
+processed_data = load_processed_data()
+if processed_data is not None:
+    df = processed_data
+else:
+    df = load_data('闈掗緳娓�1.csv', '涓�鍙栨按.csv')
+    if df is not None:
+        df = add_lunar_features(df)
+        delay_hours = [1,2,3,4,6,12,24,36,48,60,72,84,96,108,120]
+        df = batch_create_delay_features(df, delay_hours)
+        
+        # Add rolling statistics features (24-row and 72-row windows) for upstream and downstream
+        df['mean_1d_up'] = df['upstream'].rolling(window=24, min_periods=1).mean()
+        df['mean_3d_up'] = df['upstream'].rolling(window=72, min_periods=1).mean()
+        df['std_1d_up'] = df['upstream'].rolling(window=24, min_periods=1).std()
+        df['max_1d_up'] = df['upstream'].rolling(window=24, min_periods=1).max()
+        df['min_1d_up'] = df['upstream'].rolling(window=24, min_periods=1).min()
+        
+        df['mean_1d_down'] = df['downstream'].rolling(window=24, min_periods=1).mean()
+        df['mean_3d_down'] = df['downstream'].rolling(window=72, min_periods=1).mean()
+        df['std_1d_down'] = df['downstream'].rolling(window=24, min_periods=1).std()
+        df['max_1d_down'] = df['downstream'].rolling(window=24, min_periods=1).max()
+        df['min_1d_down'] = df['downstream'].rolling(window=24, min_periods=1).min()
+        
+        # Save the processed data so later runs can skip the preprocessing
+        save_processed_data(df)
+    
+if df is not None:  # df is None only when load_data could not find the CSV files
+    run_gui()
+else:
+    print("鏁版嵁鍔犺浇澶辫触锛屾棤娉曡繍琛岄娴嬨��")

--
Gitblit v1.9.3