From bb3caa0cd41bd56f496125934491759b4d865733 Mon Sep 17 00:00:00 2001
From: rp <rp@outlook.com>
Date: Wed, 09 Apr 2025 21:58:26 +0800
Subject: [PATCH] Fix backup

---
 xg_优化备份.py | 611 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 611 insertions(+), 0 deletions(-)

diff --git "a/xg_\344\274\230\345\214\226\345\244\207\344\273\275.py" "b/xg_\344\274\230\345\214\226\345\244\207\344\273\275.py"
new file mode 100644
index 0000000..7e8061c
--- /dev/null
+++ "b/xg_\344\274\230\345\214\226\345\244\207\344\273\275.py"
@@ -0,0 +1,611 @@
+import os
+import pickle
+import pandas as pd
+import numpy as np
+import tkinter as tk
+import tkinter.font as tkfont
+from tkinter import ttk
+from datetime import timedelta
+from time import time
+import matplotlib.pyplot as plt
+from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg, NavigationToolbar2Tk
+from numpy.lib.stride_tricks import sliding_window_view
+from xgboost import XGBRegressor
+from lunardate import LunarDate
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import mean_squared_error, mean_absolute_error
+import matplotlib
+
+# Configure matplotlib with CJK-capable fonts (labels may contain Chinese station names)
+matplotlib.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'SimSun', 'Arial Unicode MS']
+matplotlib.rcParams['axes.unicode_minus'] = False
+matplotlib.rcParams['font.family'] = 'sans-serif'
+
+# Global cache variables and feature names (feature_columns is only a placeholder here)
+cached_model = None
+last_training_time = None
+feature_columns = None
+
+# -------------------------------
+# Data loading and preprocessing
+# -------------------------------
+def load_data(upstream_file, downstream_file, qinglong_lake_file=None):
+    try:
+        upstream_df = pd.read_csv(upstream_file)
+        downstream_df = pd.read_csv(downstream_file)
+        if qinglong_lake_file:
+            qinglong_lake_df = pd.read_csv(qinglong_lake_file)
+    except FileNotFoundError:
+        print("File not found; please check the paths")
+        return None
+
+    # The raw data columns are assumed to be ['DateTime', 'TagName', 'Value']
+    upstream_df.columns = ['DateTime', 'TagName', 'Value']
+    downstream_df.columns = ['DateTime', 'TagName', 'Value']
+    if qinglong_lake_file:
+        qinglong_lake_df.columns = ['DateTime', 'TagName', 'Value']
+
+    # Parse timestamps and coerce values to numeric
+    upstream_df['DateTime'] = pd.to_datetime(upstream_df['DateTime'])
+    downstream_df['DateTime'] = pd.to_datetime(downstream_df['DateTime'])
+    if qinglong_lake_file:
+        qinglong_lake_df['DateTime'] = pd.to_datetime(qinglong_lake_df['DateTime'])
+    upstream_df['Value'] = pd.to_numeric(upstream_df['Value'], errors='coerce')
+    downstream_df['Value'] = pd.to_numeric(downstream_df['Value'], errors='coerce')
+    if qinglong_lake_file:
+        qinglong_lake_df['Value'] = pd.to_numeric(qinglong_lake_df['Value'], errors='coerce')
+
+    # Drop salinity readings below 5
+    upstream_df = upstream_df[upstream_df['Value'] >= 5]
+    downstream_df = downstream_df[downstream_df['Value'] >= 5]
+    if qinglong_lake_file:
+        qinglong_lake_df = qinglong_lake_df[qinglong_lake_df['Value'] >= 5]
+
+    # Replace 0 with NaN and mask outliers with the 3-sigma rule
+    for df in [upstream_df, downstream_df]:
+        df.loc[df['Value'] == 0, 'Value'] = np.nan
+        mean_val, std_val = df['Value'].mean(), df['Value'].std()
+        lower_bound, upper_bound = mean_val - 3 * std_val, mean_val + 3 * std_val
+        df.loc[(df['Value'] < lower_bound) | (df['Value'] > upper_bound), 'Value'] = np.nan
+    if qinglong_lake_file:
+        qinglong_lake_df.loc[qinglong_lake_df['Value'] == 0, 'Value'] = np.nan
+        mean_val, std_val = qinglong_lake_df['Value'].mean(), qinglong_lake_df['Value'].std()
+        lower_bound, upper_bound = mean_val - 3 * std_val, mean_val + 3 * std_val
+        qinglong_lake_df.loc[(qinglong_lake_df['Value'] < lower_bound) |
+                             (qinglong_lake_df['Value'] > upper_bound), 'Value'] = np.nan
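+    # Note: the 3-sigma mask above assumes roughly Gaussian noise. As an
+    # illustrative example, with mean 100 and std 10 any reading outside
+    # [70, 130] becomes NaN here and is later repaired by the interpolation
+    # cascade below (linear -> time-based -> ffill/bfill).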
+
+    # Rename the Value column and keep only the needed columns
+    upstream_df = upstream_df.rename(columns={'Value': 'upstream'})[['DateTime', 'upstream']]
+    downstream_df = downstream_df.rename(columns={'Value': 'downstream'})[['DateTime', 'downstream']]
+    if qinglong_lake_file:
+        qinglong_lake_df = qinglong_lake_df.rename(columns={'Value': 'qinglong_lake'})[['DateTime', 'qinglong_lake']]
+
+    # Merge the series
+    merged_df = pd.merge(upstream_df, downstream_df, on='DateTime', how='inner')
+    if qinglong_lake_file:
+        merged_df = pd.merge(merged_df, qinglong_lake_df, on='DateTime', how='left')
+
+    print(f"Rows before cleaning: {len(merged_df)}")
+    merged_df = merged_df.set_index('DateTime')
+
+    # Interpolation cascade: linear first, then time-based, finally forward/backward fill
+    merged_df['upstream'] = merged_df['upstream'].interpolate(method='linear', limit=4)
+    merged_df['downstream'] = merged_df['downstream'].interpolate(method='linear', limit=4)
+    if qinglong_lake_file:
+        merged_df['qinglong_lake'] = merged_df['qinglong_lake'].interpolate(method='linear', limit=4)
+    merged_df['upstream'] = merged_df['upstream'].interpolate(method='time', limit=24)
+    merged_df['downstream'] = merged_df['downstream'].interpolate(method='time', limit=24)
+    if qinglong_lake_file:
+        merged_df['qinglong_lake'] = merged_df['qinglong_lake'].interpolate(method='time', limit=24)
+    merged_df['upstream'] = merged_df['upstream'].ffill().bfill()
+    merged_df['downstream'] = merged_df['downstream'].ffill().bfill()
+    if qinglong_lake_file:
+        merged_df['qinglong_lake'] = merged_df['qinglong_lake'].ffill().bfill()
+
+    # Smoothing: centered moving average over a 24-sample window
+    merged_df['upstream_smooth'] = merged_df['upstream'].rolling(window=24, min_periods=1, center=True).mean()
+    merged_df['downstream_smooth'] = merged_df['downstream'].rolling(window=24, min_periods=1, center=True).mean()
+    if qinglong_lake_file:
+        merged_df['qinglong_lake_smooth'] = merged_df['qinglong_lake'].rolling(window=24, min_periods=1, center=True).mean()
+    # Use a wider smoothing window for the low-salinity segments
+    low_sal_mask = merged_df['upstream'] < 50
+    if low_sal_mask.any():
+        merged_df.loc[low_sal_mask, 'upstream_smooth'] = merged_df.loc[low_sal_mask, 'upstream']\
+            .rolling(window=48, min_periods=1, center=True).mean()
+
+    merged_df = merged_df.dropna()
+    merged_df = merged_df[merged_df['upstream'].apply(np.isfinite)]
+    merged_df = merged_df[merged_df['downstream'].apply(np.isfinite)]
+    if qinglong_lake_file:
+        merged_df = merged_df[merged_df['qinglong_lake'].apply(np.isfinite)]
+    merged_df = merged_df.reset_index()
+    print(f"Rows after cleaning: {len(merged_df)}")
+    print(f"Upstream salinity range: {merged_df['upstream'].min()} - {merged_df['upstream'].max()}")
+    print(f"Downstream salinity range: {merged_df['downstream'].min()} - {merged_df['downstream'].max()}")
+    if qinglong_lake_file:
+        print(f"Qinglong Lake salinity range: {merged_df['qinglong_lake'].min()} - {merged_df['qinglong_lake'].max()}")
+    merged_df = merged_df.sort_values('DateTime')
+    return merged_df
+
+# -------------------------------
+# Add lunar-calendar (tidal) features
+# -------------------------------
+def add_lunar_features(df):
+    lunar_day, lunar_phase_sin, lunar_phase_cos, is_high_tide = [], [], [], []
+    for dt in df['DateTime']:
+        ld = LunarDate.fromSolarDate(dt.year, dt.month, dt.day)
+        lunar_day.append(ld.day)
+        lunar_phase_sin.append(np.sin(2 * np.pi * ld.day / 15))
+        lunar_phase_cos.append(np.cos(2 * np.pi * ld.day / 15))
+        is_high_tide.append(1 if (ld.day <= 5 or (ld.day >= 16 and ld.day <= 20)) else 0)
+    df['lunar_day'] = lunar_day
+    df['lunar_phase_sin'] = lunar_phase_sin
+    df['lunar_phase_cos'] = lunar_phase_cos
+    df['is_high_tide'] = is_high_tide
+    return df
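+# Note on the encoding above: the lunar day is mapped onto a sin/cos pair with
+# period 15 (roughly the spring-neap half-month cycle), so day 1 and day 16
+# land at the same phase angle; for example day 4 gives
+# sin(2*pi*4/15) ~= 0.995 and cos(2*pi*4/15) ~= -0.105.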
+
+# -------------------------------
+# Batch-generate lag features (vectorized via shift)
+# -------------------------------
+def batch_create_delay_features(df, delay_hours):
+    for delay in delay_hours:
+        df[f'upstream_delay_{delay}h'] = df['upstream'].shift(delay)
+        df[f'downstream_delay_{delay}h'] = df['downstream'].shift(delay)
+    return df
+
+# -------------------------------
+# Vectorized construction of training samples (optimized feature engineering)
+# -------------------------------
+def create_features_vectorized(df, look_back=96, forecast_horizon=5):
+    """
+    Slice the history windows, the downstream windows and the labels in batch
+    with numpy's sliding_window_view; the remaining features (time, lunar,
+    statistics, lag features) are read in batch and concatenated.
+    """
+    # total_samples is defined as:
+    total_samples = len(df) - look_back - forecast_horizon + 1
+    if total_samples <= 0:
+        print("Not enough data to create features")
+        return np.array([]), np.array([])
+
+    # Build the history windows with sliding_window_view (look_back consecutive upstream values)
+    upstream_array = df['upstream'].values  # shape (n,)
+    # Sliding windows, result shape (n - look_back + 1, look_back)
+    window_up = sliding_window_view(upstream_array, window_shape=look_back)[:total_samples, :]
+
+    # Most recent 24 hours of downstream data: sliding windows of size 24
+    downstream_array = df['downstream'].values
+    window_down_full = sliding_window_view(downstream_array, window_shape=24)
+    # For the downstream windows the original logic takes
+    # df['downstream'].iloc[i+look_back-24:i+look_back], i.e. start index
+    # i+look_back-24 with i running from 0 to total_samples-1
+    window_down = window_down_full[look_back-24 : look_back-24 + total_samples, :]
+
+    # Time and lunar features: the sampled rows are df.iloc[look_back: len(df)-forecast_horizon+1]
+    sample_df = df.iloc[look_back: len(df)-forecast_horizon+1].copy()
+    basic_time = sample_df['DateTime'].dt.hour.values.reshape(-1, 1) / 24.0
+    weekday = sample_df['DateTime'].dt.dayofweek.values.reshape(-1, 1) / 7.0
+    month = sample_df['DateTime'].dt.month.values.reshape(-1, 1) / 12.0
+    basic_time_feats = np.hstack([basic_time, weekday, month])
+
+    lunar_feats = sample_df[['lunar_phase_sin','lunar_phase_cos','is_high_tide']].values
+    # Statistical features (precomputed with rolling; take the matching rows)
+    try:
+        stats_up = sample_df[['mean_1d_up','mean_3d_up','std_1d_up','max_1d_up','min_1d_up']].values
+        stats_down = sample_df[['mean_1d_down','mean_3d_down','std_1d_down','max_1d_down','min_1d_down']].values
+    except KeyError as e:
+        print(f"Missing statistical feature column: {e}; compute the statistics first")
+        return np.array([]), np.array([])
+
+    # Lag features: every lag column name starts with "upstream_delay_" or "downstream_delay_"
+    delay_cols = [col for col in sample_df.columns if col.startswith('upstream_delay_') or col.startswith('downstream_delay_')]
+    delay_feats = sample_df[delay_cols].values
+
+    # Concatenate all features: the history window (window_up) and downstream
+    # window (window_down) first, then the remaining feature blocks
+    X = np.hstack([window_up, window_down, basic_time_feats, lunar_feats, stats_up, stats_down, delay_feats])
+
+    # Build the labels: sliding windows of downstream data over forecast_horizon
+    label_full = sliding_window_view(downstream_array, window_shape=forecast_horizon)
+    # The label range corresponds to index = look_back up to look_back + total_samples
+    y = label_full[look_back: look_back + total_samples, :]
+    global feature_columns
+    feature_columns = ["combined_vector_features"]
+    print(f"Vectorized feature engineering done; valid samples: {X.shape[0]}")
+    return X, y
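+# sliding_window_view reference (illustrative):
+#   >>> sliding_window_view(np.array([1, 2, 3, 4, 5]), window_shape=3)
+#   array([[1, 2, 3],
+#          [2, 3, 4],
+#          [3, 4, 5]])
+# Each row is one window, so slicing rows selects aligned windows in batch.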
+
+# -------------------------------
+# Fetch model accuracy metrics
+# -------------------------------
+def get_model_metrics():
+    """Read the accuracy metrics stored in the model cache."""
+    model_cache_file = 'salinity_model.pkl'
+    if os.path.exists(model_cache_file):
+        try:
+            with open(model_cache_file, 'rb') as f:
+                model_data = pickle.load(f)
+            return {
+                'rmse': model_data.get('rmse', None),
+                'mae': model_data.get('mae', None)
+            }
+        except Exception as e:
+            print(f"Failed to read model metrics: {e}")
+    return None
+
+# -------------------------------
+# Model training and prediction, reporting validation accuracy (RMSE, MAE)
+# -------------------------------
+def train_and_predict(df, start_time, force_retrain=False):
+    global cached_model, last_training_time
+    model_cache_file = 'salinity_model.pkl'
+    model_needs_training = True
+
+    if os.path.exists(model_cache_file) and force_retrain:
+        try:
+            os.remove(model_cache_file)
+            print("Old model cache removed (forced retraining)")
+        except Exception as e:
+            print("Failed to remove cache:", e)
+
+    train_df = df[df['DateTime'] < start_time].copy()
+    if not force_retrain and cached_model is not None and last_training_time is not None:
+        if last_training_time >= train_df['DateTime'].max():
+            model_needs_training = False
+            print(f"Using cached model, trained at: {last_training_time}")
+    elif not force_retrain and os.path.exists(model_cache_file):
+        try:
+            with open(model_cache_file, 'rb') as f:
+                model_data = pickle.load(f)
+            cached_model = model_data['model']
+            last_training_time = model_data['training_time']
+            if last_training_time >= train_df['DateTime'].max():
+                model_needs_training = False
+                print(f"Model loaded from file, trained at: {last_training_time}")
+        except Exception as e:
+            print("Failed to load model:", e)
+
+    if model_needs_training:
+        print("Training a new model...")
+        if len(train_df) < 100:
+            print("Not enough training data")
+            return None, None, None, None
+
+        start_train = time()
+        X, y = create_features_vectorized(train_df, look_back=96, forecast_horizon=5)
+        if len(X) == 0 or len(y) == 0:
+            print("Too few samples generated; training aborted")
+            return None, None, None, None
+        print(f"Training samples: {X.shape[0]}")
+        X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
+        model = XGBRegressor(
+            n_estimators=300,
+            learning_rate=0.03,
+            max_depth=5,
+            min_child_weight=2,
+            subsample=0.85,
+            colsample_bytree=0.85,
+            gamma=0.1,
+            reg_alpha=0.2,
+            reg_lambda=1.5,
+            n_jobs=-1,
+            random_state=42
+        )
+        try:
+            model.fit(X_train, y_train,
+                      eval_set=[(X_val, y_val)], eval_metric='rmse',
+                      early_stopping_rounds=20, verbose=False)
+            # Compute RMSE and MAE on the validation set
+            y_val_pred = model.predict(X_val)
+            rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))
+            mae = mean_absolute_error(y_val, y_val_pred)
+            print(f"Validation RMSE: {rmse:.4f}, MAE: {mae:.4f}")
+            last_training_time = start_time
+            cached_model = model
+            with open(model_cache_file, 'wb') as f:
+                pickle.dump({
+                    'model': model,
+                    'training_time': last_training_time,
+                    'feature_columns': feature_columns,
+                    'rmse': rmse,
+                    'mae': mae
+                }, f)
+            print(f"Model training finished in {time() - start_train:.2f}s")
+        except Exception as e:
+            print("Training failed:", e)
+            return None, None, None, None
+    else:
+        model = cached_model
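+    # Note: passing eval_metric / early_stopping_rounds to fit() follows the
+    # pre-2.0 XGBoost API; on xgboost >= 2.0 these have moved to the
+    # XGBRegressor constructor, e.g.
+    #   XGBRegressor(..., eval_metric='rmse', early_stopping_rounds=20)
+    # with fit() then taking only eval_set. (Version-dependent; verify locally.)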
+
+    # Prediction: build a single prediction sample (consistent with the training features)
+    try:
+        # Follow the same approach as create_features_vectorized, taking the
+        # most recent data for the history window and the other features
+        n = len(df)
+        if n < 96 + 5:
+            print("Not enough data for prediction")
+            return None, None, None, None
+
+        # Latest upstream and downstream windows via sliding_window_view
+        upstream_array = df['upstream'].values
+        window_up = sliding_window_view(upstream_array, window_shape=96)[-1, :]
+        downstream_array = df['downstream'].values
+        window_down = sliding_window_view(downstream_array, window_shape=24)[-1, :]
+
+        # Time and lunar features are based on the forecast start time
+        hour_norm = start_time.hour / 24.0
+        weekday_norm = start_time.dayofweek / 7.0
+        month_norm = start_time.month / 12.0
+        basic_time_feats = np.array([hour_norm, weekday_norm, month_norm]).reshape(1, -1)
+        ld = LunarDate.fromSolarDate(start_time.year, start_time.month, start_time.day)
+        lunar_feats = np.array([np.sin(2*np.pi*ld.day/15),
+                                np.cos(2*np.pi*ld.day/15),
+                                1 if (ld.day <= 5 or (ld.day >= 16 and ld.day <= 20)) else 0]).reshape(1, -1)
+
+        # Statistical features from the latest 24/72 hours of data
+        try:
+            # Prefer the statistics already computed on the DataFrame
+            stats_up = df[['mean_1d_up','mean_3d_up','std_1d_up','max_1d_up','min_1d_up']].iloc[-1:].values
+            stats_down = df[['mean_1d_down','mean_3d_down','std_1d_down','max_1d_down','min_1d_down']].iloc[-1:].values
+        except KeyError:
+            # Otherwise compute them directly
+            recent_up = df['upstream'].values[-24:]
+            stats_up = np.array([np.mean(recent_up),
+                                 np.mean(df['upstream'].values[-72:]),
+                                 np.std(recent_up),
+                                 np.max(recent_up),
+                                 np.min(recent_up)]).reshape(1, -1)
+            recent_down = df['downstream'].values[-24:]
+            stats_down = np.array([np.mean(recent_down),
+                                   np.mean(df['downstream'].values[-72:]),
+                                   np.std(recent_down),
+                                   np.max(recent_down),
+                                   np.min(recent_down)]).reshape(1, -1)
+
+        # Lag features: take the values from the last row of the lag columns
+        delay_cols = [col for col in df.columns if col.startswith('upstream_delay_') or col.startswith('downstream_delay_')]
+        delay_feats = df[delay_cols].iloc[-1:].values  # shape (1, ?)
+
+        # Concatenate all prediction features
+        X_pred = np.hstack([window_up.reshape(1, -1),
+                            window_down.reshape(1, -1),
+                            basic_time_feats, lunar_feats, stats_up, stats_down, delay_feats])
+        if np.isnan(X_pred).any() or np.isinf(X_pred).any():
+            X_pred = np.nan_to_num(X_pred, nan=0.0, posinf=1e6, neginf=-1e6)
+        predictions = model.predict(X_pred)
+        # Generate the future date labels (forecast for the next 5 days)
+        future_dates = [start_time + timedelta(days=i) for i in range(5)]
+        print("Prediction finished")
+
+        # Fetch the model metrics
+        metrics = None
+        if os.path.exists(model_cache_file):
+            try:
+                with open(model_cache_file, 'rb') as f:
+                    model_data = pickle.load(f)
+                metrics = {
+                    'rmse': model_data.get('rmse', None),
+                    'mae': model_data.get('mae', None)
+                }
+            except Exception as e:
+                print(f"Failed to read model metrics: {e}")
+
+        return future_dates, predictions.flatten(), model, metrics
+    except Exception as e:
+        print("Prediction failed:", e)
+        return None, None, None, None
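+# Note: the prediction vector above must keep exactly the same block order as
+# create_features_vectorized (history window, downstream window, time, lunar,
+# statistics, lag features); with plain numpy input XGBoost matches features
+# by position only, so a reordering would silently corrupt the forecast.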
+
+# -------------------------------
+# GUI
+# -------------------------------
+def run_gui():
+    def configure_gui_fonts():
+        font_names = ['微软雅黑', 'Microsoft YaHei', 'SimSun', 'SimHei']
+        for font_name in font_names:
+            try:
+                default_font = tkfont.nametofont("TkDefaultFont")
+                default_font.configure(family=font_name)
+                text_font = tkfont.nametofont("TkTextFont")
+                text_font.configure(family=font_name)
+                fixed_font = tkfont.nametofont("TkFixedFont")
+                fixed_font.configure(family=font_name)
+                return True
+            except Exception:
+                continue
+        return False
+
+    def on_predict():
+        try:
+            predict_start = time()
+            status_label.config(text="Predicting...")
+            root.update()
+            start_time_dt = pd.to_datetime(entry.get())
+            force_retrain = retrain_var.get()
+            future_dates, predictions, model, metrics = train_and_predict(df, start_time_dt, force_retrain)
+            if future_dates is None or predictions is None:
+                status_label.config(text="Prediction failed")
+                return
+
+            # Fetch and display the model accuracy metrics
+            if metrics and metrics['rmse'] is not None:
+                metrics_text = f"Model accuracy - RMSE: {metrics['rmse']:.4f}, MAE: {metrics['mae']:.4f}"
+                metrics_label.config(text=metrics_text)
+
+            ax.clear()
+            # Plot the history (most recent 120 days)
+            history_end = min(start_time_dt, df['DateTime'].max())
+            history_start = history_end - timedelta(days=120)
+            hist_data = df[(df['DateTime'] >= history_start) &
+                           (df['DateTime'] <= history_end)]
+            ax.plot(hist_data['DateTime'], hist_data['downstream'], label='No.1 Intake (downstream) salinity', color='blue', linewidth=1.5)
+            ax.plot(hist_data['DateTime'], hist_data['upstream_smooth'], label='Qinglong Port (upstream) salinity', color='purple', linewidth=1.5, alpha=0.7)
+            if 'qinglong_lake_smooth' in hist_data.columns:
+                ax.plot(hist_data['DateTime'], hist_data['qinglong_lake_smooth'], label='Qinglong Lake salinity', color='green', linewidth=1.5, alpha=0.7)
+            ax.plot(future_dates, predictions, marker='o', linestyle='--', label='Predicted salinity', color='red', linewidth=2)
+            actual_data = df[(df['DateTime'] >= start_time_dt) & (df['DateTime'] <= future_dates[-1])]
+            if not actual_data.empty:
+                ax.plot(actual_data['DateTime'], actual_data['downstream'], marker='s', linestyle='-', label='Actual salinity', color='orange', linewidth=2)
+            std_dev = hist_data['downstream'].std() * 0.5
+            ax.fill_between(future_dates, predictions - std_dev, predictions + std_dev, color='red', alpha=0.2)
+            ax.set_xlabel('Date')
+            ax.set_ylabel('Salinity')
+            ax.set_title(f"Salinity forecast starting {start_time_dt.strftime('%Y-%m-%d %H:%M:%S')}")
+            ax.legend(loc='upper left')
+            fig.tight_layout()
+            canvas.draw()
+            predict_time = time() - predict_start
+            status_label.config(text=f"Prediction finished ({predict_time:.2f}s)")
+            result_text = "Forecast:\n"
+            for i, (date, pred) in enumerate(zip(future_dates, predictions)):
+                result_text += f"Day {i+1} ({date.strftime('%Y-%m-%d')}): {pred:.2f}\n"
+            result_label.config(text=result_text)
+        except Exception as e:
+            status_label.config(text=f"Error: {str(e)}")
+
+    def on_scroll(event):
+        xlim = ax.get_xlim()
+        ylim = ax.get_ylim()
+        zoom_factor = 1.1
+        x_data = event.xdata if event.xdata is not None else (xlim[0]+xlim[1])/2
+        y_data = event.ydata if event.ydata is not None else (ylim[0]+ylim[1])/2
+        x_rel = (x_data - xlim[0]) / (xlim[1] - xlim[0])
+        y_rel = (y_data - ylim[0]) / (ylim[1] - ylim[0])
+        if event.step > 0:
+            new_width = (xlim[1]-xlim[0]) / zoom_factor
+            new_height = (ylim[1]-ylim[0]) / zoom_factor
+        else:
+            new_width = (xlim[1]-xlim[0]) * zoom_factor
+            new_height = (ylim[1]-ylim[0]) * zoom_factor
+        x0 = x_data - x_rel * new_width
+        y0 = y_data - y_rel * new_height
+        ax.set_xlim([x0, x0+new_width])
+        ax.set_ylim([y0, y0+new_height])
+        canvas.draw_idle()
+
+    def update_cursor(event):
+        if event.inaxes == ax:
+            canvas.get_tk_widget().config(cursor="fleur")
+        else:
+            canvas.get_tk_widget().config(cursor="")
+
+    def reset_view():
+        display_history()
+        status_label.config(text="Chart view reset")
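+    # Zoom note: x_rel/y_rel pin the data point under the cursor while the axis
+    # span is scaled by zoom_factor, so e.g. a 10-day x-span becomes ~9.09 days
+    # after one zoom-in step (10 / 1.1). (Illustrative numbers.)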
+
+    root = tk.Tk()
+    root.title("Qinglong Port - Chenhang Salinity Prediction System")
+    try:
+        configure_gui_fonts()
+    except Exception as e:
+        print("Font configuration failed:", e)
+    input_frame = ttk.Frame(root, padding="10")
+    input_frame.pack(fill=tk.X)
+    control_frame = ttk.Frame(root, padding="5")
+    control_frame.pack(fill=tk.X)
+    result_frame = ttk.Frame(root, padding="10")
+    result_frame.pack(fill=tk.BOTH, expand=True)
+    ttk.Label(input_frame, text="Start time (YYYY-MM-DD HH:MM:SS)").pack(side=tk.LEFT)
+    entry = ttk.Entry(input_frame, width=25)
+    entry.pack(side=tk.LEFT, padx=5)
+    predict_button = ttk.Button(input_frame, text="Predict", command=on_predict)
+    predict_button.pack(side=tk.LEFT)
+    status_label = ttk.Label(input_frame, text="Hint: tick 'Force model retraining' on the first run")
+    status_label.pack(side=tk.LEFT, padx=10)
+    retrain_var = tk.BooleanVar(value=False)
+    ttk.Checkbutton(control_frame, text="Force model retraining", variable=retrain_var).pack(side=tk.LEFT)
+    legend_label = ttk.Label(control_frame, text="Legend: purple = Qinglong Port upstream, blue = No.1 Intake downstream, red = forecast, orange = actual")
+    legend_label.pack(side=tk.LEFT, padx=10)
+    reset_button = ttk.Button(control_frame, text="Reset view", command=reset_view)
+    reset_button.pack(side=tk.LEFT, padx=5)
+
+    # Label showing the model accuracy
+    metrics_frame = ttk.Frame(root, padding="5")
+    metrics_frame.pack(fill=tk.X)
+    model_metrics = get_model_metrics()
+    if model_metrics and model_metrics['rmse'] is not None:
+        metrics_text = f"Model accuracy - RMSE: {model_metrics['rmse']:.4f}, MAE: {model_metrics['mae']:.4f}"
+    else:
+        metrics_text = "Model accuracy: unknown"
+    metrics_label = ttk.Label(metrics_frame, text=metrics_text)
+    metrics_label.pack(side=tk.LEFT, padx=10)
+
+    result_label = ttk.Label(result_frame, text="", justify=tk.LEFT)
+    result_label.pack(side=tk.RIGHT, fill=tk.Y)
+    fig, ax = plt.subplots(figsize=(10, 5), dpi=100)
+    canvas = FigureCanvasTkAgg(fig, master=result_frame)
+    canvas.get_tk_widget().pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
+    toolbar_frame = ttk.Frame(result_frame)
+    toolbar_frame.pack(side=tk.BOTTOM, fill=tk.X)
+    toolbar = NavigationToolbar2Tk(canvas, toolbar_frame)
+    toolbar.update()
+    canvas.mpl_connect('scroll_event', on_scroll)
+    canvas.mpl_connect('motion_notify_event', update_cursor)
+
+    def display_history():
+        ax.clear()
+        end_date = df['DateTime'].max()
+        start_date = max(df['DateTime'].min(), end_date - timedelta(days=60))
+        hist_data = df[(df['DateTime'] >= start_date) & (df['DateTime'] <= end_date)]
+        ax.plot(hist_data['DateTime'], hist_data['downstream'], label='No.1 Intake (downstream) salinity', color='blue', linewidth=1.5)
+        ax.plot(hist_data['DateTime'], hist_data['upstream_smooth'], label='Qinglong Port (upstream) salinity', color='purple', linewidth=1.5, alpha=0.7)
+        ax.set_xlabel('Date')
+        ax.set_ylabel('Salinity')
+        ax.set_title('Historical salinity comparison')
+        ax.legend()
+        fig.tight_layout()
+        canvas.draw()
+
+    display_history()
+    root.mainloop()
+
+# -------------------------------
+# Main entry: load data, add features, generate lag features, then start the GUI
+# -------------------------------
+def save_processed_data(df, filename='processed_data.pkl'):
+    try:
+        df.to_pickle(filename)
+        print(f"Processed data saved to {filename}")
+        return True
+    except Exception as e:
+        print(f"Failed to save data: {e}")
+        return False
+
+def load_processed_data(filename='processed_data.pkl'):
+    try:
+        if os.path.exists(filename):
+            df = pd.read_pickle(filename)
+            print(f"Processed data loaded from {filename}")
+            return df
+        else:
+            print(f"Processed data file {filename} not found")
+            return None
+    except Exception as e:
+        print(f"Failed to load data: {e}")
+        return None
+
+# Try to load the processed data; reprocess from scratch if it does not exist
+processed_data = load_processed_data()
+if processed_data is not None:
+    df = processed_data
+else:
+    df = load_data('青龙港1.csv', '一取水.csv')
+    if df is not None:
+        df = add_lunar_features(df)
+        delay_hours = [1, 2, 3, 4, 6, 12, 24, 36, 48, 60, 72, 84, 96, 108, 120]
+        df = batch_create_delay_features(df, delay_hours)
+
+        # Add statistical features
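+        # Note: the rolling windows below are row counts, so 24/72 rows equal
+        # 1-day and 3-day statistics only under the assumed hourly sampling.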
+        df['mean_1d_up'] = df['upstream'].rolling(window=24, min_periods=1).mean()
+        df['mean_3d_up'] = df['upstream'].rolling(window=72, min_periods=1).mean()
+        df['std_1d_up'] = df['upstream'].rolling(window=24, min_periods=1).std()
+        df['max_1d_up'] = df['upstream'].rolling(window=24, min_periods=1).max()
+        df['min_1d_up'] = df['upstream'].rolling(window=24, min_periods=1).min()
+
+        df['mean_1d_down'] = df['downstream'].rolling(window=24, min_periods=1).mean()
+        df['mean_3d_down'] = df['downstream'].rolling(window=72, min_periods=1).mean()
+        df['std_1d_down'] = df['downstream'].rolling(window=24, min_periods=1).std()
+        df['max_1d_down'] = df['downstream'].rolling(window=24, min_periods=1).max()
+        df['min_1d_down'] = df['downstream'].rolling(window=24, min_periods=1).min()
+
+        # Save the processed data
+        save_processed_data(df)
+
+if df is not None:
+    run_gui()
+else:
+    print("Data loading failed; cannot run the forecast.")
--
Gitblit v1.9.3