IStation/shys/ChXcAi.git

¶Ô±ÈÐÂÎÄ¼þ
			@@ -0,0 +1,611 @@
			import os
			import pickle
			import pandas as pd
			import numpy as np
			import tkinter as tk
			import tkinter.font as tkfont
			from tkinter import ttk
			from datetime import timedelta
			from time import time
			import matplotlib.pyplot as plt
			from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg, NavigationToolbar2Tk
			from xgboost import XGBRegressor
			from lunardate import LunarDate
			from sklearn.model_selection import train_test_split
			from sklearn.metrics import mean_squared_error, mean_absolute_error
			import matplotlib

			# Configure matplotlib so that Chinese text renders: list CJK-capable
			# sans-serif fonts first and disable the Unicode minus glyph.
			matplotlib.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'SimSun', 'Arial Unicode MS']
			matplotlib.rcParams['axes.unicode_minus'] = False
			matplotlib.rcParams['font.family'] = 'sans-serif'

			# Module-level model cache and feature names (feature_columns is only a placeholder here).
			cached_model = None
			last_training_time = None
			feature_columns = None

			# -------------------------------
			# Data loading and preprocessing
			# -------------------------------
			def load_data(upstream_file, downstream_file, qinglong_lake_file=None):
			    """Load the salinity CSV files, clean them and merge them on timestamp.

			    Every CSV is assumed to contain exactly three columns in the order
			    timestamp, tag name, value; the header names are overwritten.

			    Processing steps:
			      1. Parse timestamps and coerce values to numbers.
			      2. Keep only readings with value >= 5 (this also removes NaN and
			         zero readings, so no separate zero handling is needed).
			      3. Set readings further than 3 standard deviations from the mean
			         to NaN.
			      4. Inner-join upstream and downstream on ``DateTime``; the optional
			         lake series is left-joined.
			      5. Sort chronologically, then fill gaps: linear interpolation
			         (limit 4 rows), time-weighted interpolation (limit 24 rows),
			         then forward fill and backward fill.
			      6. Add centred rolling-mean ``<name>_smooth`` columns (24 rows);
			         upstream readings below 50 are smoothed with a 48-row window
			         computed over those low readings only.

			    Args:
			        upstream_file: Path of the upstream CSV.
			        downstream_file: Path of the downstream CSV.
			        qinglong_lake_file: Optional path of the lake CSV.

			    Returns:
			        A DataFrame ordered by ``DateTime`` with the columns ``DateTime``,
			        ``upstream``, ``downstream`` (and ``qinglong_lake``) followed by
			        their ``*_smooth`` counterparts, or ``None`` if a file is missing.
			    """

			    def _read_series(path, column):
			        """Read one CSV and return a cleaned ['DateTime', column] frame."""
			        frame = pd.read_csv(path)
			        frame.columns = ['DateTime', 'TagName', 'Value']
			        frame['DateTime'] = pd.to_datetime(frame['DateTime'])
			        frame['Value'] = pd.to_numeric(frame['Value'], errors='coerce')
			        # .copy() so the outlier masking below writes to an independent frame.
			        frame = frame[frame['Value'] >= 5].copy()
			        mean_val, std_val = frame['Value'].mean(), frame['Value'].std()
			        lower_bound, upper_bound = mean_val - 3 * std_val, mean_val + 3 * std_val
			        outliers = (frame['Value'] < lower_bound) | (frame['Value'] > upper_bound)
			        frame.loc[outliers, 'Value'] = np.nan
			        return frame.rename(columns={'Value': column})[['DateTime', column]]

			    try:
			        upstream_df = _read_series(upstream_file, 'upstream')
			        downstream_df = _read_series(downstream_file, 'downstream')
			        lake_df = _read_series(qinglong_lake_file, 'qinglong_lake') if qinglong_lake_file else None
			    except FileNotFoundError:
			        print("æä»¶æªæ¾å°ï¼è¯·æ£æ¥è·¯å¾")
			        return None

			    value_cols = ['upstream', 'downstream']
			    merged_df = pd.merge(upstream_df, downstream_df, on='DateTime', how='inner')
			    if lake_df is not None:
			        merged_df = pd.merge(merged_df, lake_df, on='DateTime', how='left')
			        value_cols.append('qinglong_lake')

			    print(f"åå¹¶åæ°æ®è¡æ°: {len(merged_df)}")
			    # Sort before interpolating/smoothing: both operate on row order, so
			    # an unsorted CSV would otherwise be filled across unrelated rows.
			    merged_df = merged_df.set_index('DateTime').sort_index()

			    # Gap filling: short gaps linearly, longer ones time-weighted, the rest by ffill/bfill.
			    for col in value_cols:
			        series = merged_df[col].interpolate(method='linear', limit=4)
			        series = series.interpolate(method='time', limit=24)
			        merged_df[col] = series.ffill().bfill()

			    # Smoothing with a centred moving average.
			    for col in value_cols:
			        merged_df[f'{col}_smooth'] = merged_df[col].rolling(window=24, min_periods=1, center=True).mean()

			    # Low upstream salinity is smoothed with a wider window.
			    low_sal_mask = merged_df['upstream'] < 50
			    if low_sal_mask.any():
			        merged_df.loc[low_sal_mask, 'upstream_smooth'] = (
			            merged_df.loc[low_sal_mask, 'upstream']
			            .rolling(window=48, min_periods=1, center=True)
			            .mean()
			        )

			    merged_df = merged_df.dropna()
			    merged_df = merged_df[np.isfinite(merged_df[value_cols]).all(axis=1)]
			    merged_df = merged_df.reset_index()
			    print(f"æ¸æ´åæ°æ®è¡æ°: {len(merged_df)}")
			    print(f"ä¸æ¸¸çåº¦èå´: {merged_df['upstream'].min()} - {merged_df['upstream'].max()}")
			    print(f"ä¸æ¸¸çåº¦èå´: {merged_df['downstream'].min()} - {merged_df['downstream'].max()}")
			    if lake_df is not None:
			        print(f"éé¾æ¹çåº¦èå´: {merged_df['qinglong_lake'].min()} - {merged_df['qinglong_lake'].max()}")
			    # Already in chronological order thanks to sort_index() above.
			    return merged_df

			# -------------------------------
			# Add lunar-calendar (tidal) features
			# -------------------------------
			def add_lunar_features(df):
			    """Add lunar-calendar features derived from ``df['DateTime']``.

			    Columns written to ``df`` (the frame is modified in place and returned):
			      * ``lunar_day``       - day of the lunar month reported by ``LunarDate``.
			      * ``lunar_phase_sin`` - ``sin(2*pi*lunar_day/15)``.
			      * ``lunar_phase_cos`` - ``cos(2*pi*lunar_day/15)``.
			      * ``is_high_tide``    - 1 when ``lunar_day`` is in 1..5 or 16..20, else 0.

			    Args:
			        df: DataFrame with a datetime-typed ``DateTime`` column.

			    Returns:
			        The same DataFrame with the four columns added.
			    """
			    solar_dates = df['DateTime'].dt.date
			    # The lunar day depends only on the calendar date, so convert each
			    # distinct date once instead of once per row.
			    lunar_day_by_date = {
			        day: LunarDate.fromSolarDate(day.year, day.month, day.day).day
			        for day in solar_dates.unique()
			    }
			    lunar_day = solar_dates.map(lunar_day_by_date).to_numpy(dtype=int)
			    phase = 2 * np.pi * lunar_day / 15

			    df['lunar_day'] = lunar_day
			    df['lunar_phase_sin'] = np.sin(phase)
			    df['lunar_phase_cos'] = np.cos(phase)
			    df['is_high_tide'] = (
			        (lunar_day <= 5) | ((lunar_day >= 16) & (lunar_day <= 20))
			    ).astype(int)
			    return df

			# -------------------------------
			# Batch-generate delay (lag) features (vectorized, using shift)
			# -------------------------------
			def batch_create_delay_features(df, delay_hours):
			    """Add lagged copies of the ``upstream`` and ``downstream`` columns.

			    For every ``delay`` in ``delay_hours`` the columns
			    ``upstream_delay_{delay}h`` and ``downstream_delay_{delay}h`` are
			    created with ``Series.shift(delay)``. The shift is by rows, so the
			    ``h`` suffix is only accurate for hourly data (assumption - confirm
			    against the input files).

			    Args:
			        df: DataFrame with ``upstream`` and ``downstream`` columns;
			            modified in place.
			        delay_hours: Iterable of integer row offsets.

			    Returns:
			        The same DataFrame with the lag columns added.
			    """
			    for delay in delay_hours:
			        for source in ('upstream', 'downstream'):
			            df[f'{source}_delay_{delay}h'] = df[source].shift(delay)
			    return df

			# -------------------------------
			# Vectorized construction of training samples (optimized feature engineering)
			# -------------------------------
			def create_features_vectorized(df, look_back=96, forecast_horizon=5):
			    """Build the training matrix ``X`` and multi-step labels ``y``.

			    Sample ``i`` forecasts the rows ``t .. t+forecast_horizon-1`` where
			    ``t = i + look_back``. Its feature vector is the concatenation of:
			      * the ``look_back`` upstream values before ``t``;
			      * the last ``min(24, look_back)`` downstream values before ``t``;
			      * hour/24, weekday/7 and month/12 of row ``t``;
			      * ``lunar_phase_sin``, ``lunar_phase_cos``, ``is_high_tide`` of row ``t``;
			      * the pre-computed rolling statistics and every
			        ``upstream_delay_*`` / ``downstream_delay_*`` column of row ``t-1``.

			    The statistics and delay columns are read from row ``t-1`` (the last
			    observed row) rather than row ``t``: the rolling statistics of row
			    ``t`` include ``downstream[t]``, which is the first label value, and
			    the prediction sample in ``train_and_predict`` is also built from the
			    last observed row.

			    Args:
			        df: Chronologically ordered DataFrame with ``DateTime``,
			            ``upstream``, ``downstream``, the lunar columns and the
			            ``mean_1d_*`` / ``mean_3d_*`` / ``std_1d_*`` / ``max_1d_*`` /
			            ``min_1d_*`` statistic columns.
			        look_back: Number of past upstream rows per sample (>= 1).
			        forecast_horizon: Number of future downstream rows to predict (>= 1).

			    Returns:
			        ``(X, y)`` with shapes ``(n_samples, n_features)`` and
			        ``(n_samples, forecast_horizon)``; two empty arrays if there are
			        too few rows or the statistic columns are missing.

			    Raises:
			        ValueError: If ``look_back`` or ``forecast_horizon`` is < 1.
			    """
			    from numpy.lib.stride_tricks import sliding_window_view

			    if look_back < 1 or forecast_horizon < 1:
			        raise ValueError("look_back and forecast_horizon must be >= 1")

			    total_samples = len(df) - look_back - forecast_horizon + 1
			    if total_samples <= 0:
			        print("æ°æ®ä¸è¶³ä»¥åå»ºç¹å¾")
			        return np.array([]), np.array([])

			    # The downstream history window can never be longer than look_back.
			    down_window = min(24, look_back)

			    upstream_array = df['upstream'].values
			    downstream_array = df['downstream'].values

			    # Row i of window_up is upstream[i : i + look_back].
			    window_up = sliding_window_view(upstream_array, window_shape=look_back)[:total_samples, :]
			    # Row i of window_down is downstream[t - down_window : t].
			    first_down = look_back - down_window
			    window_down = sliding_window_view(downstream_array, window_shape=down_window)[
			        first_down:first_down + total_samples, :
			    ]

			    target_rows = df.iloc[look_back:look_back + total_samples]            # row t
			    observed_rows = df.iloc[look_back - 1:look_back - 1 + total_samples]  # row t-1

			    timestamps = target_rows['DateTime'].dt
			    basic_time_feats = np.hstack([
			        timestamps.hour.values.reshape(-1, 1) / 24.0,
			        timestamps.dayofweek.values.reshape(-1, 1) / 7.0,
			        timestamps.month.values.reshape(-1, 1) / 12.0,
			    ])
			    lunar_feats = target_rows[['lunar_phase_sin', 'lunar_phase_cos', 'is_high_tide']].values

			    try:
			        stats_up = observed_rows[['mean_1d_up', 'mean_3d_up', 'std_1d_up', 'max_1d_up', 'min_1d_up']].values
			        stats_down = observed_rows[['mean_1d_down', 'mean_3d_down', 'std_1d_down', 'max_1d_down', 'min_1d_down']].values
			    except KeyError as e:
			        print(f"ç»è®¡ç¹å¾åä¸åå¨: {e}ï¼è¯·ç¡®ä¿åè®¡ç®ç»è®¡ç¹å¾")
			        return np.array([]), np.array([])

			    delay_cols = [
			        col for col in observed_rows.columns
			        if col.startswith(('upstream_delay_', 'downstream_delay_'))
			    ]
			    delay_feats = observed_rows[delay_cols].values

			    X = np.hstack([window_up, window_down, basic_time_feats, lunar_feats,
			                   stats_up, stats_down, delay_feats])

			    # Row i of y is downstream[t : t + forecast_horizon].
			    y = sliding_window_view(downstream_array, window_shape=forecast_horizon)[
			        look_back:look_back + total_samples, :
			    ]

			    global feature_columns
			    feature_columns = ["combined_vector_features"]
			    print(f"åéåç¹å¾å·¥ç¨å®æï¼æææ ·æ¬æ°: {X.shape[0]}")
			    return X, y

			# -------------------------------
			# Read the model accuracy metrics
			# -------------------------------
			def get_model_metrics():
			    """Return the accuracy metrics stored in the model cache file.

			    Reads ``salinity_model.pkl`` from the current working directory.

			    Returns:
			        ``{'rmse': ..., 'mae': ...}`` when the cache exists and holds both
			        values; ``None`` when the file is missing, unreadable, or lacks a
			        metric. Returning ``None`` instead of a dict of ``None`` values
			        lets callers format the numbers without further checks.
			    """
			    model_cache_file = 'salinity_model.pkl'
			    if not os.path.exists(model_cache_file):
			        return None
			    try:
			        # NOTE: pickle executes arbitrary code on load; only open cache
			        # files written by this application.
			        with open(model_cache_file, 'rb') as f:
			            model_data = pickle.load(f)
			        rmse = model_data.get('rmse', None)
			        mae = model_data.get('mae', None)
			    except Exception as e:
			        print(f"è·åæ¨¡åææ å¤±è´¥: {e}")
			        return None
			    if rmse is None or mae is None:
			        return None
			    return {'rmse': rmse, 'mae': mae}

			# -------------------------------
			# Model training and prediction (reports validation accuracy: RMSE, MAE)
			# -------------------------------
			def train_and_predict(df, start_time, force_retrain=False):
			    """Train (or reuse) the XGBoost model and forecast from ``start_time``.

			    The model is trained on the rows of ``df`` strictly before
			    ``start_time``. A cached model (in memory, or in
			    ``salinity_model.pkl``) is reused when its recorded training time is
			    not older than the newest training row and ``force_retrain`` is false.

			    The prediction sample is built from those same rows before
			    ``start_time``, so a ``start_time`` inside the data range is forecast
			    from its own history rather than from the end of the file.

			    Args:
			        df: Feature DataFrame (``DateTime``, ``upstream``, ``downstream``,
			            statistic and delay columns).
			        start_time: First forecast timestamp (anything ``pd.Timestamp``
			            accepts).
			        force_retrain: Delete the cache file and train a new model.

			    Returns:
			        ``(future_dates, predictions, model, metrics)``; four ``None``
			        values on any failure. ``future_dates`` are ``start_time`` plus
			        0..4 days. NOTE(review): the labels are the next 5 *rows* of the
			        data; whether one row equals one day depends on the input
			        sampling interval - confirm.
			    """
			    global cached_model, last_training_time
			    model_cache_file = 'salinity_model.pkl'
			    look_back, forecast_horizon, down_window = 96, 5, 24
			    failure = (None, None, None, None)
			    model_needs_training = True
			    start_time = pd.Timestamp(start_time)

			    if os.path.exists(model_cache_file) and force_retrain:
			        try:
			            os.remove(model_cache_file)
			            print("å·²å é¤æ§æ¨¡åç¼åï¼å¼ºå¶éæ°è®ç»ï¼")
			        except Exception as e:
			            print("å é¤ç¼åå¼å¸¸:", e)

			    train_df = df[df['DateTime'] < start_time].copy()
			    if not force_retrain and cached_model is not None and last_training_time is not None:
			        if last_training_time >= train_df['DateTime'].max():
			            model_needs_training = False
			            print(f"ä½¿ç¨ç¼åæ¨¡åï¼è®ç»æ¶é´: {last_training_time}")
			    elif not force_retrain and os.path.exists(model_cache_file):
			        try:
			            # NOTE: pickle executes arbitrary code on load; only open
			            # cache files written by this application.
			            with open(model_cache_file, 'rb') as f:
			                model_data = pickle.load(f)
			            cached_model = model_data['model']
			            last_training_time = model_data['training_time']
			            if last_training_time >= train_df['DateTime'].max():
			                model_needs_training = False
			                print(f"ä»æä»¶å è½½æ¨¡åï¼è®ç»æ¶é´: {last_training_time}")
			        except Exception as e:
			            print("å è½½æ¨¡åå¤±è´¥:", e)

			    if model_needs_training:
			        print("å¼å§è®ç»æ°æ¨¡å...")
			        if len(train_df) < 100:
			            print("è®ç»æ°æ®ä¸è¶³")
			            return failure

			        start_train = time()
			        X, y = create_features_vectorized(train_df, look_back=look_back,
			                                          forecast_horizon=forecast_horizon)
			        if len(X) == 0 or len(y) == 0:
			            print("æ ·æ¬çæä¸è¶³ï¼è®ç»ç»æ¢")
			            return failure
			        print(f"è®ç»æ ·æ¬æ°é: {X.shape[0]}")
			        # NOTE(review): a random split of overlapping time windows makes
			        # the validation score optimistic; a chronological split would
			        # be stricter.
			        X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
			        # eval_metric / early_stopping_rounds belong to the constructor:
			        # passing them to fit() was deprecated in XGBoost 1.6 and is
			        # rejected by 2.x (multi-output targets need >= 1.6 anyway).
			        model = XGBRegressor(
			            n_estimators=300,
			            learning_rate=0.03,
			            max_depth=5,
			            min_child_weight=2,
			            subsample=0.85,
			            colsample_bytree=0.85,
			            gamma=0.1,
			            reg_alpha=0.2,
			            reg_lambda=1.5,
			            n_jobs=-1,
			            random_state=42,
			            eval_metric='rmse',
			            early_stopping_rounds=20,
			        )
			        try:
			            model.fit(X_train, y_train, eval_set=[(X_val, y_val)], verbose=False)
			            # RMSE and MAE on the validation split.
			            y_val_pred = model.predict(X_val)
			            rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))
			            mae = mean_absolute_error(y_val, y_val_pred)
			            print(f"éªè¯é RMSE: {rmse:.4f}, MAE: {mae:.4f}")
			            last_training_time = start_time
			            cached_model = model
			            with open(model_cache_file, 'wb') as f:
			                pickle.dump({
			                    'model': model,
			                    'training_time': last_training_time,
			                    'feature_columns': feature_columns,
			                    'rmse': rmse,
			                    'mae': mae,
			                }, f)
			            print(f"æ¨¡åè®ç»å®æï¼èæ¶: {time() - start_train:.2f}ç§")
			        except Exception as e:
			            print("æ¨¡åè®ç»å¼å¸¸:", e)
			            return failure
			    else:
			        model = cached_model

			    # Prediction: build one sample with the same layout as the training rows.
			    try:
			        history = train_df  # rows strictly before start_time
			        if len(history) < look_back + forecast_horizon:
			            print("é¢æµæ°æ®ä¸è¶³")
			            return failure

			        window_up = history['upstream'].values[-look_back:]
			        window_down = history['downstream'].values[-down_window:]

			        # Time and lunar features describe the forecast start itself.
			        basic_time_feats = np.array([
			            start_time.hour / 24.0,
			            start_time.dayofweek / 7.0,
			            start_time.month / 12.0,
			        ]).reshape(1, -1)
			        ld = LunarDate.fromSolarDate(start_time.year, start_time.month, start_time.day)
			        lunar_phase = 2 * np.pi * ld.day / 15
			        lunar_feats = np.array([
			            np.sin(lunar_phase),
			            np.cos(lunar_phase),
			            1 if (ld.day <= 5 or 16 <= ld.day <= 20) else 0,
			        ]).reshape(1, -1)

			        stat_cols_up = ['mean_1d_up', 'mean_3d_up', 'std_1d_up', 'max_1d_up', 'min_1d_up']
			        stat_cols_down = ['mean_1d_down', 'mean_3d_down', 'std_1d_down', 'max_1d_down', 'min_1d_down']
			        try:
			            # Prefer the statistics already stored in the DataFrame.
			            stats_up = history[stat_cols_up].iloc[-1:].values
			            stats_down = history[stat_cols_down].iloc[-1:].values
			        except KeyError:
			            # Otherwise compute them from the last 24 / 72 rows.

			            def _recent_stats(values):
			                recent = values[-24:]
			                # ddof=1 matches pandas' rolling().std() used for the stored columns.
			                return np.array([np.mean(recent), np.mean(values[-72:]),
			                                 np.std(recent, ddof=1), np.max(recent),
			                                 np.min(recent)]).reshape(1, -1)

			            stats_up = _recent_stats(history['upstream'].values)
			            stats_down = _recent_stats(history['downstream'].values)

			        # Delay features are read from the last observed row.
			        delay_cols = [
			            col for col in history.columns
			            if col.startswith(('upstream_delay_', 'downstream_delay_'))
			        ]
			        delay_feats = history[delay_cols].iloc[-1:].values

			        X_pred = np.hstack([window_up.reshape(1, -1),
			                            window_down.reshape(1, -1),
			                            basic_time_feats, lunar_feats,
			                            stats_up, stats_down, delay_feats])
			        if np.isnan(X_pred).any() or np.isinf(X_pred).any():
			            X_pred = np.nan_to_num(X_pred, nan=0.0, posinf=1e6, neginf=-1e6)
			        predictions = model.predict(X_pred)
			        # Date labels for the forecast values.
			        future_dates = [start_time + timedelta(days=i) for i in range(forecast_horizon)]
			        print("é¢æµå®æ")

			        # Metrics stored alongside the cached model (None if unavailable).
			        metrics = get_model_metrics()

			        return future_dates, predictions.flatten(), model, metrics
			    except Exception as e:
			        print("é¢æµè¿ç¨å¼å¸¸:", e)
			        return failure

			# -------------------------------
			# GUI section
			# -------------------------------
			def run_gui():
			    """Build and run the Tkinter window for interactive salinity forecasts.

			    Reads the module-level DataFrame ``df``. The window offers a start-time
			    entry, a predict button, a "force retrain" checkbox, a reset-view
			    button, a metrics line, a textual result panel and an embedded
			    matplotlib chart with mouse-wheel zoom. Blocks in ``root.mainloop()``
			    until the window is closed.
			    """

			    def configure_gui_fonts():
			        """Apply the first installed candidate font to Tk's default fonts.

			        Returns True when a font was applied, False when none of the
			        candidates is installed.
			        """
			        font_names = ['å¾®è½¯éé»', 'Microsoft YaHei', 'SimSun', 'SimHei']
			        # configure(family=...) does not fail for unknown families, so
			        # availability has to be checked explicitly.
			        installed = set(tkfont.families())
			        for font_name in font_names:
			            if font_name not in installed:
			                continue
			            for tk_name in ("TkDefaultFont", "TkTextFont", "TkFixedFont"):
			                tkfont.nametofont(tk_name).configure(family=font_name)
			            return True
			        return False

			    def describe_metrics(metrics):
			        """Format the metrics line; tolerate missing or None values."""
			        if not metrics or metrics.get('rmse') is None or metrics.get('mae') is None:
			            return "æ¨¡ååç¡®åº¦: æªç¥"
			        return f"æ¨¡ååç¡®åº¦ - RMSE: {metrics['rmse']:.4f}, MAE: {metrics['mae']:.4f}"

			    def display_history():
			        """Plot the most recent 60 days of downstream and smoothed upstream data."""
			        ax.clear()
			        end_date = df['DateTime'].max()
			        start_date = max(df['DateTime'].min(), end_date - timedelta(days=60))
			        hist_data = df[(df['DateTime'] >= start_date) & (df['DateTime'] <= end_date)]
			        ax.plot(hist_data['DateTime'], hist_data['downstream'], label='ä¸åæ°´(ä¸æ¸¸)çåº¦', color='blue', linewidth=1.5)
			        ax.plot(hist_data['DateTime'], hist_data['upstream_smooth'], label='éé¾æ¸¯(ä¸æ¸¸)çåº¦', color='purple', linewidth=1.5, alpha=0.7)
			        ax.set_xlabel('æ¥æ')
			        ax.set_ylabel('çåº¦')
			        ax.set_title('åå²çåº¦æ°æ®å¯¹æ¯')
			        ax.legend()
			        fig.tight_layout()
			        canvas.draw()

			    def on_predict():
			        """Run train_and_predict for the entered start time and redraw the chart."""
			        try:
			            predict_start = time()
			            status_label.config(text="é¢æµä¸...")
			            root.update()
			            start_time_dt = pd.to_datetime(entry.get())
			            force_retrain = retrain_var.get()
			            future_dates, predictions, model, metrics = train_and_predict(df, start_time_dt, force_retrain)
			            if future_dates is None or predictions is None:
			                status_label.config(text="é¢æµå¤±è´¥")
			                return

			            # Show the accuracy of the model that produced the forecast.
			            if metrics:
			                metrics_label.config(text=describe_metrics(metrics))

			            ax.clear()
			            # History: the 120 days up to the forecast start.
			            history_end = min(start_time_dt, df['DateTime'].max())
			            history_start = history_end - timedelta(days=120)
			            hist_data = df[(df['DateTime'] >= history_start) & (df['DateTime'] <= history_end)]
			            ax.plot(hist_data['DateTime'], hist_data['downstream'], label='ä¸åæ°´(ä¸æ¸¸)çåº¦', color='blue', linewidth=1.5)
			            ax.plot(hist_data['DateTime'], hist_data['upstream_smooth'], label='éé¾æ¸¯(ä¸æ¸¸)çåº¦', color='purple', linewidth=1.5, alpha=0.7)
			            if 'qinglong_lake_smooth' in hist_data.columns:
			                ax.plot(hist_data['DateTime'], hist_data['qinglong_lake_smooth'], label='éé¾æ¹çåº¦', color='green', linewidth=1.5, alpha=0.7)
			            ax.plot(future_dates, predictions, marker='o', linestyle='--', label='é¢æµçåº¦', color='red', linewidth=2)
			            # Overlay measured values when the forecast period is inside the data.
			            actual_data = df[(df['DateTime'] >= start_time_dt) & (df['DateTime'] <= future_dates[-1])]
			            if not actual_data.empty:
			                ax.plot(actual_data['DateTime'], actual_data['downstream'], marker='s', linestyle='-', label='å®éçåº¦', color='orange', linewidth=2)
			            # Band of half the historical standard deviation around the forecast.
			            std_dev = hist_data['downstream'].std() * 0.5
			            ax.fill_between(future_dates, predictions - std_dev, predictions + std_dev, color='red', alpha=0.2)
			            ax.set_xlabel('æ¥æ')
			            ax.set_ylabel('çåº¦')
			            ax.set_title(f"ä» {start_time_dt.strftime('%Y-%m-%d %H:%M:%S')} å¼å§ççåº¦é¢æµ")
			            ax.legend(loc='upper left')
			            fig.tight_layout()
			            canvas.draw()
			            predict_time = time() - predict_start
			            status_label.config(text=f"é¢æµå®æ (èæ¶: {predict_time:.2f}ç§)")
			            result_lines = [
			                f"ç¬¬ {day} å¤© ({date.strftime('%Y-%m-%d')}): {pred:.2f}\n"
			                for day, (date, pred) in enumerate(zip(future_dates, predictions), start=1)
			            ]
			            result_label.config(text="é¢æµç»æ:\n" + "".join(result_lines))
			        except Exception as e:
			            status_label.config(text=f"éè¯¯: {str(e)}")

			    def on_scroll(event):
			        """Zoom both axes by 10% around the mouse position (or the view centre)."""
			        x_min, x_max = ax.get_xlim()
			        y_min, y_max = ax.get_ylim()
			        width, height = x_max - x_min, y_max - y_min
			        if width == 0 or height == 0:
			            return  # degenerate view: nothing to scale
			        zoom_factor = 1.1
			        x_data = event.xdata if event.xdata is not None else (x_min + x_max) / 2
			        y_data = event.ydata if event.ydata is not None else (y_min + y_max) / 2
			        x_rel = (x_data - x_min) / width
			        y_rel = (y_data - y_min) / height
			        if event.step > 0:  # wheel up: zoom in
			            new_width, new_height = width / zoom_factor, height / zoom_factor
			        else:               # wheel down: zoom out
			            new_width, new_height = width * zoom_factor, height * zoom_factor
			        # Keep the point under the cursor at the same relative position.
			        x0 = x_data - x_rel * new_width
			        y0 = y_data - y_rel * new_height
			        ax.set_xlim([x0, x0 + new_width])
			        ax.set_ylim([y0, y0 + new_height])
			        canvas.draw_idle()

			    def update_cursor(event):
			        """Show the move cursor while the pointer is over the axes."""
			        cursor = "fleur" if event.inaxes == ax else ""
			        canvas.get_tk_widget().config(cursor=cursor)

			    def reset_view():
			        """Redraw the default history chart."""
			        display_history()
			        status_label.config(text="å¾è¡¨è§å¾å·²éç½®")

			    root = tk.Tk()
			    root.title("éé¾æ¸¯-éè¡çåº¦é¢æµç³»ç»")
			    try:
			        configure_gui_fonts()
			    except Exception as e:
			        print("åä½éç½®å¼å¸¸:", e)

			    input_frame = ttk.Frame(root, padding="10")
			    input_frame.pack(fill=tk.X)
			    control_frame = ttk.Frame(root, padding="5")
			    control_frame.pack(fill=tk.X)
			    result_frame = ttk.Frame(root, padding="10")
			    result_frame.pack(fill=tk.BOTH, expand=True)

			    ttk.Label(input_frame, text="è¾å¥å¼å§æ¶é´ (YYYY-MM-DD HH:MM:SS)").pack(side=tk.LEFT)
			    entry = ttk.Entry(input_frame, width=25)
			    entry.pack(side=tk.LEFT, padx=5)
			    predict_button = ttk.Button(input_frame, text="é¢æµ", command=on_predict)
			    predict_button.pack(side=tk.LEFT)
			    status_label = ttk.Label(input_frame, text="æç¤º: ç¬¬ä¸æ¬¡è¿è¡è¯·å¾é'å¼ºå¶éæ°è®ç»æ¨¡å'")
			    status_label.pack(side=tk.LEFT, padx=10)

			    retrain_var = tk.BooleanVar(value=False)
			    ttk.Checkbutton(control_frame, text="å¼ºå¶éæ°è®ç»æ¨¡å", variable=retrain_var).pack(side=tk.LEFT)
			    # NOTE(review): the legend text and the plotted colours for the
			    # "actual" series should be checked against each other.
			    legend_label = ttk.Label(control_frame, text="å¾ä¾: ç´«è²=éé¾æ¸¯ä¸æ¸¸æ°æ®, èè²=ä¸åæ°´ä¸æ¸¸æ°æ®, çº¢è²=é¢æµå¼, ç»¿è²=å®éå¼")
			    legend_label.pack(side=tk.LEFT, padx=10)
			    reset_button = ttk.Button(control_frame, text="éç½®è§å¾", command=reset_view)
			    reset_button.pack(side=tk.LEFT, padx=5)

			    # Label showing the accuracy of the cached model, if any.
			    metrics_frame = ttk.Frame(root, padding="5")
			    metrics_frame.pack(fill=tk.X)
			    metrics_label = ttk.Label(metrics_frame, text=describe_metrics(get_model_metrics()))
			    metrics_label.pack(side=tk.LEFT, padx=10)

			    result_label = ttk.Label(result_frame, text="", justify=tk.LEFT)
			    result_label.pack(side=tk.RIGHT, fill=tk.Y)
			    fig, ax = plt.subplots(figsize=(10, 5), dpi=100)
			    canvas = FigureCanvasTkAgg(fig, master=result_frame)
			    canvas.get_tk_widget().pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
			    toolbar_frame = ttk.Frame(result_frame)
			    toolbar_frame.pack(side=tk.BOTTOM, fill=tk.X)
			    toolbar = NavigationToolbar2Tk(canvas, toolbar_frame)
			    toolbar.update()
			    canvas.mpl_connect('scroll_event', on_scroll)
			    canvas.mpl_connect('motion_notify_event', update_cursor)

			    display_history()
			    root.mainloop()

			# -------------------------------
			# Program entry: load the data, add features, generate delay features, then start the GUI
			# -------------------------------
			def save_processed_data(df, filename='processed_data.pkl'):
			    """Pickle the processed DataFrame to ``filename``.

			    Args:
			        df: DataFrame to store.
			        filename: Destination path.

			    Returns:
			        True on success, False if writing failed (the error is printed).
			    """
			    try:
			        df.to_pickle(filename)
			    except Exception as e:
			        print(f"ä¿åæ°æ®å¤±è´¥: {e}")
			        return False
			    print(f"å·²ä¿åå¤çåçæ°æ®å° {filename}")
			    return True

			def load_processed_data(filename='processed_data.pkl'):
			    """Load a previously pickled DataFrame.

			    Args:
			        filename: Path of the pickle file.

			    Returns:
			        The stored DataFrame, or None when the file does not exist or
			        cannot be read (a message is printed in both cases).
			    """
			    try:
			        if not os.path.exists(filename):
			            print(f"æ¾ä¸å°å¤çåçæ°æ®æä»¶ {filename}")
			            return None
			        # NOTE: read_pickle executes arbitrary code from the file; only
			        # load caches written by this application.
			        df = pd.read_pickle(filename)
			        print(f"å·²ä» {filename} å è½½å¤çåçæ°æ®")
			        return df
			    except Exception as e:
			        print(f"å è½½æ°æ®å¤±è´¥: {e}")
			        return None

			# Try the cached, already-processed data first; rebuild it from the CSV
			# files when the cache is missing.
			# NOTE(review): the cache is never invalidated, so updated CSV files are
			# ignored until processed_data.pkl is deleted.
			processed_data = load_processed_data()
			if processed_data is not None:
			    df = processed_data
			else:
			    df = load_data('éé¾æ¸¯1.csv', 'ä¸åæ°´.csv')
			    if df is not None:
			        df = add_lunar_features(df)
			        delay_hours = [1, 2, 3, 4, 6, 12, 24, 36, 48, 60, 72, 84, 96, 108, 120]
			        df = batch_create_delay_features(df, delay_hours)

			        # Rolling statistics: 24-row mean/std/max/min and 72-row mean.
			        # min_periods=1 keeps the first rows; std is NaN on the very first row.
			        for suffix, column in (('up', 'upstream'), ('down', 'downstream')):
			            daily = df[column].rolling(window=24, min_periods=1)
			            df[f'mean_1d_{suffix}'] = daily.mean()
			            df[f'mean_3d_{suffix}'] = df[column].rolling(window=72, min_periods=1).mean()
			            df[f'std_1d_{suffix}'] = daily.std()
			            df[f'max_1d_{suffix}'] = daily.max()
			            df[f'min_1d_{suffix}'] = daily.min()

			        # Cache the processed data for the next start.
			        save_processed_data(df)

			if df is not None:
			    run_gui()
			else:
			    print("æ°æ®å è½½å¤±è´¥ï¼æ æ³è¿è¡é¢æµã")