1276eb361055c86d2107545e66ab78438fca54ba..137b119f6dd371f61b9a528a8bdf77cb32ef519d
2025-04-08 rp
模型更换
137b11 对比 | 目录
已添加1个文件
825 ■■■■■ 文件已修改
yd_lstm_test.py 825 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
yd_lstm_test.py
对比新文件
@@ -0,0 +1,825 @@
import pandas as pd
import numpy as np
import tkinter as tk
from tkinter import ttk
from datetime import timedelta
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg, NavigationToolbar2Tk
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model, save_model
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
import pickle
import os
from time import time
import matplotlib
# é…ç½®matplotlib中文显示
# è®¾ç½®ä¸­æ–‡å­—体,使用系统提供的字体
matplotlib.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'SimSun', 'Arial Unicode MS']  # ä¼˜å…ˆä½¿ç”¨çš„中文字体
matplotlib.rcParams['axes.unicode_minus'] = False  # è§£å†³è´Ÿå·æ˜¾ç¤ºé—®é¢˜
matplotlib.rcParams['font.family'] = 'sans-serif'  # ä½¿ç”¨æ— è¡¬çº¿å­—体
# ç¼“存变量
cached_model = None
last_training_time = None
feature_columns = None
feature_scaler = None
target_scaler = None
# æ•°æ®åŠ è½½å‡½æ•°
def load_data(upstream_file, downstream_file, qinglong_lake_file=None):
    try:
        # ä½¿ç”¨é€—号作为分隔符读取数据
        upstream_df = pd.read_csv(upstream_file)
        downstream_df = pd.read_csv(downstream_file)
        if qinglong_lake_file:
            qinglong_lake_df = pd.read_csv(qinglong_lake_file)
    except FileNotFoundError:
        print("文件未找到,请检查文件路径")
        return None
    # å‡è®¾åˆ—名被读取为 'DateTime,TagName,Value',我们需要分割
    upstream_df.columns = ['DateTime', 'TagName', 'Value']
    downstream_df.columns = ['DateTime', 'TagName', 'Value']
    if qinglong_lake_file:
        qinglong_lake_df.columns = ['DateTime', 'TagName', 'Value']
    # å°† 'DateTime' åˆ—转换为日期格式
    upstream_df['DateTime'] = pd.to_datetime(upstream_df['DateTime'])
    downstream_df['DateTime'] = pd.to_datetime(downstream_df['DateTime'])
    if qinglong_lake_file:
        qinglong_lake_df['DateTime'] = pd.to_datetime(qinglong_lake_df['DateTime'])
    # æ£€æµ‹å¹¶å¤„理异常值 (先转换为数值型)
    upstream_df['Value'] = pd.to_numeric(upstream_df['Value'], errors='coerce')
    downstream_df['Value'] = pd.to_numeric(downstream_df['Value'], errors='coerce')
    if qinglong_lake_file:
        qinglong_lake_df['Value'] = pd.to_numeric(qinglong_lake_df['Value'], errors='coerce')
    # è¿‡æ»¤æŽ‰ç›åº¦å€¼å°äºŽ5的数据
    upstream_df = upstream_df[upstream_df['Value'] >= 5]
    downstream_df = downstream_df[downstream_df['Value'] >= 5]
    if qinglong_lake_file:
        qinglong_lake_df = qinglong_lake_df[qinglong_lake_df['Value'] >= 5]
    # å°†0值替换为NaN,以便后续进行插值处理
    upstream_df.loc[upstream_df['Value'] == 0, 'Value'] = np.nan
    downstream_df.loc[downstream_df['Value'] == 0, 'Value'] = np.nan
    if qinglong_lake_file:
        qinglong_lake_df.loc[qinglong_lake_df['Value'] == 0, 'Value'] = np.nan
    # ä½¿ç”¨åŸºæœ¬ç»Ÿè®¡æ–¹æ³•识别并替换异常值 (3倍标准差法)
    for df in [upstream_df, downstream_df]:
        mean = df['Value'].mean()
        std = df['Value'].std()
        lower_bound = mean - 3 * std
        upper_bound = mean + 3 * std
        # å°†è¶…出范围的值替换为NaN
        df.loc[(df['Value'] < lower_bound) | (df['Value'] > upper_bound), 'Value'] = np.nan
    if qinglong_lake_file:
        mean = qinglong_lake_df['Value'].mean()
        std = qinglong_lake_df['Value'].std()
        lower_bound = mean - 3 * std
        upper_bound = mean + 3 * std
        qinglong_lake_df.loc[(qinglong_lake_df['Value'] < lower_bound) | (qinglong_lake_df['Value'] > upper_bound), 'Value'] = np.nan
    # é‡å‘½å 'Value' ä¸º 'upstream' å’Œ 'downstream'
    upstream_df = upstream_df.rename(columns={'Value': 'upstream'})[['DateTime', 'upstream']]
    downstream_df = downstream_df.rename(columns={'Value': 'downstream'})[['DateTime', 'downstream']]
    if qinglong_lake_file:
        qinglong_lake_df = qinglong_lake_df.rename(columns={'Value': 'qinglong_lake'})[['DateTime', 'qinglong_lake']]
    # åˆå¹¶æ•°æ®
    merged_df = pd.merge(upstream_df, downstream_df, on='DateTime', how='inner')
    if qinglong_lake_file:
        merged_df = pd.merge(merged_df, qinglong_lake_df, on='DateTime', how='left')
    # å¤„理 NaN å’Œæ— æ•ˆå€¼
    print(f"合并前数据行数: {len(merged_df)}")
    # è®¾ç½®DateTime为索引以允许时间插值
    merged_df = merged_df.set_index('DateTime')
    # ä½¿ç”¨å¤šç§æ’值方法处理NaN值
    # 1. é¦–先使用线性插值填充短时间的NaN
    merged_df['upstream'] = merged_df['upstream'].interpolate(method='linear', limit=4)
    merged_df['downstream'] = merged_df['downstream'].interpolate(method='linear', limit=4)
    if qinglong_lake_file:
        merged_df['qinglong_lake'] = merged_df['qinglong_lake'].interpolate(method='linear', limit=4)
    # 2. å¯¹äºŽè¾ƒé•¿æ—¶é—´çš„NaN,使用时间加权插值
    merged_df['upstream'] = merged_df['upstream'].interpolate(method='time', limit=24)
    merged_df['downstream'] = merged_df['downstream'].interpolate(method='time', limit=24)
    if qinglong_lake_file:
        merged_df['qinglong_lake'] = merged_df['qinglong_lake'].interpolate(method='time', limit=24)
    # 3. å¯¹äºŽä»ç„¶å­˜åœ¨çš„NaN,使用前向填充和后向填充
    merged_df['upstream'] = merged_df['upstream'].fillna(method='ffill').fillna(method='bfill')
    merged_df['downstream'] = merged_df['downstream'].fillna(method='ffill').fillna(method='bfill')
    if qinglong_lake_file:
        merged_df['qinglong_lake'] = merged_df['qinglong_lake'].fillna(method='ffill').fillna(method='bfill')
    # 4. æ·»åŠ å¹³æ»‘å¤„ç†
    # ä½¿ç”¨ç§»åŠ¨å¹³å‡è¿›è¡Œå¹³æ»‘å¤„ç†
    merged_df['upstream_smooth'] = merged_df['upstream'].rolling(window=24, min_periods=1, center=True).mean()
    merged_df['downstream_smooth'] = merged_df['downstream'].rolling(window=24, min_periods=1, center=True).mean()
    if qinglong_lake_file:
        merged_df['qinglong_lake_smooth'] = merged_df['qinglong_lake'].rolling(window=24, min_periods=1, center=True).mean()
    # å¯¹é’龙港数据中盐度值低于50的部分进行额外平滑处理
    low_salinity_mask = merged_df['upstream'] < 50
    if low_salinity_mask.any():
        # å¯¹ä½Žç›åº¦éƒ¨åˆ†ä½¿ç”¨æ›´å¤§çš„平滑窗口
        merged_df.loc[low_salinity_mask, 'upstream_smooth'] = merged_df.loc[low_salinity_mask, 'upstream'].rolling(
            window=48, min_periods=1, center=True).mean()
    # åˆ é™¤å‰©ä½™çš„NaN和无穷大值
    merged_df = merged_df.dropna()
    merged_df = merged_df[merged_df['upstream'].apply(lambda x: np.isfinite(x))]
    merged_df = merged_df[merged_df['downstream'].apply(lambda x: np.isfinite(x))]
    if qinglong_lake_file:
        merged_df = merged_df[merged_df['qinglong_lake'].apply(lambda x: np.isfinite(x))]
    # é‡ç½®ç´¢å¼•,将DateTime重新作为列
    merged_df = merged_df.reset_index()
    # æœ€ç»ˆæ£€æŸ¥æ•°æ®
    print(f"清洗后数据行数: {len(merged_df)}")
    print(f"上游盐度范围: {merged_df['upstream'].min()} - {merged_df['upstream'].max()}")
    print(f"下游盐度范围: {merged_df['downstream'].min()} - {merged_df['downstream'].max()}")
    if qinglong_lake_file:
        print(f"青龙湖盐度范围: {merged_df['qinglong_lake'].min()} - {merged_df['qinglong_lake'].max()}")
    # ç¡®ä¿æ•°æ®æŒ‰æ—¶é—´æŽ’序
    merged_df = merged_df.sort_values('DateTime')
    return merged_df
# ç‰¹å¾å·¥ç¨‹ - LSTM版本
def create_sequences(df, look_back=96, forecast_horizon=5):
    print("开始特征工程(LSTM序列模式)...")
    start_time = time()
    # æå–主要特征列
    upstream = df['upstream'].values
    downstream = df['downstream'].values
    # é¢„先计算时间特征
    date_features = np.array([
        [x.hour/24, x.dayofweek/7, x.month/12]
        for x in df['DateTime']
    ])
    # åˆ›å»ºX和y序列
    X = []
    y = []
    # è®¡ç®—可用的样本数
    total_samples = len(df) - look_back - forecast_horizon
    if total_samples <= 0:
        print("数据不足以创建特征")
        return np.array([]), np.array([])
    print(f"开始创建序列,总样本数: {total_samples}")
    # æ‰¹é‡å¤„理以提高效率
    batch_size = 1000
    for batch_start in range(0, total_samples, batch_size):
        batch_end = min(batch_start + batch_size, total_samples)
        print(f"处理样本批次: {batch_start}-{batch_end}/{total_samples}")
        for i in range(batch_start, batch_end):
            # èŽ·å–å½“å‰æ—¶é—´çª—å£ç´¢å¼•
            end_idx = i + look_back
            # åŸºæœ¬åºåˆ—特征
            upstream_seq = upstream[i:end_idx]
            downstream_seq = downstream[i:end_idx]
            time_seq = date_features[i:end_idx]
            # è·³è¿‡å«æœ‰NaN的窗口
            if np.isnan(upstream_seq).any() or np.isnan(downstream_seq).any():
                continue
            # åˆå¹¶ç‰¹å¾ [samples, timesteps, features]
            # æ¯ä¸ªæ—¶é—´æ­¥åŒ…含: ä¸Šæ¸¸ç›åº¦, ä¸‹æ¸¸ç›åº¦, æ—¶é—´ç‰¹å¾(小时,星期,月份)
            input_seq = np.column_stack([
                upstream_seq,
                downstream_seq,
                time_seq
            ])
            # ç›®æ ‡æ˜¯é¢„测未来forecast_horizon天的下游盐度
            target_seq = downstream[end_idx:end_idx+forecast_horizon]
            # ç¡®ä¿ç›®æ ‡æ²¡æœ‰NaN
            if not np.isnan(target_seq).any():
                X.append(input_seq)
                y.append(target_seq)
    X = np.array(X)
    y = np.array(y)
    if len(X) == 0 or len(y) == 0:
        print("警告:没有能够生成有效的特征和标签对")
        return np.array([]), np.array([])
    end_time = time()
    print(f"特征工程完成,有效样本数: {len(X)},特征形状: {X.shape},标签形状: {y.shape},耗时: {end_time-start_time:.2f}秒")
    # ä¿å­˜ç‰¹å¾åˆ—名称以备将来使用
    global feature_columns
    feature_columns = ['upstream', 'downstream', 'hour', 'day_of_week', 'month']
    return X, y
# Build the LSTM model
def build_lstm_model(input_shape, output_length):
    """Create and compile a two-layer stacked LSTM regressor.

    input_shape is (timesteps, features); the final Dense layer emits
    output_length values (one per forecast step). Compiled with Adam,
    MSE loss and an MAE metric.
    """
    model = Sequential()
    model.add(LSTM(units=64, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(0.2))
    model.add(LSTM(units=32))
    model.add(Dropout(0.2))
    model.add(Dense(output_length))
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model
# Train the LSTM (or reuse a cached copy) and forecast downstream salinity.
def train_and_predict(df, start_time, force_retrain=False):
    """Train/load the salinity LSTM and forecast from ``start_time``.

    Only rows of ``df`` strictly before ``start_time`` are used for
    training.  A trained model and its scalers are cached both in module
    globals and on disk (salinity_lstm_model.h5 / salinity_scalers.pkl)
    and reused when no newer data has arrived since the last training.

    Returns:
        (future_dates, predictions, model) on success, otherwise
        (None, None, None).
    """
    global cached_model, last_training_time, feature_scaler, target_scaler
    # On-disk cache locations for the model and its scalers.
    model_cache_file = 'salinity_lstm_model.h5'
    scaler_cache_file = 'salinity_scalers.pkl'
    model_needs_training = True
    # Forced retrain: delete the stale on-disk model cache first.
    if os.path.exists(model_cache_file) and force_retrain:
        try:
            os.remove(model_cache_file)
            if os.path.exists(scaler_cache_file):
                os.remove(scaler_cache_file)
            print("特征结构已更改,已删除旧模型缓存")
        except:  # NOTE(review): bare except hides removal errors — consider narrowing to OSError
            pass
    # Use only data strictly before the forecast start.
    train_df = df[df['DateTime'] < start_time].copy()
    # Prefer the in-memory cached model when it is still fresh.
    if not force_retrain and cached_model is not None and last_training_time is not None and feature_scaler is not None and target_scaler is not None:
        # No new data since the last training run: reuse the cache.
        if last_training_time >= train_df['DateTime'].max():
            model_needs_training = False
            print(f"使用缓存模型 (上次训练时间: {last_training_time})")
    # Otherwise try the on-disk cache.
    elif not force_retrain and os.path.exists(model_cache_file) and os.path.exists(scaler_cache_file):
        try:
            cached_model = load_model(model_cache_file)
            with open(scaler_cache_file, 'rb') as f:
                scalers = pickle.load(f)
                feature_scaler = scalers['feature_scaler']
                target_scaler = scalers['target_scaler']
                last_training_time = scalers['training_time']
                # No new data since the last training run: reuse the file model.
                if last_training_time >= train_df['DateTime'].max():
                    model_needs_training = False
                    print(f"从文件加载模型 (上次训练时间: {last_training_time})")
        except Exception as e:
            print(f"加载模型失败: {e}")
    if model_needs_training:
        print("需要训练新模型...")
        if len(train_df) < 10:  # require a minimal amount of training data
            print("训练数据不足")
            return None, None, None
        # Report training-data quality before feature engineering.
        print(f"训练数据范围: {train_df['DateTime'].min()} åˆ° {train_df['DateTime'].max()}")
        print(f"训练数据中NaN值统计:\n上游: {train_df['upstream'].isna().sum()}\n下游: {train_df['downstream'].isna().sum()}")
        # Start the training timer.
        start_time_training = time()
        # Build the LSTM input/target sequences.
        X, y = create_sequences(train_df)
        # Abort when no valid samples could be built.
        if len(X) == 0 or len(y) == 0:
            print("没有足够的有效样本进行训练")
            return None, None, None
        # Report the sample count.
        print(f"用于训练的样本数: {len(X)}")
        # Fit the feature scaler for the LSTM inputs.
        feature_scaler = MinMaxScaler(feature_range=(0, 1))
        # Scale every timestep of every sample:
        # original shape is [samples, timesteps, features].
        n_samples, n_timesteps, n_features = X.shape
        X_reshaped = X.reshape(n_samples * n_timesteps, n_features)
        X_scaled = feature_scaler.fit_transform(X_reshaped)
        X_scaled = X_scaled.reshape(n_samples, n_timesteps, n_features)
        # Scale the targets as well.
        target_scaler = MinMaxScaler(feature_range=(0, 1))
        y_scaled = target_scaler.fit_transform(y)
        # Train/validation split.
        X_train, X_val, y_train, y_val = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42)
        # Build the LSTM model.
        input_shape = (X_train.shape[1], X_train.shape[2])
        output_length = y_train.shape[1]
        model = build_lstm_model(input_shape, output_length)
        # Train the model.
        try:
            print("开始训练模型...")
            # Early stopping on validation loss.
            early_stopping = EarlyStopping(
                monitor='val_loss',
                patience=20,
                restore_best_weights=True
            )
            # Fit.
            history = model.fit(
                X_train, y_train,
                validation_data=(X_val, y_val),
                epochs=10,    # NOTE: deliberately low epoch count — remember to tune
                batch_size=32,
                callbacks=[early_stopping],
                verbose=1
            )
            # Validation performance in original (unscaled) units.
            val_pred_scaled = model.predict(X_val)
            val_pred = target_scaler.inverse_transform(val_pred_scaled)
            y_val_inv = target_scaler.inverse_transform(y_val)
            rmse = np.sqrt(np.mean((val_pred - y_val_inv) ** 2))
            print(f"验证集RMSE: {rmse:.4f}")
            # Record the training time.
            # NOTE(review): this stores the forecast start_time, not the
            # wall-clock training moment nor the newest training row —
            # confirm that is the intended cache-freshness key.
            last_training_time = start_time
            # Cache the model in memory.
            cached_model = model
            # Persist the model and scalers to disk.
            model.save(model_cache_file)
            with open(scaler_cache_file, 'wb') as f:
                pickle.dump({
                    'feature_scaler': feature_scaler,
                    'target_scaler': target_scaler,
                    'training_time': last_training_time,
                    'feature_columns': feature_columns,
                    'rmse': rmse
                }, f)
            print(f"模型训练完成,耗时: {time()-start_time_training:.2f}秒")
        except Exception as e:
            print(f"模型训练失败: {e}")
            return None, None, None
    else:
        # Reuse the cached model.
        model = cached_model
    # Build the most recent input window and predict.
    try:
        print("准备预测数据...")
        look_back = 96  # must match the look_back used at training time
        # Take the latest look_back rows before the forecast start.
        latest_data = df[df['DateTime'] < start_time].tail(look_back).copy()
        if len(latest_data) < look_back:  # need a complete history window
            print("预测所需的历史数据不足")
            return None, None, None
        # Fill any NaN gaps in the window.
        if latest_data['upstream'].isna().any() or latest_data['downstream'].isna().any():
            latest_data['upstream'] = latest_data['upstream'].fillna(method='ffill').fillna(method='bfill')
            latest_data['downstream'] = latest_data['downstream'].fillna(method='ffill').fillna(method='bfill')
        # Extract the feature sequences.
        upstream_seq = latest_data['upstream'].values
        downstream_seq = latest_data['downstream'].values
        # Normalized calendar features (hour, weekday, month).
        time_seq = np.array([
            [x.hour/24, x.dayofweek/7, x.month/12]
            for x in latest_data['DateTime']
        ])
        # Stack into [timesteps, features].
        input_seq = np.column_stack([
            upstream_seq,
            downstream_seq,
            time_seq
        ])
        # Sanitize any remaining invalid values.
        if np.isnan(input_seq).any() or np.isinf(input_seq).any():
            print("预测特征包含无效值")
            input_seq = np.nan_to_num(input_seq, nan=0.0, posinf=1e6, neginf=-1e6)
        # Add the batch dimension and apply the fitted scaler.
        input_seq = input_seq.reshape(1, look_back, -1)
        input_seq_reshaped = input_seq.reshape(look_back, -1)
        input_seq_scaled = feature_scaler.transform(input_seq_reshaped)
        input_seq_scaled = input_seq_scaled.reshape(1, look_back, -1)
        # Predict.
        print("执行预测...")
        predictions_scaled = model.predict(input_seq_scaled)
        # Invert the target scaling.
        predictions = target_scaler.inverse_transform(predictions_scaled)[0]
        # Build the forecast dates (one per day).
        forecast_horizon = 5  # must match the horizon used at training time
        future_dates = [start_time + timedelta(days=i) for i in range(forecast_horizon)]
        print("预测成功完成")
        return future_dates, predictions, model
    except Exception as e:
        print(f"预测过程发生错误: {e}")
        return None, None, None
# GUI ç•Œé¢
def run_gui():
    # é…ç½®tkinter中文显示
    def configure_gui_fonts():
        # å°è¯•设置支持中文的字体
        font_names = ['微软雅黑', 'Microsoft YaHei', 'SimSun', 'SimHei']
        for font_name in font_names:
            try:
                default_font = tk.font.nametofont("TkDefaultFont")
                default_font.configure(family=font_name)
                text_font = tk.font.nametofont("TkTextFont")
                text_font.configure(family=font_name)
                fixed_font = tk.font.nametofont("TkFixedFont")
                fixed_font.configure(family=font_name)
                return True
            except:
                continue
        return False
    def on_predict():
        try:
            predict_start_time = time()
            status_label.config(text="预测中...")
            root.update()
            start_time = pd.to_datetime(entry.get())
            # æ£€æŸ¥æ¨¡åž‹ç¼“存情况
            cache_exists = os.path.exists('salinity_lstm_model.h5')
            if cache_exists and not retrain_var.get():
                try:
                    with open('salinity_scalers.pkl', 'rb') as f:
                        scalers = pickle.load(f)
                        # æ£€æŸ¥æ¨¡åž‹ç‰¹å¾æ•°é‡æ˜¯å¦ä¸€è‡´
                        model_features = scalers.get('feature_columns', [])
                        expected_features = ['upstream', 'downstream', 'hour', 'day_of_week', 'month']
                        if len(model_features) != len(expected_features):
                            status_label.config(text="特征结构已更改,请勾选'强制重新训练模型'")
                            return
                except:
                    pass
            force_retrain = retrain_var.get()
            future_dates, predictions, model = train_and_predict(df, start_time, force_retrain)
            if future_dates is None or predictions is None:
                status_label.config(text="预测失败")
                return
            # æ¸…空之前的图形
            ax.clear()
            # ç»˜åˆ¶åŽ†å²æ•°æ®
            history_end = min(start_time, df['DateTime'].max())
            history_start = history_end - timedelta(days=120)  # æ˜¾ç¤ºè¿‘30天的历史数据
            history_data = df[(df['DateTime'] >= history_start) & (df['DateTime'] <= history_end)]
            # ç»˜åˆ¶ä¸€å–æ°´(下游)的历史数据
            ax.plot(history_data['DateTime'], history_data['downstream'],
                    label='一取水(下游)盐度', color='blue', linewidth=1.5)
            # ç»˜åˆ¶é’龙港(上游)的历史数据 - ä½¿ç”¨å¹³æ»‘后的数据
            ax.plot(history_data['DateTime'], history_data['upstream_smooth'],
                    label='青龙港(上游)盐度', color='purple', linewidth=1.5, alpha=0.7)
            # ç»˜åˆ¶é’龙湖的历史数据 - ä½¿ç”¨å¹³æ»‘后的数据
            if 'qinglong_lake_smooth' in history_data.columns:
                ax.plot(history_data['DateTime'], history_data['qinglong_lake_smooth'],
                        label='青龙湖盐度', color='green', linewidth=1.5, alpha=0.7)
            # èŽ·å–é¢„æµ‹æœŸé—´çš„çœŸå®žå€¼ï¼ˆå¦‚æžœæœ‰ï¼‰
            actual_data = df[(df['DateTime'] >= start_time) &
                             (df['DateTime'] <= future_dates[-1])]
            # ç»˜åˆ¶é¢„测数据
            ax.plot(future_dates, predictions, marker='o', linestyle='--',
                    label='预测盐度', color='red', linewidth=2)
            # å¦‚果有真实值,绘制真实值
            if not actual_data.empty:
                ax.plot(actual_data['DateTime'], actual_data['downstream'],
                        marker='s', linestyle='-', label='真实盐度',
                        color='orange', linewidth=2)
                # è®¡ç®—预测误差
                # æ‰¾åˆ°æœ€æŽ¥è¿‘预测日期的实际值
                actual_values = []
                for pred_date in future_dates:
                    # æ‰¾åˆ°æœ€æŽ¥è¿‘的日期
                    closest_idx = (actual_data['DateTime'] - pred_date).abs().idxmin()
                    actual_values.append(actual_data.loc[closest_idx, 'downstream'])
                if len(actual_values) == len(predictions):
                    mse = np.mean((np.array(actual_values) - predictions) ** 2)
                    rmse = np.sqrt(mse)
                    mae = np.mean(np.abs(np.array(actual_values) - predictions))
                    # åœ¨å›¾ä¸Šæ˜¾ç¤ºè¯¯å·®æŒ‡æ ‡
                    error_text = f"RMSE: {rmse:.2f}, MAE: {mae:.2f}"
                    ax.text(0.02, 0.05, error_text, transform=ax.transAxes,
                            bbox=dict(facecolor='white', alpha=0.8))
            # æ·»åŠ ç½®ä¿¡åŒºé—´
            std_dev = history_data['downstream'].std() * 0.5  # ä½¿ç”¨åŽ†å²æ•°æ®çš„æ ‡å‡†å·®ä½œä¸ºé¢„æµ‹ä¸ç¡®å®šæ€§çš„ä¼°è®¡
            ax.fill_between(future_dates,
                            predictions - std_dev,
                            predictions + std_dev,
                            color='red', alpha=0.2)
            # è®¾ç½®æ ‡é¢˜ã€æ ‡ç­¾å’Œå›¾ä¾‹
            ax.set_xlabel('日期')
            ax.set_ylabel('盐度')
            ax.set_title(f"从 {start_time.strftime('%Y-%m-%d %H:%M:%S')} å¼€å§‹çš„盐度预测")
            ax.legend(loc='upper left')
            # è°ƒæ•´å¸ƒå±€ï¼Œé˜²æ­¢æ ‡ç­¾è¢«é®æŒ¡
            fig.tight_layout()
            # æ›´æ–°ç”»å¸ƒæ˜¾ç¤º
            canvas.draw()
            # è®¡ç®—预测总耗时
            predict_time = time() - predict_start_time
            # æ›´æ–°çŠ¶æ€
            status_label.config(text=f"预测完成 (耗时: {predict_time:.2f}秒)")
            # å±•示预测结果
            result_text = "预测结果:\n"
            for i, (date, pred) in enumerate(zip(future_dates, predictions)):
                result_text += f"第 {i+1} å¤© ({date.strftime('%Y-%m-%d')}): {pred:.2f}\n"
                # å¦‚果有真实值,添加到结果中
                if not actual_data.empty and i < len(actual_values):
                    result_text += f"   å®žé™…盐度: {actual_values[i]:.2f}\n"
                    result_text += f"   è¯¯å·®: {abs(actual_values[i] - pred):.2f}\n"
            result_label.config(text=result_text)
        except Exception as e:
            status_label.config(text=f"错误: {str(e)}")
    def on_scroll(event):
        # èŽ·å–å½“å‰åæ ‡è½´çš„èŒƒå›´
        xlim = ax.get_xlim()
        ylim = ax.get_ylim()
        # è®¾ç½®æ»šè½®ç¼©æ”¾çš„增量
        zoom_factor = 1.1
        # ç¡®å®šé¼ æ ‡ä½ç½®åˆ°è½´çš„相对位置
        x_data = event.xdata
        y_data = event.ydata
        # å¦‚果鼠标不在坐标轴内,则使用轴中心
        if x_data is None:
            x_data = (xlim[0] + xlim[1]) / 2
        if y_data is None:
            y_data = (ylim[0] + ylim[1]) / 2
        # è®¡ç®—相对位置
        x_rel = (x_data - xlim[0]) / (xlim[1] - xlim[0])
        y_rel = (y_data - ylim[0]) / (ylim[1] - ylim[0])
        # æ£€æŸ¥æ»šè½®çš„æ»šåŠ¨æ–¹å‘ - ä½¿ç”¨event.step替代event.button,更准确
        # å‘上滚动(放大)为正值,向下滚动(缩小)为负值
        if event.step > 0:  # å‘上滚动 = æ”¾å¤§
            # è®¡ç®—新的区间
            new_width = (xlim[1] - xlim[0]) / zoom_factor
            new_height = (ylim[1] - ylim[0]) / zoom_factor
            # è®¡ç®—新的区间边界,保持鼠标位置相对不变
            x0 = x_data - x_rel * new_width
            y0 = y_data - y_rel * new_height
            x1 = x0 + new_width
            y1 = y0 + new_height
            ax.set_xlim([x0, x1])
            ax.set_ylim([y0, y1])
        else:  # å‘下滚动 = ç¼©å°
            # è®¡ç®—新的区间
            new_width = (xlim[1] - xlim[0]) * zoom_factor
            new_height = (ylim[1] - ylim[0]) * zoom_factor
            # è®¡ç®—新的区间边界,保持鼠标位置相对不变
            x0 = x_data - x_rel * new_width
            y0 = y_data - y_rel * new_height
            x1 = x0 + new_width
            y1 = y0 + new_height
            ax.set_xlim([x0, x1])
            ax.set_ylim([y0, y1])
        # æ›´æ–°ç”»å¸ƒæ˜¾ç¤º
        canvas.draw_idle()
    # å®šä¹‰é¼ æ ‡æ‹–动功能
    def on_mouse_press(event):
        if event.button == 1:  # å·¦é”®
            canvas.mpl_disconnect(hover_cid)
            canvas._pan_start = (event.x, event.y)
            canvas._xlim = ax.get_xlim()
            canvas._ylim = ax.get_ylim()
            canvas.mpl_connect('motion_notify_event', on_mouse_move)
    def on_mouse_release(event):
        if event.button == 1:  # å·¦é”®
            canvas.mpl_disconnect(move_cid[0])
            global hover_cid
            hover_cid = canvas.mpl_connect('motion_notify_event', update_cursor)
    def on_mouse_move(event):
        if event.button == 1 and hasattr(canvas, '_pan_start'):
            dx = event.x - canvas._pan_start[0]
            dy = event.y - canvas._pan_start[1]
            # è½¬æ¢åƒç´ ç§»åŠ¨åˆ°æ•°æ®åæ ‡ç§»åŠ¨
            x_span = canvas._xlim[1] - canvas._xlim[0]
            y_span = canvas._ylim[1] - canvas._ylim[0]
            # è½¬æ¢å› å­(用于将像素移动转换为数据范围移动)
            width, height = canvas.get_width_height()
            x_scale = x_span / width
            y_scale = y_span / height
            # è®¡ç®—新的限制
            xlim = [canvas._xlim[0] - dx * x_scale,
                    canvas._xlim[1] - dx * x_scale]
            ylim = [canvas._ylim[0] + dy * y_scale,
                    canvas._ylim[1] + dy * y_scale]
            # è®¾ç½®æ–°çš„限制
            ax.set_xlim(xlim)
            ax.set_ylim(ylim)
            canvas.draw_idle()
    # æ›´æ–°é¼ æ ‡æŒ‡é’ˆæ ·å¼
    def update_cursor(event):
        if event.inaxes == ax:
            canvas.get_tk_widget().config(cursor="fleur")  # æ‰‹å½¢å…‰æ ‡è¡¨ç¤ºå¯æ‹–动
        else:
            canvas.get_tk_widget().config(cursor="")
    # é‡ç½®è§†å›¾
    def reset_view():
        display_history()
        status_label.config(text="图表视图已重置")
    root = tk.Tk()
    root.title("青龙港-陈行盐度预测系统")  # ä¿®æ”¹ä¸ºä¸­æ–‡æ ‡é¢˜
    # å°è¯•配置中文字体
    try:
        import tkinter.font as tkfont
        configure_gui_fonts()
    except:
        print("无法配置GUI字体,可能影响中文显示")
    # åˆ›å»ºæ¡†æž¶
    input_frame = ttk.Frame(root, padding="10")
    input_frame.pack(fill=tk.X)
    control_frame = ttk.Frame(root, padding="5")
    control_frame.pack(fill=tk.X)
    result_frame = ttk.Frame(root, padding="10")
    result_frame.pack(fill=tk.BOTH, expand=True)
    # è¾“入框和预测按钮
    ttk.Label(input_frame, text="输入开始时间 (YYYY-MM-DD HH:MM:SS)").pack(side=tk.LEFT)
    entry = ttk.Entry(input_frame, width=25)
    entry.pack(side=tk.LEFT, padx=5)
    predict_button = ttk.Button(input_frame, text="预测", command=on_predict)
    predict_button.pack(side=tk.LEFT)
    # çŠ¶æ€æ ‡ç­¾
    status_label = ttk.Label(input_frame, text="提示: ç¬¬ä¸€æ¬¡è¿è¡Œè¯·å‹¾é€‰'强制重新训练模型'")
    status_label.pack(side=tk.LEFT, padx=10)
    # æŽ§åˆ¶é€‰é¡¹
    retrain_var = tk.BooleanVar(value=False)
    ttk.Checkbutton(control_frame, text="强制重新训练模型", variable=retrain_var).pack(side=tk.LEFT)
    # æ·»åŠ å›¾ä¾‹è¯´æ˜Ž
    legend_label = ttk.Label(control_frame, text="图例: ç´«è‰²=青龙港上游数据, è“è‰²=一取水下游数据, çº¢è‰²=预测值, ç»¿è‰²=实际值")
    legend_label.pack(side=tk.LEFT, padx=10)
    # ç»“果标签
    result_label = ttk.Label(result_frame, text="", justify=tk.LEFT)
    result_label.pack(side=tk.RIGHT, fill=tk.Y)
    # ç»˜å›¾åŒºåŸŸ - è®¾ç½®dpi提高清晰度
    fig, ax = plt.subplots(figsize=(10, 5), dpi=100)
    # åˆ›å»ºç”»å¸ƒå¹¶æ·»åŠ å·¥å…·æ 
    canvas = FigureCanvasTkAgg(fig, master=result_frame)
    canvas.get_tk_widget().pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
    # æ·»åŠ å·¥å…·æ 
    toolbar_frame = ttk.Frame(result_frame)
    toolbar_frame.pack(side=tk.BOTTOM, fill=tk.X)
    toolbar = NavigationToolbar2Tk(canvas, toolbar_frame)
    toolbar.update()
    # æ·»åŠ è‡ªå®šä¹‰é‡ç½®æŒ‰é’®
    reset_button = ttk.Button(control_frame, text="重置视图", command=reset_view)
    reset_button.pack(side=tk.LEFT, padx=5)
    # è¿žæŽ¥é¼ æ ‡äº‹ä»¶
    canvas.mpl_connect('button_press_event', on_mouse_press)
    canvas.mpl_connect('button_release_event', on_mouse_release)
    canvas.mpl_connect('scroll_event', on_scroll)
    # å…¨å±€å˜é‡ï¼Œç”¨äºŽå­˜å‚¨äº‹ä»¶è¿žæŽ¥ID
    move_cid = [None]
    hover_cid = canvas.mpl_connect('motion_notify_event', update_cursor)
    # é»˜è®¤åŠ è½½åŽ†å²æ•°æ®
    def display_history():
        # æ¸…空之前的图形
        ax.clear()
        # ç¡®ä¿æ˜¾ç¤ºå…¨éƒ¨åŽ†å²æ•°æ®ä½†ä¸è¶…è¿‡60天
        end_date = df['DateTime'].max()
        start_date = max(df['DateTime'].min(), end_date - timedelta(days=60))
        display_data = df[(df['DateTime'] >= start_date) & (df['DateTime'] <= end_date)]
        # ç»˜åˆ¶ä¸€å–æ°´(下游)历史数据
        ax.plot(display_data['DateTime'], display_data['downstream'],
                label='一取水(下游)盐度', color='blue', linewidth=1.5)
        # ç»˜åˆ¶é’龙港(上游)的历史数据 - ä½¿ç”¨å¹³æ»‘后的数据
        ax.plot(display_data['DateTime'], display_data['upstream_smooth'],
                label='青龙港(上游)盐度', color='purple', linewidth=1.5, alpha=0.7)
        # # ç»˜åˆ¶é’龙港历史数据 - ä½¿ç”¨å¹³æ»‘后的数据
        # if 'qinglong_lake_smooth' in display_data.columns:
        #     ax.plot(display_data['DateTime'], display_data['qinglong_lake_smooth'],
        #             label='青龙湖盐度', color='green', linewidth=1.5, alpha=0.7)
        # è®¾ç½®æ ‡é¢˜ã€æ ‡ç­¾å’Œå›¾ä¾‹
        ax.set_xlabel('日期')
        ax.set_ylabel('盐度')
        ax.set_title('历史盐度数据对比')
        ax.legend()
        # è°ƒæ•´å¸ƒå±€ï¼Œé˜²æ­¢æ ‡ç­¾è¢«é®æŒ¡
        fig.tight_layout()
        # æ›´æ–°ç”»å¸ƒæ˜¾ç¤º
        canvas.draw()
    # é»˜è®¤åŠ è½½åŽ†å²æ•°æ®
    display_history()
    # å¯åЍGUI
    root.mainloop()
# è¿è¡Œ
df = load_data('青龙港1.csv', '一取水.csv', )
# å¦‚果数据加载成功,则运行GUI界面
if df is not None:
    run_gui()
else:
    print("数据加载失败,无法运行预测。")