import os
import pickle
import pandas as pd
import numpy as np
import tkinter as tk
import tkinter.font as tkfont
from tkinter import ttk
from datetime import timedelta
from time import time
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg, NavigationToolbar2Tk
from numpy.lib.stride_tricks import sliding_window_view
from xgboost import XGBRegressor
from lunardate import LunarDate
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib

# Configure matplotlib to display Chinese text
matplotlib.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'SimSun', 'Arial Unicode MS']
matplotlib.rcParams['axes.unicode_minus'] = False
matplotlib.rcParams['font.family'] = 'sans-serif'

# Global cache variables and feature names (feature_columns is only a placeholder here)
cached_model = None
last_training_time = None
feature_columns = None

# -------------------------------
# Data loading and preprocessing
# -------------------------------
def load_data(upstream_file, downstream_file, qinglong_lake_file=None):
    try:
        upstream_df = pd.read_csv(upstream_file)
        downstream_df = pd.read_csv(downstream_file)
        if qinglong_lake_file:
            qinglong_lake_df = pd.read_csv(qinglong_lake_file)
    except FileNotFoundError:
        print("File not found, please check the path")
        return None

    # Assume the raw columns are, in order: ['DateTime', 'TagName', 'Value']
    upstream_df.columns = ['DateTime', 'TagName', 'Value']
    downstream_df.columns = ['DateTime', 'TagName', 'Value']
    if qinglong_lake_file:
        qinglong_lake_df.columns = ['DateTime', 'TagName', 'Value']

    # Convert timestamps and coerce values to numeric
    upstream_df['DateTime'] = pd.to_datetime(upstream_df['DateTime'])
    downstream_df['DateTime'] = pd.to_datetime(downstream_df['DateTime'])
    if qinglong_lake_file:
        qinglong_lake_df['DateTime'] = pd.to_datetime(qinglong_lake_df['DateTime'])
    upstream_df['Value'] = pd.to_numeric(upstream_df['Value'], errors='coerce')
    downstream_df['Value'] = pd.to_numeric(downstream_df['Value'], errors='coerce')
    if qinglong_lake_file:
        qinglong_lake_df['Value'] = pd.to_numeric(qinglong_lake_df['Value'], errors='coerce')

    # Drop salinity readings below 5
    upstream_df = upstream_df[upstream_df['Value'] >= 5]
    downstream_df = downstream_df[downstream_df['Value'] >= 5]
    if qinglong_lake_file:
        qinglong_lake_df = qinglong_lake_df[qinglong_lake_df['Value'] >= 5]

    # Replace 0 with NaN, and mark values beyond 3 standard deviations as NaN
    for df in [upstream_df, downstream_df]:
        df.loc[df['Value'] == 0, 'Value'] = np.nan
        mean_val, std_val = df['Value'].mean(), df['Value'].std()
        lower_bound, upper_bound = mean_val - 3 * std_val, mean_val + 3 * std_val
        df.loc[(df['Value'] < lower_bound) | (df['Value'] > upper_bound), 'Value'] = np.nan
    if qinglong_lake_file:
        qinglong_lake_df.loc[qinglong_lake_df['Value'] == 0, 'Value'] = np.nan
        mean_val, std_val = qinglong_lake_df['Value'].mean(), qinglong_lake_df['Value'].std()
        lower_bound, upper_bound = mean_val - 3 * std_val, mean_val + 3 * std_val
        qinglong_lake_df.loc[(qinglong_lake_df['Value'] < lower_bound) | (qinglong_lake_df['Value'] > upper_bound), 'Value'] = np.nan

    # Rename the Value column and keep only the needed columns
    upstream_df = upstream_df.rename(columns={'Value': 'upstream'})[['DateTime', 'upstream']]
    downstream_df = downstream_df.rename(columns={'Value': 'downstream'})[['DateTime', 'downstream']]
    if qinglong_lake_file:
        qinglong_lake_df = qinglong_lake_df.rename(columns={'Value': 'qinglong_lake'})[['DateTime', 'qinglong_lake']]

    # Merge the series
    merged_df = pd.merge(upstream_df, downstream_df, on='DateTime', how='inner')
    if qinglong_lake_file:
        merged_df = pd.merge(merged_df, qinglong_lake_df, on='DateTime', how='left')

    print(f"Rows after merge: {len(merged_df)}")
    merged_df = merged_df.set_index('DateTime')

    # Fill gaps: linear interpolation first, then time-based interpolation,
    # and finally forward/backward fill
    merged_df['upstream'] = merged_df['upstream'].interpolate(method='linear', limit=4)
    merged_df['downstream'] = merged_df['downstream'].interpolate(method='linear', limit=4)
    if qinglong_lake_file:
        merged_df['qinglong_lake'] = merged_df['qinglong_lake'].interpolate(method='linear', limit=4)
    merged_df['upstream'] = merged_df['upstream'].interpolate(method='time', limit=24)
    merged_df['downstream'] = merged_df['downstream'].interpolate(method='time', limit=24)
    if qinglong_lake_file:
        merged_df['qinglong_lake'] = merged_df['qinglong_lake'].interpolate(method='time', limit=24)
    merged_df['upstream'] = merged_df['upstream'].ffill().bfill()
    merged_df['downstream'] = merged_df['downstream'].ffill().bfill()
    if qinglong_lake_file:
        merged_df['qinglong_lake'] = merged_df['qinglong_lake'].ffill().bfill()

    # Smooth with a centered rolling mean
    merged_df['upstream_smooth'] = merged_df['upstream'].rolling(window=24, min_periods=1, center=True).mean()
    merged_df['downstream_smooth'] = merged_df['downstream'].rolling(window=24, min_periods=1, center=True).mean()
    if qinglong_lake_file:
        merged_df['qinglong_lake_smooth'] = merged_df['qinglong_lake'].rolling(window=24, min_periods=1, center=True).mean()
    # Use a wider smoothing window on the low-salinity sections
    low_sal_mask = merged_df['upstream'] < 50
    if low_sal_mask.any():
        merged_df.loc[low_sal_mask, 'upstream_smooth'] = merged_df.loc[low_sal_mask, 'upstream']\
            .rolling(window=48, min_periods=1, center=True).mean()

    merged_df = merged_df.dropna()
    merged_df = merged_df[merged_df['upstream'].apply(np.isfinite)]
    merged_df = merged_df[merged_df['downstream'].apply(np.isfinite)]
    if qinglong_lake_file:
        merged_df = merged_df[merged_df['qinglong_lake'].apply(np.isfinite)]
    merged_df = merged_df.reset_index()
    print(f"Rows after cleaning: {len(merged_df)}")
    print(f"Upstream salinity range: {merged_df['upstream'].min()} - {merged_df['upstream'].max()}")
    print(f"Downstream salinity range: {merged_df['downstream'].min()} - {merged_df['downstream'].max()}")
    if qinglong_lake_file:
        print(f"Qinglong Lake salinity range: {merged_df['qinglong_lake'].min()} - {merged_df['qinglong_lake'].max()}")
    merged_df = merged_df.sort_values('DateTime')
    return merged_df
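
# Illustrative raw-file layout (an assumption inferred from the column
# renaming above, not copied from a real data file): one reading per row,
#
#   DateTime,            TagName,  Value
#   2023-01-01 00:00:00, salinity, 123.4
#   2023-01-01 01:00:00, salinity, 125.1
#
# load_data() then coerces Value to numeric, drops readings below 5, masks
# zeros and 3-sigma outliers as NaN, interpolates and smooths, and returns a
# single merged DataFrame with 'upstream', 'downstream' and '*_smooth' columns.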

# -------------------------------
# Add lunar (tidal) features
# -------------------------------
def add_lunar_features(df):
    lunar_day, lunar_phase_sin, lunar_phase_cos, is_high_tide = [], [], [], []
    for dt in df['DateTime']:
        ld = LunarDate.fromSolarDate(dt.year, dt.month, dt.day)
        lunar_day.append(ld.day)
        lunar_phase_sin.append(np.sin(2 * np.pi * ld.day / 15))
        lunar_phase_cos.append(np.cos(2 * np.pi * ld.day / 15))
        is_high_tide.append(1 if (ld.day <= 5 or (ld.day >= 16 and ld.day <= 20)) else 0)
    df['lunar_day'] = lunar_day
    df['lunar_phase_sin'] = lunar_phase_sin
    df['lunar_phase_cos'] = lunar_phase_cos
    df['is_high_tide'] = is_high_tide
    return df
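
# Worked example of the encoding above (values follow directly from the
# formulas): lunar day 8 gives an angle of 2*pi*8/15 ~ 3.35 rad, so
# lunar_phase_sin ~ -0.21, lunar_phase_cos ~ -0.98 and is_high_tide = 0,
# while lunar days 1-5 and 16-20 (around new and full moon, i.e. spring
# tides) get is_high_tide = 1. The sin/cos pair keeps the roughly 15-day
# tidal cycle continuous, so day 15 wraps around to sit next to day 1
# instead of at the opposite end of a linear scale.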

# -------------------------------
# Batch-create lagged features (vectorized with shift)
# -------------------------------
def batch_create_delay_features(df, delay_hours):
    for delay in delay_hours:
        df[f'upstream_delay_{delay}h'] = df['upstream'].shift(delay)
        df[f'downstream_delay_{delay}h'] = df['downstream'].shift(delay)
    return df
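
# shift(k) aligns each row with the value k rows earlier, so with hourly data
# 'upstream_delay_24h' holds the upstream salinity from 24 hours before; the
# first k rows of every lag column are NaN. A minimal sketch with hypothetical
# values (not from the real data):
#
#   demo = pd.DataFrame({'upstream': [1.0, 2.0, 3.0]})
#   demo['upstream'].shift(1)   # -> NaN, 1.0, 2.0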

# -------------------------------
# Vectorized construction of training samples (optimized feature engineering)
# -------------------------------
def create_features_vectorized(df, look_back=96, forecast_horizon=5):
    """
    Use numpy's sliding_window_view to batch-generate the history windows,
    the downstream windows and the labels; the remaining features (time,
    lunar, statistics, lags) are read in bulk and concatenated.
    """
    # Number of usable samples:
    total_samples = len(df) - look_back - forecast_horizon + 1
    if total_samples <= 0:
        print("Not enough data to create features")
        return np.array([]), np.array([])

    # History windows via sliding_window_view (look_back consecutive upstream values)
    upstream_array = df['upstream'].values  # shape (n,)
    # The resulting windows have shape (n - look_back + 1, look_back)
    window_up = sliding_window_view(upstream_array, window_shape=look_back)[:total_samples, :]

    # Most recent 24 hours of downstream data, as sliding windows of size 24
    downstream_array = df['downstream'].values
    window_down_full = sliding_window_view(downstream_array, window_shape=24)
    # The original loop logic took df['downstream'].iloc[i+look_back-24:i+look_back],
    # i.e. window index i+look_back-24 for i in [0, total_samples)
    window_down = window_down_full[look_back-24 : look_back-24 + total_samples, :]

    # Time and lunar features; the sample range is df.iloc[look_back : len(df)-forecast_horizon+1]
    sample_df = df.iloc[look_back: len(df)-forecast_horizon+1].copy()
    basic_time = sample_df['DateTime'].dt.hour.values.reshape(-1, 1) / 24.0
    weekday = sample_df['DateTime'].dt.dayofweek.values.reshape(-1, 1) / 7.0
    month = sample_df['DateTime'].dt.month.values.reshape(-1, 1) / 12.0
    basic_time_feats = np.hstack([basic_time, weekday, month])

    lunar_feats = sample_df[['lunar_phase_sin', 'lunar_phase_cos', 'is_high_tide']].values
    # Statistical features (precomputed with rolling; pick the matching rows)
    try:
        stats_up = sample_df[['mean_1d_up', 'mean_3d_up', 'std_1d_up', 'max_1d_up', 'min_1d_up']].values
        stats_down = sample_df[['mean_1d_down', 'mean_3d_down', 'std_1d_down', 'max_1d_down', 'min_1d_down']].values
    except KeyError as e:
        print(f"Statistical feature columns missing: {e}; compute them first")
        return np.array([]), np.array([])

    # Lag features: assume every lag column name starts with "upstream_delay_" or "downstream_delay_"
    delay_cols = [col for col in sample_df.columns if col.startswith('upstream_delay_') or col.startswith('downstream_delay_')]
    delay_feats = sample_df[delay_cols].values

    # Concatenate all features: the history window (window_up) and the downstream
    # window (window_down) first, then the remaining feature groups
    X = np.hstack([window_up, window_down, basic_time_feats, lunar_feats, stats_up, stats_down, delay_feats])

    # Build labels: sliding windows of forecast_horizon downstream values
    label_full = sliding_window_view(downstream_array, window_shape=forecast_horizon)
    # The label range runs from index = look_back to look_back + total_samples
    y = label_full[look_back: look_back + total_samples, :]
    global feature_columns
    feature_columns = ["combined_vector_features"]
    print(f"Vectorized feature engineering done; valid samples: {X.shape[0]}")
    return X, y
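
# sliding_window_view in miniature (standard numpy behaviour):
#
#   sliding_window_view(np.arange(5), window_shape=3)
#   # -> [[0, 1, 2],
#   #     [1, 2, 3],
#   #     [2, 3, 4]]
#
# Row i is the window starting at index i, which is why sample i's history
# window is row i, its downstream window is row i + look_back - 24, and its
# label is row i + look_back above. The windows are views into the original
# array, so nothing is copied.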

# -------------------------------
# Fetch model accuracy metrics
# -------------------------------
def get_model_metrics():
    """Read the accuracy metrics stored in the model cache file."""
    model_cache_file = 'salinity_model.pkl'
    if os.path.exists(model_cache_file):
        try:
            with open(model_cache_file, 'rb') as f:
                model_data = pickle.load(f)
            return {
                'rmse': model_data.get('rmse', None),
                'mae': model_data.get('mae', None)
            }
        except Exception as e:
            print(f"Failed to read model metrics: {e}")
    return None

# -------------------------------
# Model training and prediction (reports validation RMSE and MAE)
# -------------------------------
def train_and_predict(df, start_time, force_retrain=False):
    global cached_model, last_training_time
    model_cache_file = 'salinity_model.pkl'
    model_needs_training = True

    if os.path.exists(model_cache_file) and force_retrain:
        try:
            os.remove(model_cache_file)
            print("Old model cache removed (forced retraining)")
        except Exception as e:
            print("Failed to remove cache:", e)

    train_df = df[df['DateTime'] < start_time].copy()
    if not force_retrain and cached_model is not None and last_training_time is not None:
        if last_training_time >= train_df['DateTime'].max():
            model_needs_training = False
            print(f"Using cached model, trained at: {last_training_time}")
    elif not force_retrain and os.path.exists(model_cache_file):
        try:
            with open(model_cache_file, 'rb') as f:
                model_data = pickle.load(f)
            cached_model = model_data['model']
            last_training_time = model_data['training_time']
            if last_training_time >= train_df['DateTime'].max():
                model_needs_training = False
                print(f"Model loaded from file, trained at: {last_training_time}")
        except Exception as e:
            print("Failed to load model:", e)

    if model_needs_training:
        print("Training a new model...")
        if len(train_df) < 100:
            print("Not enough training data")
            return None, None, None, None

        start_train = time()
        X, y = create_features_vectorized(train_df, look_back=96, forecast_horizon=5)
        if len(X) == 0 or len(y) == 0:
            print("Not enough samples generated; aborting training")
            return None, None, None, None
        print(f"Training samples: {X.shape[0]}")
        X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
        model = XGBRegressor(
            n_estimators=300,
            learning_rate=0.03,
            max_depth=5,
            min_child_weight=2,
            subsample=0.85,
            colsample_bytree=0.85,
            gamma=0.1,
            reg_alpha=0.2,
            reg_lambda=1.5,
            n_jobs=-1,
            random_state=42
        )
        try:
            model.fit(X_train, y_train,
                      eval_set=[(X_val, y_val)], eval_metric='rmse',
                      early_stopping_rounds=20, verbose=False)
            # Compute RMSE and MAE on the validation set
            y_val_pred = model.predict(X_val)
            rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))
            mae = mean_absolute_error(y_val, y_val_pred)
            print(f"Validation RMSE: {rmse:.4f}, MAE: {mae:.4f}")
            last_training_time = start_time
            cached_model = model
            with open(model_cache_file, 'wb') as f:
                pickle.dump({
                    'model': model,
                    'training_time': last_training_time,
                    'feature_columns': feature_columns,
                    'rmse': rmse,
                    'mae': mae
                }, f)
            print(f"Model training finished in {time() - start_train:.2f}s")
        except Exception as e:
            print("Model training failed:", e)
            return None, None, None, None
    else:
        model = cached_model

    # Prediction: build a single sample with the same feature layout as training
    try:
        # Follow the same approach as create_features_vectorized; the most
        # recent data must cover the history window and the other features
        n = len(df)
        if n < 96 + 5:
            print("Not enough data for prediction")
            return None, None, None, None

        # Latest upstream and downstream windows via sliding_window_view
        upstream_array = df['upstream'].values
        window_up = sliding_window_view(upstream_array, window_shape=96)[-1, :]
        downstream_array = df['downstream'].values
        window_down = sliding_window_view(downstream_array, window_shape=24)[-1, :]

        # Time and lunar features are taken at the prediction start time
        hour_norm = start_time.hour / 24.0
        weekday_norm = start_time.dayofweek / 7.0
        month_norm = start_time.month / 12.0
        basic_time_feats = np.array([hour_norm, weekday_norm, month_norm]).reshape(1, -1)
        ld = LunarDate.fromSolarDate(start_time.year, start_time.month, start_time.day)
        lunar_feats = np.array([np.sin(2*np.pi*ld.day/15),
                                np.cos(2*np.pi*ld.day/15),
                                1 if (ld.day <= 5 or (ld.day >= 16 and ld.day <= 20)) else 0]).reshape(1, -1)

        # Statistical features from the latest 24/72 hours of data
        try:
            # Prefer the statistics already computed on the DataFrame
            stats_up = df[['mean_1d_up', 'mean_3d_up', 'std_1d_up', 'max_1d_up', 'min_1d_up']].iloc[-1:].values
            stats_down = df[['mean_1d_down', 'mean_3d_down', 'std_1d_down', 'max_1d_down', 'min_1d_down']].iloc[-1:].values
        except KeyError:
            # Fall back to computing them directly from the trailing rows
            recent_up = df['upstream'].values[-24:]
            stats_up = np.array([np.mean(recent_up),
                                 np.mean(df['upstream'].values[-72:]),
                                 np.std(recent_up),
                                 np.max(recent_up),
                                 np.min(recent_up)]).reshape(1, -1)
            recent_down = df['downstream'].values[-24:]
            stats_down = np.array([np.mean(recent_down),
                                   np.mean(df['downstream'].values[-72:]),
                                   np.std(recent_down),
                                   np.max(recent_down),
                                   np.min(recent_down)]).reshape(1, -1)

        # Lag features: take the values from the last row of the lag columns
        delay_cols = [col for col in df.columns if col.startswith('upstream_delay_') or col.startswith('downstream_delay_')]
        delay_feats = df[delay_cols].iloc[-1:].values  # shape (1, ?)

        # Concatenate all prediction features in the same order as training
        X_pred = np.hstack([window_up.reshape(1, -1),
                            window_down.reshape(1, -1),
                            basic_time_feats, lunar_feats, stats_up, stats_down, delay_feats])
        if np.isnan(X_pred).any() or np.isinf(X_pred).any():
            X_pred = np.nan_to_num(X_pred, nan=0.0, posinf=1e6, neginf=-1e6)
        predictions = model.predict(X_pred)
        # Build the future date labels (predict 5 days ahead)
        future_dates = [start_time + timedelta(days=i) for i in range(5)]
        print("Prediction finished")

        # Fetch the stored model metrics
        metrics = get_model_metrics()

        return future_dates, predictions.flatten(), model, metrics
    except Exception as e:
        print("Prediction failed:", e)
        return None, None, None, None
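
# Minimal usage sketch (illustrative; the date is hypothetical, and df must
# already carry the lunar, lag and rolling-statistics columns added in the
# entry section at the bottom of this file):
#
#   dates, preds, model, metrics = train_and_predict(
#       df, pd.to_datetime('2024-01-01 00:00:00'), force_retrain=True)
#   # dates   -> 5 daily timestamps starting at the given time
#   # preds   -> the 5 predicted downstream salinity values
#   # metrics -> {'rmse': ..., 'mae': ...} from the cached validation run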

# -------------------------------
# GUI
# -------------------------------
def run_gui():
    def configure_gui_fonts():
        font_names = ['微软雅黑', 'Microsoft YaHei', 'SimSun', 'SimHei']
        for font_name in font_names:
            try:
                default_font = tkfont.nametofont("TkDefaultFont")
                default_font.configure(family=font_name)
                text_font = tkfont.nametofont("TkTextFont")
                text_font.configure(family=font_name)
                fixed_font = tkfont.nametofont("TkFixedFont")
                fixed_font.configure(family=font_name)
                return True
            except Exception:
                continue
        return False

    def on_predict():
        try:
            predict_start = time()
            status_label.config(text="Predicting...")
            root.update()
            start_time_dt = pd.to_datetime(entry.get())
            force_retrain = retrain_var.get()
            future_dates, predictions, model, metrics = train_and_predict(df, start_time_dt, force_retrain)
            if future_dates is None or predictions is None:
                status_label.config(text="Prediction failed")
                return

            # Show the model accuracy metrics
            if metrics:
                metrics_text = f"Model accuracy - RMSE: {metrics['rmse']:.4f}, MAE: {metrics['mae']:.4f}"
                metrics_label.config(text=metrics_text)

            ax.clear()
            # Plot the history (most recent 120 days)
            history_end = min(start_time_dt, df['DateTime'].max())
            history_start = history_end - timedelta(days=120)
            hist_data = df[(df['DateTime'] >= history_start) & (df['DateTime'] <= history_end)]
            ax.plot(hist_data['DateTime'], hist_data['downstream'], label='Intake (downstream) salinity', color='blue', linewidth=1.5)
            ax.plot(hist_data['DateTime'], hist_data['upstream_smooth'], label='Qinglonggang (upstream) salinity', color='purple', linewidth=1.5, alpha=0.7)
            if 'qinglong_lake_smooth' in hist_data.columns:
                ax.plot(hist_data['DateTime'], hist_data['qinglong_lake_smooth'], label='Qinglong Lake salinity', color='green', linewidth=1.5, alpha=0.7)
            ax.plot(future_dates, predictions, marker='o', linestyle='--', label='Predicted salinity', color='red', linewidth=2)
            actual_data = df[(df['DateTime'] >= start_time_dt) & (df['DateTime'] <= future_dates[-1])]
            if not actual_data.empty:
                ax.plot(actual_data['DateTime'], actual_data['downstream'], marker='s', linestyle='-', label='Actual salinity', color='orange', linewidth=2)
            std_dev = hist_data['downstream'].std() * 0.5
            ax.fill_between(future_dates, predictions - std_dev, predictions + std_dev, color='red', alpha=0.2)
            ax.set_xlabel('Date')
            ax.set_ylabel('Salinity')
            ax.set_title(f"Salinity forecast starting from {start_time_dt.strftime('%Y-%m-%d %H:%M:%S')}")
            ax.legend(loc='upper left')
            fig.tight_layout()
            canvas.draw()
            predict_time = time() - predict_start
            status_label.config(text=f"Prediction finished (took {predict_time:.2f}s)")
            result_text = "Prediction results:\n"
            for i, (date, pred) in enumerate(zip(future_dates, predictions)):
                result_text += f"Day {i+1} ({date.strftime('%Y-%m-%d')}): {pred:.2f}\n"
            result_label.config(text=result_text)
        except Exception as e:
            status_label.config(text=f"Error: {str(e)}")

    def on_scroll(event):
        xlim = ax.get_xlim()
        ylim = ax.get_ylim()
        zoom_factor = 1.1
        x_data = event.xdata if event.xdata is not None else (xlim[0] + xlim[1]) / 2
        y_data = event.ydata if event.ydata is not None else (ylim[0] + ylim[1]) / 2
        x_rel = (x_data - xlim[0]) / (xlim[1] - xlim[0])
        y_rel = (y_data - ylim[0]) / (ylim[1] - ylim[0])
        # Zoom in on scroll-up, out on scroll-down, keeping the cursor position fixed
        if event.step > 0:
            new_width = (xlim[1] - xlim[0]) / zoom_factor
            new_height = (ylim[1] - ylim[0]) / zoom_factor
        else:
            new_width = (xlim[1] - xlim[0]) * zoom_factor
            new_height = (ylim[1] - ylim[0]) * zoom_factor
        x0 = x_data - x_rel * new_width
        y0 = y_data - y_rel * new_height
        ax.set_xlim([x0, x0 + new_width])
        ax.set_ylim([y0, y0 + new_height])
        canvas.draw_idle()

    def update_cursor(event):
        if event.inaxes == ax:
            canvas.get_tk_widget().config(cursor="fleur")
        else:
            canvas.get_tk_widget().config(cursor="")

    def reset_view():
        display_history()
        status_label.config(text="Chart view reset")

    root = tk.Tk()
    root.title("Qinglonggang-Chenhang Salinity Prediction System")
    try:
        configure_gui_fonts()
    except Exception as e:
        print("Font configuration failed:", e)
    input_frame = ttk.Frame(root, padding="10")
    input_frame.pack(fill=tk.X)
    control_frame = ttk.Frame(root, padding="5")
    control_frame.pack(fill=tk.X)
    result_frame = ttk.Frame(root, padding="10")
    result_frame.pack(fill=tk.BOTH, expand=True)
    ttk.Label(input_frame, text="Start time (YYYY-MM-DD HH:MM:SS)").pack(side=tk.LEFT)
    entry = ttk.Entry(input_frame, width=25)
    entry.pack(side=tk.LEFT, padx=5)
    predict_button = ttk.Button(input_frame, text="Predict", command=on_predict)
    predict_button.pack(side=tk.LEFT)
    status_label = ttk.Label(input_frame, text="Tip: tick 'Force model retraining' on the first run")
    status_label.pack(side=tk.LEFT, padx=10)
    retrain_var = tk.BooleanVar(value=False)
    ttk.Checkbutton(control_frame, text="Force model retraining", variable=retrain_var).pack(side=tk.LEFT)
    legend_label = ttk.Label(control_frame, text="Legend: purple = Qinglonggang upstream, blue = intake downstream, red = prediction, orange = actual")
    legend_label.pack(side=tk.LEFT, padx=10)
    reset_button = ttk.Button(control_frame, text="Reset view", command=reset_view)
    reset_button.pack(side=tk.LEFT, padx=5)

    # Label showing the model accuracy
    metrics_frame = ttk.Frame(root, padding="5")
    metrics_frame.pack(fill=tk.X)
    model_metrics = get_model_metrics()
    metrics_text = "Model accuracy: unknown" if not model_metrics else f"Model accuracy - RMSE: {model_metrics['rmse']:.4f}, MAE: {model_metrics['mae']:.4f}"
    metrics_label = ttk.Label(metrics_frame, text=metrics_text)
    metrics_label.pack(side=tk.LEFT, padx=10)

    result_label = ttk.Label(result_frame, text="", justify=tk.LEFT)
    result_label.pack(side=tk.RIGHT, fill=tk.Y)
    fig, ax = plt.subplots(figsize=(10, 5), dpi=100)
    canvas = FigureCanvasTkAgg(fig, master=result_frame)
    canvas.get_tk_widget().pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
    toolbar_frame = ttk.Frame(result_frame)
    toolbar_frame.pack(side=tk.BOTTOM, fill=tk.X)
    toolbar = NavigationToolbar2Tk(canvas, toolbar_frame)
    toolbar.update()
    canvas.mpl_connect('scroll_event', on_scroll)
    canvas.mpl_connect('motion_notify_event', update_cursor)

    def display_history():
        ax.clear()
        end_date = df['DateTime'].max()
        start_date = max(df['DateTime'].min(), end_date - timedelta(days=60))
        hist_data = df[(df['DateTime'] >= start_date) & (df['DateTime'] <= end_date)]
        ax.plot(hist_data['DateTime'], hist_data['downstream'], label='Intake (downstream) salinity', color='blue', linewidth=1.5)
        ax.plot(hist_data['DateTime'], hist_data['upstream_smooth'], label='Qinglonggang (upstream) salinity', color='purple', linewidth=1.5, alpha=0.7)
        ax.set_xlabel('Date')
        ax.set_ylabel('Salinity')
        ax.set_title('Historical salinity comparison')
        ax.legend()
        fig.tight_layout()
        canvas.draw()

    display_history()
    root.mainloop()

# -------------------------------
# Entry point: load data, add features, create lag features and launch the GUI
# -------------------------------
def save_processed_data(df, filename='processed_data.pkl'):
    try:
        df.to_pickle(filename)
        print(f"Saved processed data to {filename}")
        return True
    except Exception as e:
        print(f"Failed to save data: {e}")
        return False

def load_processed_data(filename='processed_data.pkl'):
    try:
        if os.path.exists(filename):
            df = pd.read_pickle(filename)
            print(f"Loaded processed data from {filename}")
            return df
        else:
            print(f"Processed data file {filename} not found")
            return None
    except Exception as e:
        print(f"Failed to load data: {e}")
        return None
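
# Note: processed_data.pkl is a plain pickle of the feature DataFrame; it is
# never invalidated automatically, so delete it to force reprocessing after
# the raw CSVs change, e.g.:
#
#   os.remove('processed_data.pkl')   # the next run rebuilds features from the CSVs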

# Try to load previously processed data; reprocess from the raw CSVs if absent
processed_data = load_processed_data()
if processed_data is not None:
    df = processed_data
else:
    df = load_data('青龙港1.csv', '一取水.csv')
    if df is not None:
        df = add_lunar_features(df)
        delay_hours = [1, 2, 3, 4, 6, 12, 24, 36, 48, 60, 72, 84, 96, 108, 120]
        df = batch_create_delay_features(df, delay_hours)

        # Rolling statistical features
        df['mean_1d_up'] = df['upstream'].rolling(window=24, min_periods=1).mean()
        df['mean_3d_up'] = df['upstream'].rolling(window=72, min_periods=1).mean()
        df['std_1d_up'] = df['upstream'].rolling(window=24, min_periods=1).std()
        df['max_1d_up'] = df['upstream'].rolling(window=24, min_periods=1).max()
        df['min_1d_up'] = df['upstream'].rolling(window=24, min_periods=1).min()

        df['mean_1d_down'] = df['downstream'].rolling(window=24, min_periods=1).mean()
        df['mean_3d_down'] = df['downstream'].rolling(window=72, min_periods=1).mean()
        df['std_1d_down'] = df['downstream'].rolling(window=24, min_periods=1).std()
        df['max_1d_down'] = df['downstream'].rolling(window=24, min_periods=1).max()
        df['min_1d_down'] = df['downstream'].rolling(window=24, min_periods=1).min()

        # Persist the processed data for future runs
        save_processed_data(df)

if df is not None:
    run_gui()
else:
    print("Data loading failed; prediction is unavailable.")