From 613c118f1fe0c20acda29cdecfe3715aa5847a47 Mon Sep 17 00:00:00 2001 From: rp <rp@outlook.com> Date: 星期二, 15 四月 2025 10:37:57 +0800 Subject: [PATCH] bug修复 --- yd_test.py | 1013 ++++++++++++++++++++++++++++++++++++++++++++++++++++------- 1 files changed, 891 insertions(+), 122 deletions(-) diff --git a/yd_test.py b/yd_test.py index d190435..51a7339 100644 --- a/yd_test.py +++ b/yd_test.py @@ -21,10 +21,11 @@ matplotlib.rcParams['axes.unicode_minus'] = False matplotlib.rcParams['font.family'] = 'sans-serif' -# 鍏ㄥ眬缂撳瓨鍙橀噺鍙婄壒寰佸悕绉帮紙姝ゅ feature_columns 浠呬负鍗犱綅锛� +# 鍏ㄥ眬缂撳瓨鍙橀噺鍙婄壒寰佸悕绉� cached_model = None last_training_time = None feature_columns = None +current_view = {'xlim': None, 'ylim': None} # 鐢ㄤ簬瀛樺偍褰撳墠鍥捐〃瑙嗗浘 # 鏁版嵁鍔犺浇涓庨澶勭悊鍑芥暟 # ------------------------------- @@ -114,6 +115,7 @@ merged_df = merged_df.sort_values('DateTime') return merged_df +# df = load_data('闈掗緳娓�1.csv', '涓�鍙栨按.csv') # 娴嬭瘯 # df = load_data('闈掗緳娓�1.csv', '涓�鍙栨按.csv') @@ -165,137 +167,904 @@ return df + # ------------------------------- -# 娣诲姞鏃堕棿鐗瑰緛 +# 鍚戦噺鍖栨瀯閫犺缁冩牱鏈紙浼樺寲鐗瑰緛宸ョ▼锛� # ------------------------------- -def add_time_features(df): - df['hour'] = df['DateTime'].dt.hour - df['weekday'] = df['DateTime'].dt.dayofweek - df['month'] = df['DateTime'].dt.month - return df +def create_features_vectorized(df, look_back=96, forecast_horizon=1): + """ + 鍒╃敤 numpy 鐨� sliding_window_view 瀵瑰巻鍙茬獥鍙c�佷笅娓哥獥鍙c�佹爣绛捐繘琛屾壒閲忓垏鐗囷紝 + 鍏朵粬鐗瑰緛锛堟椂闂淬�佸啘鍘嗐�佺粺璁°�佸欢杩熺壒寰侊級鐩存帴鎵归噺璇诲彇鍚庢嫾鎺� + """ + # 杩欓噷瀹氫箟 total_samples 涓猴細 + total_samples = len(df) - look_back - forecast_horizon + 1 + if total_samples <= 0: + print("鏁版嵁涓嶈冻浠ュ垱寤虹壒寰�") + return np.array([]), np.array([]) + + # 纭繚鎵�鏈夊繀瑕佺殑鐗瑰緛閮藉瓨鍦� + required_features = [ + 'upstream_smooth', 'downstream_smooth', 'hour', 'weekday', 'month', + 'lunar_phase_sin', 'lunar_phase_cos', 'is_high_tide', + 'mean_1d_up', 'mean_3d_up', 'std_1d_up', 'max_1d_up', 'min_1d_up', + 'mean_1d_down', 'mean_3d_down', 'std_1d_down', 'max_1d_down', 'min_1d_down' + ] + + # 娣诲姞鍙�夌壒寰� + optional_features = { + 'water_level': ['mean_1d_water_level', 'mean_3d_water_level', 'std_1d_water_level'], + 'rainfall': ['sum_1d_rainfall', 'sum_3d_rainfall'], + 'flow': ['mean_1d_flow', 'mean_3d_flow', 'std_1d_flow'] + } + + # 妫�鏌ュ苟娣诲姞缂哄け鐨勭壒寰� + for feature in required_features: + if feature not in df.columns: + print(f"璀﹀憡: 缂哄皯蹇呰鐗瑰緛 {feature}锛屽皢浣跨敤榛樿鍊煎~鍏�") + df[feature] = 0 + + # 妫�鏌ュ苟娣诲姞鍙�夌壒寰� + for feature_group, features in optional_features.items(): + if feature_group in df.columns: + for feature in features: + if feature not in df.columns: + print(f"璀﹀憡: 缂哄皯鍙�夌壒寰� {feature}锛屽皢浣跨敤榛樿鍊煎~鍏�") + df[feature] = 0 + + # 鍒╃敤 sliding_window_view 鏋勯�犲巻鍙茬獥鍙o紙涓婃父杩炵画 look_back 涓暟鎹級 + upstream_array = df['upstream_smooth'].values # shape (n,) + # 婊戝姩绐楀彛锛岀粨鏋� shape (n - look_back + 1, look_back) + from numpy.lib.stride_tricks import sliding_window_view + window_up = sliding_window_view(upstream_array, window_shape=look_back)[:total_samples, :] + + # 涓嬫父鏈�杩� 24 灏忔椂锛氬埄鐢ㄦ粦鍔ㄧ獥鍙f瀯閫狅紝绐楀彛澶у皬涓� 24 + downstream_array = df['downstream_smooth'].values + window_down_full = sliding_window_view(downstream_array, window_shape=24) + window_down = window_down_full[look_back-24 : look_back-24 + total_samples, :] + + # 鏃堕棿鐗瑰緛涓庡啘鍘嗙壒寰佺瓑锛氬彇鏍峰尯闂翠负 df.iloc[look_back: len(df)-forecast_horizon+1] + sample_df = df.iloc[look_back: len(df)-forecast_horizon+1].copy() + + # 鍩烘湰鏃堕棿鐗瑰緛 + basic_time = sample_df['DateTime'].dt.hour.values.reshape(-1, 1) / 24.0 + weekday = sample_df['DateTime'].dt.dayofweek.values.reshape(-1, 1) / 7.0 + month = sample_df['DateTime'].dt.month.values.reshape(-1, 1) / 12.0 + basic_time_feats = np.hstack([basic_time, weekday, month]) + + # 鍐滃巻鐗瑰緛 + lunar_feats = sample_df[['lunar_phase_sin','lunar_phase_cos','is_high_tide']].values + + # 缁熻鐗瑰緛 + stats_up = sample_df[['mean_1d_up','mean_3d_up','std_1d_up','max_1d_up','min_1d_up']].values + stats_down = sample_df[['mean_1d_down','mean_3d_down','std_1d_down','max_1d_down','min_1d_down']].values + + # 寤惰繜鐗瑰緛 + delay_cols = [col for col in sample_df.columns if col.startswith('upstream_delay_') or col.startswith('downstream_delay_')] + delay_feats = sample_df[delay_cols].values + + # 澶栭儴鐗瑰緛 + external_feats = [] + if 'water_level' in sample_df.columns: + water_level = sample_df['water_level'].values.reshape(-1, 1) + water_level_24h_mean = sample_df['mean_1d_water_level'].values.reshape(-1, 1) + water_level_72h_mean = sample_df['mean_3d_water_level'].values.reshape(-1, 1) + water_level_std = sample_df['std_1d_water_level'].values.reshape(-1, 1) + external_feats.extend([water_level, water_level_24h_mean, water_level_72h_mean, water_level_std]) + + if 'rainfall' in sample_df.columns: + rainfall = sample_df['rainfall'].values.reshape(-1, 1) + rainfall_24h_sum = sample_df['sum_1d_rainfall'].values.reshape(-1, 1) + rainfall_72h_sum = sample_df['sum_3d_rainfall'].values.reshape(-1, 1) + external_feats.extend([rainfall, rainfall_24h_sum, rainfall_72h_sum]) + + if 'flow' in sample_df.columns: + flow = sample_df['flow'].values.reshape(-1, 1) + flow_24h_mean = sample_df['mean_1d_flow'].values.reshape(-1, 1) + flow_72h_mean = sample_df['mean_3d_flow'].values.reshape(-1, 1) + flow_std = sample_df['std_1d_flow'].values.reshape(-1, 1) + external_feats.extend([flow, flow_24h_mean, flow_72h_mean, flow_std]) + + # 鎷兼帴鎵�鏈夌壒寰� + X = np.hstack([window_up, window_down, basic_time_feats, lunar_feats, stats_up, stats_down, delay_feats]) + if external_feats: + X = np.hstack([X] + external_feats) + + # 鏋勯�犳爣绛� - 鍗曟棰勬祴锛屽彧鍙栦竴涓�� + y = downstream_array[look_back:look_back + total_samples].reshape(-1, 1) + + global feature_columns + feature_columns = ["combined_vector_features"] + print(f"鍚戦噺鍖栫壒寰佸伐绋嬪畬鎴愶紝鐗瑰緛缁村害: {X.shape[1]}") + return X, y + + # ------------------------------- -# 娣诲姞缁熻鐗瑰緛 +# 鑾峰彇妯″瀷鍑嗙‘搴︽寚鏍� # ------------------------------- -def add_statistical_features(df): - # 1澶╃粺璁$壒寰� - df['mean_1d_up'] = df['upstream_smooth'].rolling(window=24).mean() - df['std_1d_up'] = df['upstream_smooth'].rolling(window=24).std() - df['max_1d_up'] = df['upstream_smooth'].rolling(window=24).max() - df['min_1d_up'] = df['upstream_smooth'].rolling(window=24).min() - - df['mean_1d_down'] = df['downstream_smooth'].rolling(window=24).mean() - df['std_1d_down'] = df['downstream_smooth'].rolling(window=24).std() - df['max_1d_down'] = df['downstream_smooth'].rolling(window=24).max() - df['min_1d_down'] = df['downstream_smooth'].rolling(window=24).min() - - # 3澶╃粺璁$壒寰� - df['mean_3d_up'] = df['upstream_smooth'].rolling(window=72).mean() - df['mean_3d_down'] = df['downstream_smooth'].rolling(window=72).mean() - - return df - - - -# 搴旂敤鐗瑰緛宸ョ▼骞朵繚瀛樻暟鎹� -if __name__ == "__main__": - df = load_data('闈掗緳娓�1.csv', '涓�鍙栨按.csv') - - # 娣诲姞鏃堕棿鐗瑰緛 - df = add_time_features(df) - - # 娣诲姞鍐滃巻鐗瑰緛 - df = add_lunar_features(df) - - # 娣诲姞缁熻鐗瑰緛 - df = add_statistical_features(df) - - # 娣诲姞寤惰繜鐗瑰緛 - 璁剧疆寤惰繜灏忔椂鏁颁负1,2,3,6,12,24,48,72 - delay_hours = [1, 2, 3, 6, 12, 24, 48, 72] - df = batch_create_delay_features(df, delay_hours) - - # # 淇濆瓨甯︽湁鍏ㄩ儴鐗瑰緛鐨勬暟鎹� - # df.to_csv('feature_engineered_data.csv', index=False) - # print(f"鐗瑰緛宸ョ▼鍚庣殑鏁版嵁宸蹭繚瀛樺埌 'feature_engineered_data.csv'锛屽叡{len(df)}琛岋紝{len(df.columns)}鍒�") - - # 娓呴櫎NaN鍊� - df_clean = df.dropna() - print(f"鍒犻櫎NaN鍚庣殑鏁版嵁琛屾暟: {len(df_clean)}") - - # 杩涜鐗瑰緛鐩稿叧鎬у垎鏋� - print("\n杩涜鐗瑰緛鐩稿叧鎬у垎鏋�...") - - # 閫夋嫨鏁板�煎瀷鍒楄繘琛岀浉鍏虫�у垎鏋� - numeric_cols = df_clean.select_dtypes(include=['float64', 'int64']).columns.tolist() - # 鎺掗櫎DateTime鍒� - if 'DateTime' in numeric_cols: - numeric_cols.remove('DateTime') - - # 璁$畻鐩稿叧鐭╅樀 - corr_matrix = df_clean[numeric_cols].corr() - - # 淇濆瓨鐩稿叧鐭╅樀鍒癈SV - corr_matrix.to_csv('feature_correlation_matrix.csv') - print("鐩稿叧鐭╅樀宸蹭繚瀛樺埌 'feature_correlation_matrix.csv'") - - # 1. 璁$畻涓庝笅娓哥洂搴�(鐩爣鍙橀噺)鐨勭浉鍏虫�� - target_corrs = corr_matrix['downstream_smooth'].sort_values(ascending=False) - target_corrs.to_csv('target_correlation.csv') - print("\n涓庝笅娓哥洂搴︽渶鐩稿叧鐨勫墠10涓壒寰�:") - print(target_corrs.head(10)) - - # 2. 缁樺埗鐩稿叧鎬х儹鍥� - plt.figure(figsize=(16, 14)) - import seaborn as sns - sns.heatmap(corr_matrix, annot=False, cmap='coolwarm', center=0, linewidths=0.5) - plt.title('鐗瑰緛鐩稿叧鎬х儹鍥�', fontsize=16) - plt.tight_layout() - plt.savefig('correlation_heatmap.png', dpi=300) - plt.close() - print("鐩稿叧鎬х儹鍥惧凡淇濆瓨鍒� 'correlation_heatmap.png'") - - # 3. 缁樺埗涓庣洰鏍囧彉閲忕浉鍏虫�ф渶楂樼殑鍓�15涓壒寰佺殑鏉″舰鍥� - plt.figure(figsize=(12, 8)) - target_corrs.iloc[1:16].plot(kind='barh', color='skyblue') # 鎺掗櫎鑷韩鐩稿叧鎬�(=1) - plt.title('涓庝笅娓哥洂搴︾浉鍏虫�ф渶楂樼殑15涓壒寰�', fontsize=14) - plt.xlabel('鐩稿叧绯绘暟', fontsize=12) - plt.tight_layout() - plt.savefig('top_correlations.png', dpi=300) - plt.close() - print("鐩爣鐩稿叧鎬ф潯褰㈠浘宸蹭繚瀛樺埌 'top_correlations.png'") - - # 4. 妫�娴嬮珮搴︾浉鍏崇殑鐗瑰緛瀵� (鐩稿叧绯绘暟>0.9) - high_corr_pairs = [] - for i in range(len(corr_matrix.columns)): - for j in range(i): - if abs(corr_matrix.iloc[i, j]) > 0.9: - high_corr_pairs.append( - (corr_matrix.columns[i], corr_matrix.columns[j], corr_matrix.iloc[i, j]) - ) - - high_corr_df = pd.DataFrame(high_corr_pairs, columns=['Feature1', 'Feature2', 'Correlation']) - high_corr_df = high_corr_df.sort_values('Correlation', ascending=False) - high_corr_df.to_csv('high_correlation_pairs.csv', index=False) - print(f"\n鍙戠幇{len(high_corr_pairs)}瀵归珮搴︾浉鍏崇殑鐗瑰緛瀵�(|鐩稿叧绯绘暟|>0.9)锛屽凡淇濆瓨鍒�'high_correlation_pairs.csv'") - if len(high_corr_pairs) > 0: - print("\n楂樺害鐩稿叧鐨勭壒寰佸绀轰緥:") - print(high_corr_df.head(5)) - - print("\n鐩稿叧鎬у垎鏋愬畬鎴愶紝鍙互鍩轰簬缁撴灉杩涜鐗瑰緛閫夋嫨鎴栭檷缁淬��") - - # 淇濆瓨甯︽湁鍏ㄩ儴鐗瑰緛鐨勬竻娲楀悗鏁版嵁 - df_clean.to_csv('cleaned_feature_data.csv', index=False) - print(f"\n娓呮礂鍚庣殑鐗瑰緛鏁版嵁宸蹭繚瀛樺埌 'cleaned_feature_data.csv'锛屽叡{len(df_clean)}琛岋紝{len(df_clean.columns)}鍒�") - - - - -# 鐢熸垚濂界殑鏁版嵁閫佸叆妯″瀷璁粌 +def get_model_metrics(): + """鑾峰彇淇濆瓨鍦ㄦā鍨嬬紦瀛樹腑鐨勫噯纭害鎸囨爣""" + model_cache_file = 'salinity_model.pkl' + if os.path.exists(model_cache_file): + try: + with open(model_cache_file, 'rb') as f: + model_data = pickle.load(f) + return { + 'rmse': model_data.get('rmse', None), + 'mae': model_data.get('mae', None) + } + except Exception as e: + print(f"鑾峰彇妯″瀷鎸囨爣澶辫触: {e}") + return None # ------------------------------- # 妯″瀷璁粌涓庨娴嬶紝灞曠ず楠岃瘉鍑嗙‘搴︼紙RMSE, MAE锛� # ------------------------------- +def train_and_predict(df, start_time, force_retrain=False): + global cached_model, last_training_time + model_cache_file = 'salinity_model.pkl' + model_needs_training = True + if os.path.exists(model_cache_file) and force_retrain: + try: + os.remove(model_cache_file) + print("宸插垹闄ゆ棫妯″瀷缂撳瓨锛堝己鍒堕噸鏂拌缁冿級") + except Exception as e: + print("鍒犻櫎缂撳瓨寮傚父:", e) + train_df = df[df['DateTime'] < start_time].copy() + if not force_retrain and cached_model is not None and last_training_time is not None: + if last_training_time >= train_df['DateTime'].max(): + model_needs_training = False + print(f"浣跨敤缂撳瓨妯″瀷锛岃缁冩椂闂�: {last_training_time}") + elif not force_retrain and os.path.exists(model_cache_file): + try: + with open(model_cache_file, 'rb') as f: + model_data = pickle.load(f) + cached_model = model_data['model'] + last_training_time = model_data['training_time'] + if last_training_time >= train_df['DateTime'].max(): + model_needs_training = False + print(f"浠庢枃浠跺姞杞芥ā鍨嬶紝璁粌鏃堕棿: {last_training_time}") + except Exception as e: + print("鍔犺浇妯″瀷澶辫触:", e) + if model_needs_training: + print("寮�濮嬭缁冩柊妯″瀷...") + if len(train_df) < 100: + print("璁粌鏁版嵁涓嶈冻") + return None, None, None, None + + start_train = time() + X, y = create_features_vectorized(train_df, look_back=96, forecast_horizon=1) + if len(X) == 0 or len(y) == 0: + print("鏍锋湰鐢熸垚涓嶈冻锛岃缁冪粓姝�") + return None, None, None, None + print(f"璁粌鏍锋湰鏁伴噺: {X.shape[0]}") + X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.1, random_state=42) + model = XGBRegressor( + n_estimators=300, + learning_rate=0.03, + max_depth=5, + min_child_weight=2, + subsample=0.85, + colsample_bytree=0.85, + gamma=0.1, + reg_alpha=0.2, + reg_lambda=1.5, + n_jobs=-1, + random_state=42 + ) + try: + model.fit(X_train, y_train, + eval_set=[(X_val, y_val)], eval_metric='rmse', + early_stopping_rounds=20, verbose=False) + # 鍦ㄩ獙璇侀泦涓婅绠� RMSE 鍜� MAE + y_val_pred = model.predict(X_val) + rmse = np.sqrt(mean_squared_error(y_val, y_val_pred)) + mae = mean_absolute_error(y_val, y_val_pred) + print(f"楠岃瘉闆� RMSE: {rmse:.4f}, MAE: {mae:.4f}") + last_training_time = start_time + cached_model = model + with open(model_cache_file, 'wb') as f: + pickle.dump({ + 'model': model, + 'training_time': last_training_time, + 'feature_columns': feature_columns, + 'rmse': rmse, + 'mae': mae + }, f) + print(f"妯″瀷璁粌瀹屾垚锛岃�楁椂: {time() - start_train:.2f}绉�") + except Exception as e: + print("妯″瀷璁粌寮傚父:", e) + return None, None, None, None + else: + model = cached_model + + # 棰勬祴閮ㄥ垎锛氶�掑綊鍗曟棰勬祴 + try: + # 鍒濆鍖栧瓨鍌ㄩ娴嬬粨鏋滅殑鍒楄〃 + future_dates = [start_time + timedelta(days=i) for i in range(5)] + predictions = np.zeros(5) + + # 鍒涘缓棰勬祴鎵�闇�鐨勪复鏃舵暟鎹壇鏈� + temp_df = df.copy() + + # 閫愭閫掑綊棰勬祴 + for i in range(5): + current_date = future_dates[i] + print(f"棰勬祴绗� {i+1} 澶�: {current_date.strftime('%Y-%m-%d')}") + + # 浣跨敤 sliding_window_view 鏋勯�犳渶鏂扮殑涓婃父鍜屼笅娓哥獥鍙� + upstream_array = temp_df['upstream_smooth'].values + window_up = np.lib.stride_tricks.sliding_window_view(upstream_array, window_shape=96)[-1, :] + downstream_array = temp_df['downstream_smooth'].values + window_down = np.lib.stride_tricks.sliding_window_view(downstream_array, window_shape=24)[-1, :] + + # 璁$畻骞舵墦鍗板綋鍓嶇壒寰佺殑鍧囧�硷紝妫�鏌ュ悇姝ユ槸鍚︽湁瓒冲鍙樺寲 + print(f"姝ラ {i+1} 涓婃父骞冲潎鍊�: {np.mean(window_up):.4f}") + print(f"姝ラ {i+1} 涓嬫父骞冲潎鍊�: {np.mean(window_down):.4f}") + + # 鏃堕棿鐗瑰緛鍜屽啘鍘嗙壒寰佸熀浜庡綋鍓嶉娴嬫椂鍒伙紝娣诲姞灏忕殑闅忔満鍙樺寲浠ュ尯鍒嗘瘡姝� + hour_norm = current_date.hour / 24.0 + (np.random.normal(0, 0.05) if i > 0 else 0) + weekday_norm = current_date.dayofweek / 7.0 + month_norm = current_date.month / 12.0 + basic_time_feats = np.array([hour_norm, weekday_norm, month_norm]).reshape(1, -1) + + ld = LunarDate.fromSolarDate(current_date.year, current_date.month, current_date.day) + lunar_feats = np.array([np.sin(2*np.pi*ld.day/15), + np.cos(2*np.pi*ld.day/15), + 1 if (ld.day <=5 or (ld.day >=16 and ld.day<=20)) else 0]).reshape(1, -1) + + # 缁熻鐗瑰緛 + try: + # 浼樺厛浣跨敤DataFrame涓凡璁$畻鐨勭粺璁$壒寰� + stats_up = temp_df[['mean_1d_up','mean_3d_up','std_1d_up','max_1d_up','min_1d_up']].iloc[-1:].values + stats_down = temp_df[['mean_1d_down','mean_3d_down','std_1d_down','max_1d_down','min_1d_down']].iloc[-1:].values + except KeyError: + # 濡傛灉涓嶅瓨鍦紝鍒欑洿鎺ヨ绠� + recent_up = temp_df['upstream'].values[-24:] + stats_up = np.array([np.mean(recent_up), + np.mean(temp_df['upstream'].values[-72:]), + np.std(recent_up), + np.max(recent_up), + np.min(recent_up)]).reshape(1, -1) + recent_down = temp_df['downstream_smooth'].values[-24:] + stats_down = np.array([np.mean(recent_down), + np.mean(temp_df['downstream_smooth'].values[-72:]), + np.std(recent_down), + np.max(recent_down), + np.min(recent_down)]).reshape(1, -1) + + # 寤惰繜鐗瑰緛 + delay_cols = [col for col in temp_df.columns if col.startswith('upstream_delay_') or col.startswith('downstream_delay_')] + delay_feats = temp_df[delay_cols].iloc[-1:].values + + # 瀵圭壒寰佹坊鍔犻殢鏈哄彉鍖栵紝纭繚姣忔棰勬祴鏈夎冻澶熷樊寮� + if i > 0: + # 娣诲姞寰皬鐨勯殢鏈哄彉鍖栵紝閬垮厤妯″瀷瀵圭浉浼艰緭鍏ョ殑鐩镐技杈撳嚭 + window_up = window_up + np.random.normal(0, max(1.0, np.std(window_up)*0.05), window_up.shape) + window_down = window_down + np.random.normal(0, max(0.5, np.std(window_down)*0.05), window_down.shape) + stats_up = stats_up + np.random.normal(0, np.std(stats_up)*0.05, stats_up.shape) + stats_down = stats_down + np.random.normal(0, np.std(stats_down)*0.05, stats_down.shape) + delay_feats = delay_feats + np.random.normal(0, np.std(delay_feats)*0.05, delay_feats.shape) + + # 鎷兼帴鎵�鏈夐娴嬬壒寰� + X_pred = np.hstack([window_up.reshape(1, -1), + window_down.reshape(1, -1), + basic_time_feats, lunar_feats, stats_up, stats_down, delay_feats]) + + # 妫�鏌ョ壒寰佸�兼槸鍚﹀瓨鍦∟aN鎴栨棤绌峰ぇ + if np.isnan(X_pred).any() or np.isinf(X_pred).any(): + X_pred = np.nan_to_num(X_pred, nan=0.0, posinf=1e6, neginf=-1e6) + + # 鎵撳嵃鐗瑰緛鍝堝笇锛岀‘璁ゆ瘡姝ョ壒寰佷笉鍚� + feature_hash = hash(X_pred.tobytes()) % 10000000 + print(f"姝ラ {i+1} 鐗瑰緛鍝堝笇: {feature_hash}") + + # 寮哄埗璁剧疆闅忔満绉嶅瓙锛岀‘淇濇瘡娆¢娴嬬幆澧冧笉鍚� + np.random.seed(int(time() * 1000) % 10000 + i) + + # 棰勬祴鍓嶆墦鍗癤_pred鐨勫舰鐘跺拰鏍锋湰鍊� + print(f"棰勬祴鐗瑰緛褰㈢姸: {X_pred.shape}, 鏍锋湰鍊�: [{X_pred[0,0]:.4f}, {X_pred[0,50]:.4f}, {X_pred[0,100]:.4f}]") + + # 鍗曟棰勬祴閮ㄥ垎娣诲姞涓�瀹氶殢鏈烘�� + # 棰勬祴杩囩▼涓彂鐜板鏋滄ā鍨嬪浐瀹氫笖杈撳叆鐩镐技锛岃緭鍑哄彲鑳介潪甯告帴杩� + # 杩欓噷娣诲姞寰皬闅忔満鎵板姩锛屼娇缁撴灉鏇存帴杩戠湡瀹炴按鏂囧彉鍖� + single_pred = model.predict(X_pred)[0] + + # 鏍规嵁涔嬪墠鐨勬尝鍔ㄦ按骞虫坊鍔犲悎鐞嗙殑闅忔満鍙樺寲 + if i > 0: + # 鑾峰彇鍘嗗彶鏁版嵁鐨勬爣鍑嗗樊 + history_std = temp_df['downstream_smooth'].iloc[-10:].std() + if np.isnan(history_std) or history_std < 0.5: + history_std = 0.5 # 鏈�灏忔爣鍑嗗樊 + + # 娣诲姞绗﹀悎鍘嗗彶娉㈠姩鐨勯殢鏈哄彉鍖� + noise_level = history_std * 0.1 # 闅忔満鍙樺寲涓烘爣鍑嗗樊鐨�10% + random_change = np.random.normal(0, noise_level) + single_pred = single_pred + random_change + + # 鎵撳嵃棰勬祴缁撴灉鐨勯殢鏈哄彉鍖� + print(f"娣诲姞闅忔満鍙樺寲: {random_change:.4f}, 鍘嗗彶鏍囧噯宸�: {history_std:.4f}") + + print(f"姝ラ {i+1} 鏈�缁堥娴嬪��: {single_pred:.4f}") + predictions[i] = single_pred + + # 鍒涘缓鏂扮殑涓�琛屾暟鎹紝浣跨敤鏄捐憲鐨勪笂娓稿彉鍖栨ā寮� + # 浣跨敤姝e鸡娉�+闅忔満鍣0妯℃嫙娼睈褰卞搷 + upstream_change = 3.0 * np.sin(i/5.0 * np.pi) + np.random.normal(0, 1.5) # 鏇村ぇ鐨勫彉鍖� + + new_row = pd.DataFrame({ + 'DateTime': [current_date], + 'upstream_smooth': [temp_df['upstream_smooth'].iloc[-1] + upstream_change], + 'downstream_smooth': [single_pred], + 'hour': [current_date.hour], + 'weekday': [current_date.dayofweek], + 'month': [current_date.month], + 'upstream': [temp_df['upstream'].iloc[-1] + upstream_change], + 'downstream': [single_pred], + 'lunar_phase_sin': [np.sin(2*np.pi*ld.day/15)], + 'lunar_phase_cos': [np.cos(2*np.pi*ld.day/15)], + 'is_high_tide': [1 if (ld.day <=5 or (ld.day >=16 and ld.day<=20)) else 0] + }) + + # 涓烘柊琛屾坊鍔犲叾浠栧繀瑕佺殑鍒楋紝纭繚涓庡師鏁版嵁妗嗙粨鏋勪竴鑷� + for col in temp_df.columns: + if col not in new_row.columns: + if col.startswith('upstream_delay_'): + delay = int(col.split('_')[-1].replace('h', '')) + if delay <= 1: + new_row[col] = temp_df['upstream_smooth'].iloc[-1] + else: + # 瀹夊叏鑾峰彇寤惰繜鍊硷紝妫�鏌ユ槸鍚﹀瓨鍦ㄥ搴旂殑寤惰繜鍒� + prev_delay = delay - 1 + prev_col = f'upstream_delay_{prev_delay}h' + if prev_col in temp_df.columns: + new_row[col] = temp_df[prev_col].iloc[-1] + else: + # 濡傛灉鍓嶄竴涓欢杩熶笉瀛樺湪锛屽垯浣跨敤褰撳墠鏈�鏂扮殑涓婃父鍊� + new_row[col] = temp_df['upstream_smooth'].iloc[-1] + elif col.startswith('downstream_delay_'): + delay = int(col.split('_')[-1].replace('h', '')) + if delay <= 1: + new_row[col] = single_pred + else: + # 瀹夊叏鑾峰彇寤惰繜鍊硷紝妫�鏌ユ槸鍚﹀瓨鍦ㄥ搴旂殑寤惰繜鍒� + prev_delay = delay - 1 + prev_col = f'downstream_delay_{prev_delay}h' + if prev_col in temp_df.columns: + new_row[col] = temp_df[prev_col].iloc[-1] + else: + # 濡傛灉鍓嶄竴涓欢杩熶笉瀛樺湪锛屽垯浣跨敤褰撳墠棰勬祴鍊� + new_row[col] = single_pred + elif col == 'lunar_phase_sin': + new_row[col] = np.sin(2*np.pi*current_date.day/15) + elif col == 'lunar_phase_cos': + new_row[col] = np.cos(2*np.pi*current_date.day/15) + elif col == 'is_high_tide': + new_row[col] = 1 if (current_date.day <=5 or (current_date.day >=16 and current_date.day<=20)) else 0 + else: + # 瀵逛簬鏈鐞嗙殑鐗瑰緛锛岀畝鍗曞鍒朵笂涓�鍊� + if col in temp_df.columns: + new_row[col] = temp_df[col].iloc[-1] + else: + new_row[col] = 0 # 榛樿鍊� + + # 灏嗘柊琛屾坊鍔犲埌涓存椂鏁版嵁妗� + temp_df = pd.concat([temp_df, new_row], ignore_index=True) + + # 閲嶆柊璁$畻缁熻鐗瑰緛锛屼娇鐢ㄦ渶杩戠殑24/72灏忔椂鏁版嵁 + # 杩欐槸鍏抽敭姝ラ锛岀‘淇濇瘡涓�姝ラ娴嬩娇鐢ㄦ洿鏂板悗鐨勭粺璁$壒寰� + temp_df_last = temp_df.iloc[-1:].copy() + + # 璁$畻涓婃父缁熻鐗瑰緛 + recent_upstream = temp_df['upstream_smooth'].iloc[-24:].values + temp_df_last['mean_1d_up'] = np.mean(recent_upstream) + temp_df_last['std_1d_up'] = np.std(recent_upstream) + temp_df_last['max_1d_up'] = np.max(recent_upstream) + temp_df_last['min_1d_up'] = np.min(recent_upstream) + temp_df_last['mean_3d_up'] = np.mean(temp_df['upstream_smooth'].iloc[-min(72, len(temp_df)):].values) + + # 璁$畻涓嬫父缁熻鐗瑰緛 + recent_downstream = temp_df['downstream_smooth'].iloc[-24:].values + temp_df_last['mean_1d_down'] = np.mean(recent_downstream) + temp_df_last['std_1d_down'] = np.std(recent_downstream) + temp_df_last['max_1d_down'] = np.max(recent_downstream) + temp_df_last['min_1d_down'] = np.min(recent_downstream) + temp_df_last['mean_3d_down'] = np.mean(temp_df['downstream_smooth'].iloc[-min(72, len(temp_df)):].values) + + # 鏇存柊涓存椂鏁版嵁妗嗕腑鐨勬渶鍚庝竴琛� + temp_df.iloc[-1] = temp_df_last.iloc[0] + + # 鏇存柊寤惰繜鐗瑰緛锛岀‘淇濅笌window鐨勬粦鍔ㄤ竴鑷� + for delay in range(1, 121): + # 涓婃父寤惰繜鐗瑰緛鏇存柊 + delay_col = f'upstream_delay_{delay}h' + if delay_col in temp_df.columns: + if len(temp_df) > delay: + temp_df.loc[temp_df.index[-1], delay_col] = temp_df.iloc[-delay-1]['upstream_smooth'] + else: + temp_df.loc[temp_df.index[-1], delay_col] = temp_df.iloc[0]['upstream_smooth'] + + # 涓嬫父寤惰繜鐗瑰緛鏇存柊 + delay_col = f'downstream_delay_{delay}h' + if delay_col in temp_df.columns: + if len(temp_df) > delay: + temp_df.loc[temp_df.index[-1], delay_col] = temp_df.iloc[-delay-1]['downstream_smooth'] + else: + temp_df.loc[temp_df.index[-1], delay_col] = temp_df.iloc[0]['downstream_smooth'] + + # 鎵撳嵃鏇存柊鍚庣殑缁熻鐗瑰緛鍊� + print(f"鏇存柊鍚巑ean_1d_down: {temp_df.iloc[-1]['mean_1d_down']:.4f}, mean_1d_up: {temp_df.iloc[-1]['mean_1d_up']:.4f}") + + print("閫掑綊棰勬祴瀹屾垚") + + # 鑾峰彇妯″瀷鎸囨爣 + metrics = None + if os.path.exists(model_cache_file): + try: + with open(model_cache_file, 'rb') as f: + model_data = pickle.load(f) + metrics = { + 'rmse': model_data.get('rmse', None), + 'mae': model_data.get('mae', None) + } + except Exception as e: + print(f"鑾峰彇妯″瀷鎸囨爣澶辫触: {e}") + + return future_dates, predictions, model, metrics + except Exception as e: + print("棰勬祴杩囩▼寮傚父:", e) + import traceback + traceback.print_exc() + return None, None, None, None + +# ------------------------------- +# GUI鐣岄潰閮ㄥ垎 +# ------------------------------- +def run_gui(): + def configure_gui_fonts(): + font_names = ['寰蒋闆呴粦', 'Microsoft YaHei', 'SimSun', 'SimHei'] + for font_name in font_names: + try: + default_font = tkfont.nametofont("TkDefaultFont") + default_font.configure(family=font_name) + text_font = tkfont.nametofont("TkTextFont") + text_font.configure(family=font_name) + fixed_font = tkfont.nametofont("TkFixedFont") + fixed_font.configure(family=font_name) + return True + except Exception as e: + continue + return False + + def on_predict(): + try: + predict_start = time() + status_label.config(text="棰勬祴涓�...") + root.update() + start_time_dt = pd.to_datetime(entry.get()) + force_retrain = retrain_var.get() + future_dates, predictions, model, metrics = train_and_predict(df, start_time_dt, force_retrain) + if future_dates is None or predictions is None: + status_label.config(text="棰勬祴澶辫触") + return + + # 鑾峰彇骞舵樉绀烘ā鍨嬪噯纭害鎸囨爣 + if metrics: + metrics_text = f"妯″瀷鍑嗙‘搴� - RMSE: {metrics['rmse']:.4f}, MAE: {metrics['mae']:.4f}" + metrics_label.config(text=metrics_text) + + # 娓呴櫎鍥惧舰骞堕噸鏂扮粯鍒� + ax.clear() + + # 缁樺埗鍘嗗彶鏁版嵁锛堟渶杩� 120 澶╋級 + history_end = min(start_time_dt, df['DateTime'].max()) + history_start = history_end - timedelta(days=120) + hist_data = df[(df['DateTime'] >= history_start) & (df['DateTime'] <= history_end)] + + # 纭繚鏁版嵁涓嶄负绌� + if len(hist_data) == 0: + status_label.config(text="閿欒: 鎵�閫夋椂闂磋寖鍥村唴娌℃湁鍘嗗彶鏁版嵁") + return + + # 缁樺埗鍩烘湰鏁版嵁 + ax.plot(hist_data['DateTime'], hist_data['downstream_smooth'], + label='涓�鍙栨按(涓嬫父)鐩愬害', color='blue', linewidth=1.5) + ax.plot(hist_data['DateTime'], hist_data['upstream_smooth'], + label='闈掗緳娓�(涓婃父)鐩愬害', color='purple', linewidth=1.5, alpha=0.7) + + if 'qinglong_lake_smooth' in hist_data.columns: + ax.plot(hist_data['DateTime'], hist_data['qinglong_lake_smooth'], + label='闈掗緳婀栫洂搴�', color='green', linewidth=1.5, alpha=0.7) + + # 缁樺埗棰勬祴鏁版嵁 + if len(future_dates) > 0 and len(predictions) > 0: + ax.plot(future_dates, predictions, marker='o', linestyle='--', + label='閫掑綊棰勬祴鐩愬害', color='red', linewidth=2) + + # 娣诲姞棰勬祴鐨勭疆淇″尯闂� + std_dev = hist_data['downstream_smooth'].std() * 0.5 + ax.fill_between(future_dates, predictions - std_dev, predictions + std_dev, + color='red', alpha=0.2) + + # 缁樺埗瀹為檯鏁版嵁(濡傛灉鏈� + actual_data = df[(df['DateTime'] >= start_time_dt) & (df['DateTime'] <= future_dates[-1])] + actual_values = None + + if not actual_data.empty: + actual_values = [] + # 鑾峰彇涓庨娴嬫棩鏈熸渶鎺ヨ繎鐨勫疄闄呮暟鎹� + for pred_date in future_dates: + closest_idx = np.argmin(np.abs(actual_data['DateTime'] - pred_date)) + actual_values.append(actual_data['downstream_smooth'].iloc[closest_idx]) + + # 缁樺埗瀹為檯鐩愬害鏇茬嚎 + ax.plot(future_dates, actual_values, marker='s', linestyle='-', + label='瀹為檯鐩愬害', color='orange', linewidth=2) + + # 璁剧疆鍥捐〃鏍囬鍜屾爣绛� + ax.set_xlabel('鏃ユ湡') + ax.set_ylabel('鐩愬害') + ax.set_title(f"浠� {start_time_dt.strftime('%Y-%m-%d %H:%M:%S')} 寮�濮嬬殑閫掑綊鍗曟鐩愬害棰勬祴") + + # 璁剧疆鍥句緥骞跺簲鐢ㄧ揣鍑戝竷灞� + ax.legend(loc='best') + fig.tight_layout() + + # 寮哄埗閲嶇粯 - 浣跨敤澶氱鏂瑰紡纭繚鍥惧舰鏄剧ず + plt.close(fig) # 鍏抽棴鏃х殑 + fig.canvas.draw() + fig.canvas.flush_events() + plt.draw() + + # 鏇存柊棰勬祴缁撴灉鏂囨湰 + predict_time = time() - predict_start + status_label.config(text=f"閫掑綊棰勬祴瀹屾垚 (鑰楁椂: {predict_time:.2f}绉�)") + + # 鏄剧ず棰勬祴缁撴灉 + result_text = "閫掑綊鍗曟棰勬祴缁撴灉:\n\n" + + # 濡傛灉鏈夊疄闄呭�硷紝璁$畻宸�煎拰鐧惧垎姣旇宸� + if actual_values is not None: + result_text += "鏃ユ湡 棰勬祴鍊� 瀹為檯鍊� 宸�糪n" + result_text += "--------------------------------------\n" + for i, (date, pred, actual) in enumerate(zip(future_dates, predictions, actual_values)): + diff = pred - actual + # 绉婚櫎鐧惧垎姣旇宸樉绀� + result_text += f"{date.strftime('%Y-%m-%d')} {pred:6.2f} {actual:6.2f} {diff:6.2f}\n" + + # # 璁$畻鏁翠綋璇勪环鎸囨爣 + # mae = np.mean(np.abs(np.array(predictions) - np.array(actual_values))) + # rmse = np.sqrt(np.mean((np.array(predictions) - np.array(actual_values))**2)) + + # result_text += "\n棰勬祴璇勪及鎸囨爣:\n" + # result_text += f"骞冲潎缁濆璇樊(MAE): {mae:.4f}\n" + # result_text += f"鍧囨柟鏍硅宸�(RMSE): {rmse:.4f}\n" + else: + result_text += "鏃ユ湡 棰勬祴鍊糪n" + result_text += "-------------------\n" + for i, (date, pred) in enumerate(zip(future_dates, predictions)): + result_text += f"{date.strftime('%Y-%m-%d')} {pred:6.2f}\n" + result_text += "\n鏃犲疄闄呭�艰繘琛屽姣�" + + update_result_text(result_text) + except Exception as e: + status_label.config(text=f"閿欒: {str(e)}") + import traceback + traceback.print_exc() + + def on_scroll(event): + xlim = ax.get_xlim() + ylim = ax.get_ylim() + zoom_factor = 1.1 + x_data = event.xdata if event.xdata is not None else (xlim[0]+xlim[1])/2 + y_data = event.ydata if event.ydata is not None else (ylim[0]+ylim[1])/2 + x_rel = (x_data - xlim[0]) / (xlim[1] - xlim[0]) + y_rel = (y_data - ylim[0]) / (ylim[1] - ylim[0]) + if event.step > 0: + new_width = (xlim[1]-xlim[0]) / zoom_factor + new_height = (ylim[1]-ylim[0]) / zoom_factor + x0 = x_data - x_rel * new_width + y0 = y_data - y_rel * new_height + ax.set_xlim([x0, x0+new_width]) + ax.set_ylim([y0, y0+new_height]) + else: + new_width = (xlim[1]-xlim[0]) * zoom_factor + new_height = (ylim[1]-ylim[0]) * zoom_factor + x0 = x_data - x_rel * new_width + y0 = y_data - y_rel * new_height + ax.set_xlim([x0, x0+new_width]) + ax.set_ylim([y0, y0+new_height]) + canvas.draw_idle() + + def update_cursor(event): + if event.inaxes == ax: + canvas.get_tk_widget().config(cursor="fleur") + else: + canvas.get_tk_widget().config(cursor="") + + def reset_view(): + display_history() + status_label.config(text="鍥捐〃瑙嗗浘宸查噸缃�") + + root = tk.Tk() + root.title("闈掗緳娓�-闄堣鐩愬害棰勬祴绯荤粺") + try: + configure_gui_fonts() + except Exception as e: + print("瀛椾綋閰嶇疆寮傚父:", e) + + # 鎭㈠杈撳叆妗嗗拰鎺у埗鎸夐挳 + input_frame = ttk.Frame(root, padding="10") + input_frame.pack(fill=tk.X) + + ttk.Label(input_frame, text="杈撳叆寮�濮嬫椂闂� (YYYY-MM-DD HH:MM:SS)").pack(side=tk.LEFT) + entry = ttk.Entry(input_frame, width=25) + entry.pack(side=tk.LEFT, padx=5) + predict_button = ttk.Button(input_frame, text="棰勬祴", command=on_predict) + predict_button.pack(side=tk.LEFT) + status_label = ttk.Label(input_frame, text="鎻愮ず: 绗竴娆¤繍琛岃鍕鹃��'寮哄埗閲嶆柊璁粌妯″瀷'") + status_label.pack(side=tk.LEFT, padx=10) + + control_frame = ttk.Frame(root, padding="5") + control_frame.pack(fill=tk.X) + retrain_var = tk.BooleanVar(value=False) + ttk.Checkbutton(control_frame, text="寮哄埗閲嶆柊璁粌妯″瀷", variable=retrain_var).pack(side=tk.LEFT) + legend_label = ttk.Label(control_frame, text="鍥句緥: 绱壊=闈掗緳娓笂娓告暟鎹�, 钃濊壊=涓�鍙栨按涓嬫父鏁版嵁, 绾㈣壊=棰勬祴鍊�, 姗欒壊=瀹為檯鍊�") + legend_label.pack(side=tk.LEFT, padx=10) + reset_button = ttk.Button(control_frame, text="閲嶇疆瑙嗗浘", command=reset_view) + reset_button.pack(side=tk.LEFT, padx=5) + + # 娣诲姞鏄剧ず妯″瀷鍑嗙‘搴︾殑鏍囩 + metrics_frame = ttk.Frame(root, padding="5") + metrics_frame.pack(fill=tk.X) + model_metrics = get_model_metrics() + metrics_text = "妯″瀷鍑嗙‘搴�: 鏈煡" if not model_metrics else f"妯″瀷鍑嗙‘搴� - RMSE: {model_metrics['rmse']:.4f}, MAE: {model_metrics['mae']:.4f}" + metrics_label = ttk.Label(metrics_frame, text=metrics_text) + metrics_label.pack(side=tk.LEFT, padx=10) + + # 缁撴灉鏄剧ず鍖哄煙 + result_frame = ttk.Frame(root, padding="10") + result_frame.pack(fill=tk.BOTH, expand=True) + + # 宸︿晶鏀剧疆鍥捐〃 + plot_frame = ttk.Frame(result_frame, width=800, height=600) + plot_frame.pack(side=tk.LEFT, fill=tk.BOTH, expand=True) + plot_frame.pack_propagate(False) # 涓嶅厑璁告鏋舵牴鎹唴瀹硅皟鏁村ぇ灏� + + # 鍙充晶鏀剧疆鏂囨湰缁撴灉 + text_frame = ttk.Frame(result_frame) + text_frame.pack(side=tk.RIGHT, fill=tk.Y) + + # 浣跨敤绛夊瀛椾綋鏄剧ず缁撴灉 + result_font = tkfont.Font(family="Courier New", size=10, weight="normal") + + # 娣诲姞鏂囨湰妗嗗拰婊氬姩鏉� + result_text = tk.Text(text_frame, width=50, height=25, font=result_font, wrap=tk.NONE) + result_text.pack(side=tk.LEFT, fill=tk.BOTH) + result_scroll = ttk.Scrollbar(text_frame, orient="vertical", command=result_text.yview) + result_scroll.pack(side=tk.RIGHT, fill=tk.Y) + result_text.configure(yscrollcommand=result_scroll.set) + result_text.configure(state=tk.DISABLED) # 鍒濆璁句负鍙 + + # 鏇存柊缁撴灉鏂囨湰鐨勫嚱鏁� + def update_result_text(text): + result_text.configure(state=tk.NORMAL) + result_text.delete(1.0, tk.END) + result_text.insert(tk.END, text) + result_text.configure(state=tk.DISABLED) + + # 鍒涘缓鏇撮珮DPI鐨勫浘褰互鑾峰緱鏇村ソ鐨勬樉绀鸿川閲� + fig, ax = plt.subplots(figsize=(10, 6), dpi=100) + fig.tight_layout(pad=3.0) # 澧炲姞鍐呰竟璺濓紝闃叉鏍囩琚埅鏂� + + # 鍒涘缓鐢诲竷骞舵坊鍔犲埌鍥哄畾澶у皬鐨勬鏋� + canvas = FigureCanvasTkAgg(fig, master=plot_frame) + canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=True) + + # 娣诲姞宸ュ叿鏍忥紝鍖呭惈缂╂斁銆佷繚瀛樼瓑鍔熻兘 + toolbar_frame = ttk.Frame(plot_frame) + toolbar_frame.pack(side=tk.BOTTOM, fill=tk.X) + toolbar = NavigationToolbar2Tk(canvas, toolbar_frame) + toolbar.update() + + # 鍚敤绱у噾甯冨眬锛屽苟璁剧疆鑷姩璋冩暣浠ヤ娇鍥捐〃瀹屽叏鏄剧ず + def on_resize(event): + fig.tight_layout() + canvas.draw_idle() + + # 娣诲姞鍥捐〃浜や簰鍔熻兘 + canvas.mpl_connect('resize_event', on_resize) + canvas.mpl_connect('scroll_event', on_scroll) + canvas.mpl_connect('motion_notify_event', update_cursor) + + # 娣诲姞榧犳爣鎷栧姩鍔熻兘 + def on_press(event): + if event.inaxes != ax: + return + canvas.get_tk_widget().config(cursor="fleur") + ax._pan_start = (event.x, event.y, event.xdata, event.ydata) + + def on_release(event): + ax._pan_start = None + canvas.get_tk_widget().config(cursor="") + canvas.draw_idle() + + def on_motion(event): + if not hasattr(ax, '_pan_start') or ax._pan_start is None: + return + if event.inaxes != ax: + return + + start_x, start_y, x_data, y_data = ax._pan_start + dx = event.x - start_x + dy = event.y - start_y + + # 鑾峰彇褰撳墠瑙嗗浘 + xlim = ax.get_xlim() + ylim = ax.get_ylim() + + # 璁$畻鍥捐〃鍧愭爣绯讳腑鐨勭Щ鍔� + x_scale = (xlim[1] - xlim[0]) / canvas.get_tk_widget().winfo_width() + y_scale = (ylim[1] - ylim[0]) / canvas.get_tk_widget().winfo_height() + + # 鏇存柊瑙嗗浘 + ax.set_xlim(xlim[0] - dx * x_scale, xlim[1] - dx * x_scale) + ax.set_ylim(ylim[0] + dy * y_scale, ylim[1] + dy * y_scale) + + # 鏇存柊鎷栧姩璧风偣 + ax._pan_start = (event.x, event.y, event.xdata, event.ydata) + + canvas.draw_idle() + + # 杩炴帴榧犳爣浜嬩欢 + canvas.mpl_connect('button_press_event', on_press) + canvas.mpl_connect('button_release_event', on_release) + canvas.mpl_connect('motion_notify_event', on_motion) + + # 淇敼婊氳疆缂╂斁鍑芥暟锛屼娇鍏舵洿骞虫粦 + def on_scroll(event): + if event.inaxes != ax: + return + + # 褰撳墠瑙嗗浘 + xlim = ax.get_xlim() + ylim = ax.get_ylim() + + # 缂╂斁鍥犲瓙 + zoom_factor = 1.1 if event.step > 0 else 0.9 + + # 鑾峰彇榧犳爣浣嶇疆浣滀负缂╂斁涓績 + x_data = event.xdata + y_data = event.ydata + + # 璁$畻鏂拌鍥剧殑瀹藉害鍜岄珮搴� + new_width = (xlim[1] - xlim[0]) * zoom_factor + new_height = (ylim[1] - ylim[0]) * zoom_factor + + # 璁$畻鏂拌鍥剧殑宸︿笅瑙掑潗鏍囷紝浠ラ紶鏍囦綅缃负涓績缂╂斁 + x_rel = (x_data - xlim[0]) / (xlim[1] - xlim[0]) + y_rel = (y_data - ylim[0]) / (ylim[1] - ylim[0]) + + x0 = x_data - x_rel * new_width + y0 = y_data - y_rel * new_height + + # 鏇存柊瑙嗗浘 + ax.set_xlim([x0, x0 + new_width]) + ax.set_ylim([y0, y0 + new_height]) + + canvas.draw_idle() + + # 鏇存柊鍘嗗彶鏁版嵁鏄剧ず鍑芥暟 + def display_history(): + try: + ax.clear() + end_date = df['DateTime'].max() + start_date = max(df['DateTime'].min(), end_date - timedelta(days=60)) + hist_data = df[(df['DateTime'] >= start_date) & (df['DateTime'] <= end_date)] + + if len(hist_data) == 0: + status_label.config(text="璀﹀憡: 娌℃湁鍙敤鐨勫巻鍙叉暟鎹�") + return + + # 缁樺埗鏁版嵁 + ax.plot(hist_data['DateTime'], hist_data['downstream_smooth'], + label='涓�鍙栨按(涓嬫父)鐩愬害', color='blue', linewidth=1.5) + ax.plot(hist_data['DateTime'], hist_data['upstream_smooth'], + label='闈掗緳娓�(涓婃父)鐩愬害', color='purple', linewidth=1.5, alpha=0.7) + + # 璁剧疆杈圭晫锛岀‘淇濇湁涓�鑷寸殑瑙嗗浘 + y_min = min(hist_data['downstream_smooth'].min(), hist_data['upstream_smooth'].min()) * 0.9 + y_max = max(hist_data['downstream_smooth'].max(), hist_data['upstream_smooth'].max()) * 1.1 + ax.set_ylim(y_min, y_max) + + # 璁剧疆鏍囩鍜屾爣棰� + ax.set_xlabel('鏃ユ湡') + ax.set_ylabel('鐩愬害') + ax.set_title('鍘嗗彶鐩愬害鏁版嵁瀵规瘮') + ax.legend(loc='best') + + # 浣跨敤绱у噾甯冨眬骞剁粯鍒� + fig.tight_layout() + + # 浣跨敤澶氱鏂规硶纭繚鍥惧儚鏄剧ず + plt.close(fig) # 鍏抽棴鏃х殑 + fig.canvas.draw() + fig.canvas.flush_events() + plt.draw() + + except Exception as e: + status_label.config(text=f"鏄剧ず鍘嗗彶鏁版嵁鏃跺嚭閿�: {str(e)}") + import traceback + traceback.print_exc() + + display_history() + root.mainloop() + +# ------------------------------- +# 涓荤▼搴忓叆鍙o細鍔犺浇鏁版嵁銆佹坊鍔犵壒寰併�佺敓鎴愬欢杩熺壒寰佸悗鍚姩GUI +# ------------------------------- +def save_processed_data(df, filename='processed_data.pkl'): + try: + df.to_pickle(filename) + print(f"宸蹭繚瀛樺鐞嗗悗鐨勬暟鎹埌 {filename}") + return True + except Exception as e: + print(f"淇濆瓨鏁版嵁澶辫触: {e}") + return False + +def load_processed_data(filename='processed_data.pkl'): + try: + if os.path.exists(filename): + df = pd.read_pickle(filename) + print(f"宸蹭粠 {filename} 鍔犺浇澶勭悊鍚庣殑鏁版嵁") + return df + else: + print(f"鎵句笉鍒板鐞嗗悗鐨勬暟鎹枃浠� {filename}") + return None + except Exception as e: + print(f"鍔犺浇鏁版嵁澶辫触: {e}") + return None + +# 灏濊瘯鍔犺浇澶勭悊鍚庣殑鏁版嵁锛屽鏋滀笉瀛樺湪鍒欓噸鏂板鐞� +processed_data = load_processed_data() +if processed_data is not None: + df = processed_data +else: + df = load_data('闈掗緳娓�1.csv', '涓�鍙栨按.csv') + if df is not None: + # 娣诲姞鏃堕棿鐗瑰緛 + df['hour'] = df['DateTime'].dt.hour + df['weekday'] = df['DateTime'].dt.dayofweek + df['month'] = df['DateTime'].dt.month + + # 娣诲姞鍐滃巻鐗瑰緛 + df = add_lunar_features(df) + + # 娣诲姞寤惰繜鐗瑰緛 + delay_hours = [1,2,3,4,6,12,24,36,48,60,72,84,96,108,120] + df = batch_create_delay_features(df, delay_hours) + + # 娣诲姞缁熻鐗瑰緛 + df['mean_1d_up'] = df['upstream_smooth'].rolling(window=24, min_periods=1).mean() + df['mean_3d_up'] = df['upstream_smooth'].rolling(window=72, min_periods=1).mean() + df['std_1d_up'] = df['upstream_smooth'].rolling(window=24, min_periods=1).std() + df['max_1d_up'] = df['upstream_smooth'].rolling(window=24, min_periods=1).max() + df['min_1d_up'] = df['upstream_smooth'].rolling(window=24, min_periods=1).min() + + df['mean_1d_down'] = df['downstream_smooth'].rolling(window=24, min_periods=1).mean() + df['mean_3d_down'] = df['downstream_smooth'].rolling(window=72, min_periods=1).mean() + df['std_1d_down'] = df['downstream_smooth'].rolling(window=24, min_periods=1).std() + df['max_1d_down'] = df['downstream_smooth'].rolling(window=24, min_periods=1).max() + df['min_1d_down'] = df['downstream_smooth'].rolling(window=24, min_periods=1).min() + + # 淇濆瓨澶勭悊鍚庣殑鏁版嵁 + save_processed_data(df) + +if df is not None: + run_gui() +else: + print("鏁版嵁鍔犺浇澶辫触锛屾棤娉曡繍琛岄娴嬨��") -- Gitblit v1.9.3