# xgboost modified version
| | | import os |
| | | import pickle |
| | | import pandas as pd |
| | | import numpy as np |
| | | from numpy.lib.stride_tricks import sliding_window_view |
| | | import tkinter as tk |
| | | import tkinter.font as tkfont |
| | | from tkinter import ttk |
| | | from datetime import timedelta |
| | | from time import time |
| | | import matplotlib.pyplot as plt |
| | | from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg, NavigationToolbar2Tk |
| | | from xgboost import XGBRegressor |
| | | from lunardate import LunarDate |
| | | from sklearn.model_selection import train_test_split, TimeSeriesSplit |
| | | from sklearn.metrics import mean_squared_error, mean_absolute_error |
| | | import matplotlib |
| | | |
| | | # é
ç½® matplotlib 䏿æ¾ç¤º |
| | | matplotlib.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'SimSun', 'Arial Unicode MS'] |
| | | matplotlib.rcParams['axes.unicode_minus'] = False |
| | | matplotlib.rcParams['font.family'] = 'sans-serif' |
| | | |
| | | # å
¨å±ç¼ååéåç¹å¾åç§° |
| | | cached_model = None |
| | | last_training_time = None |
| | | feature_columns = None |
| | | current_view = {'xlim': None, 'ylim': None} # ç¨äºåå¨å½åå¾è¡¨è§å¾ |
| | | |
| | | # æ°æ®å è½½ä¸é¢å¤ç彿° |
| | | # ------------------------------- |
def _clip_outliers_iqr(df, column='Value', k=1.5, upper_only=False):
    """Clip *column* to Tukey fences (Q1 - k*IQR, Q3 + k*IQR) in place.

    With upper_only=True only abnormally large values are capped — used for
    rainfall, where zeros are legitimate and only spikes need limiting.
    """
    q1 = df[column].quantile(0.25)
    q3 = df[column].quantile(0.75)
    iqr = q3 - q1
    if not upper_only:
        lower_bound = q1 - k * iqr
        df.loc[df[column] < lower_bound, column] = lower_bound
    upper_bound = q3 + k * iqr
    df.loc[df[column] > upper_bound, column] = upper_bound


def _read_tagged_csv(path, default_tag):
    """Read a (DateTime[, TagName], Value) CSV into a DateTime-indexed frame.

    Two-column files get a synthetic TagName so downstream handling is uniform.
    Values are coerced to numeric (bad cells become NaN).
    """
    df = pd.read_csv(path)
    if len(df.columns) >= 3:
        df.columns = ['DateTime', 'TagName', 'Value']
    elif len(df.columns) == 2:
        df.columns = ['DateTime', 'Value']
        df['TagName'] = default_tag
    df['DateTime'] = pd.to_datetime(df['DateTime'])
    df.set_index('DateTime', inplace=True)
    df['Value'] = pd.to_numeric(df['Value'], errors='coerce')
    return df


def _merge_water_level(merged_df, river_level_file):
    """Left-join river water level onto merged_df and derive smoothed/trend features."""
    try:
        wl_df = _read_tagged_csv(river_level_file, 'water_level')
        print(f"Loaded water level file: {river_level_file}")
        _clip_outliers_iqr(wl_df)
        wl_df = wl_df.rename(columns={'Value': 'water_level'})[['water_level']]
        merged_df = pd.merge(merged_df, wl_df, left_index=True, right_index=True, how='left')

        # Fill gaps: time interpolation (max 24h), then edge fill.
        merged_df['water_level'] = merged_df['water_level'].interpolate(method='time', limit=24)
        merged_df['water_level'] = merged_df['water_level'].ffill().bfill()

        merged_df['water_level_smooth'] = merged_df['water_level'].rolling(window=24, min_periods=1, center=True).mean()
        merged_df['water_level_trend_1h'] = merged_df['water_level_smooth'].diff(1)
        merged_df['water_level_trend_24h'] = merged_df['water_level_smooth'].diff(24)

        print(f"Water level data loaded, range: {merged_df['water_level'].min()} - {merged_df['water_level'].max()}")
    except Exception as e:
        # Optional input: log and continue without water-level features.
        print(f"Failed to load water level data: {e}")
    return merged_df


def _merge_flow(merged_df, flow_file):
    """Left-join river flow onto merged_df and derive smoothed/statistical features."""
    try:
        flow_df = _read_tagged_csv(flow_file, 'flow')
        print(f"Loaded flow data file: {flow_file}")
        _clip_outliers_iqr(flow_df)
        flow_df = flow_df.rename(columns={'Value': 'flow'})[['flow']]
        merged_df = pd.merge(merged_df, flow_df, left_index=True, right_index=True, how='left')

        merged_df['flow'] = merged_df['flow'].interpolate(method='time', limit=24)
        merged_df['flow'] = merged_df['flow'].ffill().bfill()
        merged_df['flow_smooth'] = merged_df['flow'].rolling(window=24, min_periods=1, center=True).mean()

        merged_df['flow_trend_1h'] = merged_df['flow_smooth'].diff(1)
        merged_df['flow_trend_24h'] = merged_df['flow_smooth'].diff(24)

        # Rolling statistics over 1 and 3 days.
        merged_df['mean_1d_flow'] = merged_df['flow_smooth'].rolling(window=24, min_periods=1).mean()
        merged_df['mean_3d_flow'] = merged_df['flow_smooth'].rolling(window=72, min_periods=1).mean()
        merged_df['std_1d_flow'] = merged_df['flow_smooth'].rolling(window=24, min_periods=1).std()

        # NOTE: change_* duplicates trend_* (same diff); both names are kept
        # because later feature-collection code looks both up.
        merged_df['flow_change_1h'] = merged_df['flow_smooth'].diff(1)
        merged_df['flow_change_24h'] = merged_df['flow_smooth'].diff(24)

        # flow_sal_ratio is computed in load_data AFTER downstream_smooth
        # exists (fixes the original ordering bug where it was never created).
        print(f"Flow data loaded, range: {merged_df['flow'].min()} - {merged_df['flow'].max()} m³/s")
    except Exception as e:
        print(f"Failed to load flow data: {e}")
    return merged_df


def _merge_rainfall(merged_df, rainfall_file):
    """Left-join rainfall onto merged_df and derive cumulative/intensity features."""
    try:
        rain_df = _read_tagged_csv(rainfall_file, 'rainfall')
        print(f"Loaded rainfall data file: {rainfall_file}")
        # Rainfall is zero-inflated: only cap abnormally large values (3*IQR).
        _clip_outliers_iqr(rain_df, k=3, upper_only=True)
        rain_df = rain_df.rename(columns={'Value': 'rainfall'})[['rainfall']]
        merged_df = pd.merge(merged_df, rain_df, left_index=True, right_index=True, how='left')

        merged_df['rainfall'] = merged_df['rainfall'].fillna(0)  # NaN means "no rain"
        merged_df['rainfall_smooth'] = merged_df['rainfall'].rolling(window=6, min_periods=1, center=True).mean()

        # Cumulative rainfall over 1 and 3 days.
        merged_df['sum_1d_rainfall'] = merged_df['rainfall'].rolling(window=24, min_periods=1).sum()
        merged_df['sum_3d_rainfall'] = merged_df['rainfall'].rolling(window=72, min_periods=1).sum()

        # Rainfall intensity (short-window means).
        merged_df['rainfall_intensity_1h'] = merged_df['rainfall'].rolling(window=1, min_periods=1).mean()
        merged_df['rainfall_intensity_6h'] = merged_df['rainfall'].rolling(window=6, min_periods=1).mean()

        merged_df['rainfall_trend_1h'] = merged_df['rainfall_smooth'].diff(1)
        merged_df['rainfall_trend_24h'] = merged_df['rainfall_smooth'].diff(24)

        print(f"Rainfall data loaded, range: {merged_df['rainfall'].min()} - {merged_df['rainfall'].max()} mm")
    except Exception as e:
        print(f"Failed to load rainfall data: {e}")
        import traceback
        traceback.print_exc()
    return merged_df


def load_data(upstream_file, downstream_file, river_level_file=None, flow_file=None, rainfall_file=None):
    """
    Load all related time-series data and run data-quality processing.

    Parameters
    ----------
    upstream_file, downstream_file : str
        Required CSV files with (DateTime, TagName, Value) salinity readings.
    river_level_file, flow_file, rainfall_file : str, optional
        Optional CSV inputs; each contributes its own smoothed/statistical
        feature columns. Failures here are logged and skipped.

    Returns
    -------
    pandas.DataFrame or None
        Merged frame with a DateTime column plus raw, smoothed and trend
        features, or None when a required input file is missing.
    """
    try:
        upstream_df = pd.read_csv(upstream_file)
        downstream_df = pd.read_csv(downstream_file)
    except FileNotFoundError:
        print("File not found, please check the path")
        return None

    # Normalize column names, parse timestamps and index by time.
    for df in (upstream_df, downstream_df):
        df.columns = ['DateTime', 'TagName', 'Value']
        df['DateTime'] = pd.to_datetime(df['DateTime'])
    upstream_df.set_index('DateTime', inplace=True)
    downstream_df.set_index('DateTime', inplace=True)

    # Robust numeric conversion + IQR-based outlier clipping.
    for df in (upstream_df, downstream_df):
        df['Value'] = pd.to_numeric(df['Value'], errors='coerce')
        _clip_outliers_iqr(df)

    # Discard salinity readings below 5 (treated as invalid/noise).
    upstream_df = upstream_df[upstream_df['Value'] >= 5]
    downstream_df = downstream_df[downstream_df['Value'] >= 5]

    upstream_df = upstream_df.rename(columns={'Value': 'upstream'})[['upstream']]
    downstream_df = downstream_df.rename(columns={'Value': 'downstream'})[['downstream']]

    # Inner join: keep only timestamps present in both series.
    merged_df = pd.merge(upstream_df, downstream_df, left_index=True, right_index=True, how='inner')

    if river_level_file:
        merged_df = _merge_water_level(merged_df, river_level_file)
    if flow_file:
        merged_df = _merge_flow(merged_df, flow_file)
    if rainfall_file:
        merged_df = _merge_rainfall(merged_df, rainfall_file)

    # Interpolate and smooth the salinity series.
    merged_df['upstream'] = merged_df['upstream'].interpolate(method='time', limit=24)
    merged_df['downstream'] = merged_df['downstream'].interpolate(method='time', limit=24)
    merged_df['upstream'] = merged_df['upstream'].ffill().bfill()
    merged_df['downstream'] = merged_df['downstream'].ffill().bfill()

    merged_df['upstream_smooth'] = merged_df['upstream'].rolling(window=24, min_periods=1, center=True).mean()
    merged_df['downstream_smooth'] = merged_df['downstream'].rolling(window=24, min_periods=1, center=True).mean()

    merged_df['upstream_trend_1h'] = merged_df['upstream_smooth'].diff(1)
    merged_df['upstream_trend_24h'] = merged_df['upstream_smooth'].diff(24)
    merged_df['downstream_trend_1h'] = merged_df['downstream_smooth'].diff(1)
    merged_df['downstream_trend_24h'] = merged_df['downstream_smooth'].diff(24)

    # Low-salinity segments are noisier: re-smooth them with a wider window.
    low_sal_mask = merged_df['upstream'] < 50
    if low_sal_mask.any():
        merged_df.loc[low_sal_mask, 'upstream_smooth'] = merged_df.loc[low_sal_mask, 'upstream']\
            .rolling(window=48, min_periods=1, center=True).mean()

    # BUGFIX: in the original code this ratio was attempted while loading the
    # flow file, before downstream_smooth existed, so it was never created.
    if 'flow_smooth' in merged_df.columns:
        merged_df['flow_sal_ratio'] = merged_df['flow_smooth'] / merged_df['downstream_smooth']

    # Data-quality summary.
    print("\nData quality statistics:")
    print(f"Total rows: {len(merged_df)}")
    print(f"Upstream salinity range: {merged_df['upstream'].min():.2f} - {merged_df['upstream'].max():.2f}")
    print(f"Downstream salinity range: {merged_df['downstream'].min():.2f} - {merged_df['downstream'].max():.2f}")

    if 'water_level' in merged_df.columns:
        print(f"Water level range: {merged_df['water_level'].min():.2f} - {merged_df['water_level'].max():.2f}")
        print(f"Water level missing ratio: {merged_df['water_level'].isna().mean()*100:.2f}%")

    if 'flow' in merged_df.columns:
        print(f"Flow range: {merged_df['flow'].min():.2f} - {merged_df['flow'].max():.2f} m³/s")
        print(f"Flow missing ratio: {merged_df['flow'].isna().mean()*100:.2f}%")

    if 'rainfall' in merged_df.columns:
        print(f"Rainfall range: {merged_df['rainfall'].min():.2f} - {merged_df['rainfall'].max():.2f} mm")
        print(f"Rainfall missing ratio: {merged_df['rainfall'].isna().mean()*100:.2f}%")

    # Move DateTime back into a regular column.
    merged_df = merged_df.reset_index()

    return merged_df
| | | |
| | | # df = load_data('é龿¸¯1.csv', 'ä¸åæ°´.csv') |
| | | |
| | | # æµè¯ |
| | | # df = load_data('é龿¸¯1.csv', 'ä¸åæ°´.csv') |
| | | # df.to_csv('merged_data.csv', index=False) |
| | | # print(f"Merged data saved to 'merged_data.csv' successfully") |
| | | |
| | | # # ç»å¶çåº¦éæ¶é´ååå¾ |
| | | # plt.figure(figsize=(12, 6)) |
| | | # plt.plot(df['DateTime'], df['upstream_smooth'], label='䏿¸¸ç度', color='blue') |
| | | # plt.plot(df['DateTime'], df['downstream_smooth'], label='䏿¸¸ç度', color='red') |
| | | # plt.xlabel('æ¶é´') |
| | | # plt.ylabel('ç度') |
| | | # plt.title('çåº¦éæ¶é´ååå¾') |
| | | # plt.legend() |
| | | # plt.grid(True) |
| | | # plt.tight_layout() |
| | | # plt.savefig('salinity_time_series.png', dpi=300) |
| | | # plt.show() |
| | | |
| | | |
| | | #ç¹å¾å·¥ç¨é¨å |
| | | |
| | | |
| | | # ------------------------------- |
| | | # æ·»å ååï¼æ½®æ±ï¼ç¹å¾ |
| | | # ------------------------------- |
def add_lunar_features(df):
    """Append lunar-calendar (tidal) features derived from each row's DateTime.

    Adds: lunar_day, lunar_phase_sin/cos (15-day half-cycle encoding) and
    is_high_tide (1 near new/full moon days, else 0).
    """
    days = [
        LunarDate.fromSolarDate(ts.year, ts.month, ts.day).day
        for ts in df['DateTime']
    ]
    phase = np.asarray(days, dtype=float) * (2 * np.pi / 15)

    df['lunar_day'] = days
    df['lunar_phase_sin'] = np.sin(phase)
    df['lunar_phase_cos'] = np.cos(phase)
    df['is_high_tide'] = [1 if (d <= 5 or 16 <= d <= 20) else 0 for d in days]
    return df
| | | |
| | | |
| | | # ------------------------------- |
| | | # çæå»¶è¿ç¹å¾ï¼åéåï¼å©ç¨ shiftï¼ |
| | | # ------------------------------- |
def batch_create_delay_features(df, delay_hours):
    """Create lagged (shifted) copies of the smoothed series.

    For each target column and each lag in *delay_hours*, adds a column named
    '<prefix>_delay_<lag>h', where <prefix> is the part of the column name
    before the first underscore.
    """
    columns_to_lag = ['upstream_smooth', 'downstream_smooth']

    if 'water_level_smooth' in df.columns:
        columns_to_lag.append('water_level_smooth')
    elif 'water_level' in df.columns:
        print("注æ: æ°´ä½å¹³æ»åä¸åå¨ï¼ä½¿ç¨åå§æ°´ä½åå建延è¿ç¹å¾")
        # Build the smoothed water-level column on the fly from the raw one.
        smoothed = df['water_level'].rolling(window=24, min_periods=1, center=True).mean()
        df['water_level_smooth'] = smoothed
        df['water_level_smooth'] = df['water_level_smooth'].fillna(df['water_level'])
        columns_to_lag.append('water_level_smooth')

    for col in columns_to_lag:
        if col not in df.columns:
            print(f"è¦å: å {col} ä¸åå¨ï¼è·³è¿å建延è¿ç¹å¾")
            continue
        prefix = col.split("_")[0]
        for lag in delay_hours:
            df[f'{prefix}_delay_{lag}h'] = df[col].shift(lag)

    return df
| | | |
| | | |
| | | |
| | | # ------------------------------- |
| | | # åéåæé è®ç»æ ·æ¬ |
| | | # ------------------------------- |
| | | def create_features_vectorized(df, look_back=96, forecast_horizon=1): |
| | | """ |
| | | ç¢éåçæ¬çç¹å¾åå»ºå½æ° - ä½¿ç¨æ»å¨çªå£æ¹æ³é«æå建ç¹å¾ |
| | | """ |
| | | print("å¼å§å建ç¢éåç¹å¾...") |
| | | |
| | | # æ£æ¥æ°æ®éæ¯å¦è¶³å¤ |
| | | if len(df) <= look_back + forecast_horizon: |
| | | print(f"é误: æ°æ®é({len(df)})ä¸è¶³ï¼éè¦è³å° {look_back + forecast_horizon + 1} ä¸ªæ ·æ¬") |
| | | return np.array([]), np.array([]) |
| | | |
| | | # 计ç®å¯ä»¥çæçæ ·æ¬æ»æ° |
| | | total_samples = len(df) - look_back - forecast_horizon + 1 |
| | | print(f"åå§å¯ç¨æ ·æ¬æ°: {total_samples}") |
| | | |
| | | # ç¡®ä¿å¿
è¦çååå¨ |
| | | required_features = ['upstream_smooth', 'downstream_smooth', 'DateTime', |
| | | 'lunar_phase_sin', 'lunar_phase_cos', 'is_high_tide'] |
| | | |
| | | # æ·»å å¯éç¹å¾ |
| | | optional_features = { |
| | | 'water_level': ['water_level_smooth', 'mean_1d_water_level', 'mean_3d_water_level', 'std_1d_water_level', |
| | | 'water_level_change_1h', 'water_level_change_24h', 'water_level_sal_ratio', |
| | | 'water_level_trend_1h', 'water_level_trend_24h'], |
| | | 'flow': ['flow_smooth', 'mean_1d_flow', 'mean_3d_flow', 'std_1d_flow', |
| | | 'flow_change_1h', 'flow_change_24h', 'flow_sal_ratio', |
| | | 'flow_trend_1h', 'flow_trend_24h'], |
| | | 'rainfall': ['rainfall_smooth', 'sum_1d_rainfall', 'sum_3d_rainfall', |
| | | 'rainfall_intensity_1h', 'rainfall_intensity_6h', |
| | | 'rainfall_trend_1h', 'rainfall_trend_24h'] |
| | | } |
| | | |
| | | # æ£æ¥å¹¶æ·»å 缺失çç¹å¾ |
| | | for feature in required_features: |
| | | if feature not in df.columns: |
| | | print(f"è¦å: 缺å°å¿
è¦ç¹å¾ {feature}ï¼å°ä½¿ç¨é»è®¤å¼å¡«å
") |
| | | df[feature] = 0 |
| | | |
| | | # æ£æ¥å¹¶æ·»å å¯éç¹å¾ |
| | | for feature_group, features in optional_features.items(): |
| | | # æ£æ¥åºç¡ç¹å¾ï¼ä¾å¦æ°´ä½ãæµéãéé¨éï¼æ¯å¦åå¨ |
| | | if any(col.startswith(feature_group) for col in df.columns): |
| | | for feature in features: |
| | | if feature not in df.columns: |
| | | print(f"è¦å: 缺å°å¯éç¹å¾ {feature}ï¼å°ä½¿ç¨é»è®¤å¼å¡«å
") |
| | | df[feature] = 0 |
| | | |
| | | # 1. å¢å¼ºåå²çªå£ç¹å¾ |
| | | # ä½¿ç¨æ´é¿çåå²çªå£ |
| | | extended_look_back = max(look_back, 168) # è³å°7天 |
| | | upstream_array = df['upstream_smooth'].values |
| | | # 计ç®å¯ä»¥çæçæå¤§æ ·æ¬æ°é |
| | | max_samples = len(upstream_array) - extended_look_back |
| | | # è°æ´total_samplesï¼ç¡®ä¿ä¸è¶
è¿å¯ç¨æ°æ®é |
| | | total_samples = min(total_samples, max_samples) |
| | | |
| | | window_up = sliding_window_view(upstream_array, window_shape=extended_look_back)[:total_samples, :] |
| | | |
| | | # 䏿¸¸æè¿ 24 å°æ¶ï¼å©ç¨æ»å¨çªå£æé ï¼çªå£å¤§å°ä¸º 24 |
| | | downstream_array = df['downstream_smooth'].values |
| | | window_down_full = sliding_window_view(downstream_array, window_shape=24) |
| | | # ä¿®å¤ï¼ç¡®ä¿window_downçæ ·æ¬æ°éä¸window_upä¸è´ï¼ä½¿ç¨ç¸åçtotal_samples |
| | | window_down = window_down_full[look_back-24 : look_back-24 + total_samples, :] |
| | | |
| | | # æå°è°è¯ä¿¡æ¯ |
| | | print(f"total_samples: {total_samples}") |
| | | print(f"window_up shape: {window_up.shape}") |
| | | print(f"window_down shape: {window_down.shape}") |
| | | |
| | | # 2. å¢å¼ºæ¶é´ç¹å¾ |
| | | # ç¡®ä¿sample_dfç大å°ä¸çªå£æ°ç»ä¸è´ |
| | | sample_df = df.iloc[look_back: look_back + total_samples].copy() |
| | | hour = sample_df['DateTime'].dt.hour.values.reshape(-1, 1) |
| | | weekday = sample_df['DateTime'].dt.dayofweek.values.reshape(-1, 1) |
| | | month = sample_df['DateTime'].dt.month.values.reshape(-1, 1) |
| | | day_of_year = sample_df['DateTime'].dt.dayofyear.values.reshape(-1, 1) |
| | | |
| | | # æ¶é´ç¹å¾çé«çº§è¡¨ç¤º |
| | | hour_sin = np.sin(2 * np.pi * hour / 24) |
| | | hour_cos = np.cos(2 * np.pi * hour / 24) |
| | | weekday_sin = np.sin(2 * np.pi * weekday / 7) |
| | | weekday_cos = np.cos(2 * np.pi * weekday / 7) |
| | | month_sin = np.sin(2 * np.pi * month / 12) |
| | | month_cos = np.cos(2 * np.pi * month / 12) |
| | | day_sin = np.sin(2 * np.pi * day_of_year / 365) |
| | | day_cos = np.cos(2 * np.pi * day_of_year / 365) |
| | | |
| | | # ç»åæ¶é´ç¹å¾ |
| | | basic_time_feats = np.hstack([hour_sin, hour_cos, weekday_sin, weekday_cos, |
| | | month_sin, month_cos, day_sin, day_cos]) |
| | | |
| | | # 3. å¢å¼ºååç¹å¾ |
| | | lunar_feats = sample_df[['lunar_phase_sin','lunar_phase_cos','is_high_tide']].values |
| | | |
| | | # 4. å¢å¼ºç»è®¡ç¹å¾ |
| | | # 䏿¸¸ç»è®¡ç¹å¾ - æ·»å æ´å¤æ¶é´çªå£ |
| | | stats_windows = [1, 3, 7, 14, 30] # 天 |
| | | for window in stats_windows: |
| | | hours = window * 24 |
| | | df[f'mean_{window}d_up'] = df['upstream_smooth'].rolling(window=hours, min_periods=1).mean() |
| | | df[f'std_{window}d_up'] = df['upstream_smooth'].rolling(window=hours, min_periods=1).std() |
| | | df[f'max_{window}d_up'] = df['upstream_smooth'].rolling(window=hours, min_periods=1).max() |
| | | df[f'min_{window}d_up'] = df['upstream_smooth'].rolling(window=hours, min_periods=1).min() |
| | | |
| | | df[f'mean_{window}d_down'] = df['downstream_smooth'].rolling(window=hours, min_periods=1).mean() |
| | | df[f'std_{window}d_down'] = df['downstream_smooth'].rolling(window=hours, min_periods=1).std() |
| | | df[f'max_{window}d_down'] = df['downstream_smooth'].rolling(window=hours, min_periods=1).max() |
| | | df[f'min_{window}d_down'] = df['downstream_smooth'].rolling(window=hours, min_periods=1).min() |
| | | |
| | | # 5. å¢å¼ºè¶å¿ç¹å¾ |
| | | # è®¡ç®æ´ç»ç²åº¦çè¶å¿ |
| | | trend_periods = [1, 3, 6, 12, 24, 48, 72, 168] # å°æ¶ |
| | | for period in trend_periods: |
| | | # 䏿¸¸è¶å¿ |
| | | df[f'upstream_trend_{period}h'] = df['upstream_smooth'].diff(period) |
| | | # 䏿¸¸è¶å¿ |
| | | df[f'downstream_trend_{period}h'] = df['downstream_smooth'].diff(period) |
| | | |
| | | # 6. å¢å¼ºååçç¹å¾ |
| | | # è®¡ç®æ´ç»ç²åº¦çååç |
| | | for period in trend_periods: |
| | | # 䏿¸¸ååç |
| | | df[f'upstream_change_rate_{period}h'] = df['upstream_smooth'].pct_change(period) |
| | | # 䏿¸¸ååç |
| | | df[f'downstream_change_rate_{period}h'] = df['downstream_smooth'].pct_change(period) |
| | | |
| | | # 7. å¢å¼ºç度差å¼ç¹å¾ |
| | | df['salinity_diff'] = df['upstream_smooth'] - df['downstream_smooth'] |
| | | for period in trend_periods: |
| | | df[f'salinity_diff_{period}h'] = df['salinity_diff'].diff(period) |
| | | |
| | | # 8. å¢å¼ºç度æ¯çç¹å¾ |
| | | df['salinity_ratio'] = df['upstream_smooth'] / df['downstream_smooth'] |
| | | for period in trend_periods: |
| | | df[f'salinity_ratio_{period}h'] = df['salinity_ratio'].diff(period) |
| | | |
| | | # 9. å¢å¼ºäº¤äºç¹å¾ |
| | | # 计ç®ä¸æ¸¸å䏿¸¸ç交äºç¹å¾ |
| | | df['up_down_interaction'] = df['upstream_smooth'] * df['downstream_smooth'] |
| | | df['up_down_ratio'] = df['upstream_smooth'] / df['downstream_smooth'] |
| | | df['up_down_diff'] = df['upstream_smooth'] - df['downstream_smooth'] |
| | | |
| | | # 10. å¢å¼ºå¨ææ§ç¹å¾ |
| | | # 计ç®å¤ä¸ªæ¶é´å°ºåº¦ç卿æ§ç¹å¾ |
| | | cycle_periods = [12, 24, 48, 72, 168] # å°æ¶ |
| | | for period in cycle_periods: |
| | | df[f'upstream_{period}h_cycle'] = df['upstream_smooth'].rolling(window=period, min_periods=1).mean() |
| | | df[f'downstream_{period}h_cycle'] = df['downstream_smooth'].rolling(window=period, min_periods=1).mean() |
| | | |
| | | # 11. å¢å¼ºèªç¸å
³ç¹å¾ |
| | | # 计ç®ä¸åæ¶é´çªå£çèªç¸å
³ç³»æ° |
| | | autocorr_windows = [24, 48, 72, 168] # å°æ¶ |
| | | for window in autocorr_windows: |
| | | # 䏿¸¸èªç¸å
³ |
| | | df[f'upstream_autocorr_{window}h'] = df['upstream_smooth'].rolling(window=window).apply( |
| | | lambda x: x.autocorr() if len(x) > 1 else 0 |
| | | ) |
| | | # 䏿¸¸èªç¸å
³ |
| | | df[f'downstream_autocorr_{window}h'] = df['downstream_smooth'].rolling(window=window).apply( |
| | | lambda x: x.autocorr() if len(x) > 1 else 0 |
| | | ) |
| | | |
| | | # 12. å¢å¼ºäºç¸å
³ç¹å¾ |
| | | # 计ç®ä¸ä¸æ¸¸ä¹é´çäºç¸å
³ç³»æ° |
| | | for window in autocorr_windows: |
| | | df[f'cross_corr_{window}h'] = df['upstream_smooth'].rolling(window=window).apply( |
| | | lambda x: x.corr(df['downstream_smooth'].iloc[x.index]) if len(x) > 1 else 0 |
| | | ) |
| | | |
| | | # æ´æ°æ ·æ¬æ°æ®æ¡ï¼å
嫿æå建çç¹å¾ |
| | | sample_df = df.iloc[look_back: look_back + total_samples].copy() |
| | | |
| | | # æ¶éææç¹å¾åå |
| | | # ç»è®¡ç¹å¾ |
| | | stats_cols = [] |
| | | for window in stats_windows: |
| | | stats_cols.extend([ |
| | | f'mean_{window}d_up', f'std_{window}d_up', f'max_{window}d_up', f'min_{window}d_up', |
| | | f'mean_{window}d_down', f'std_{window}d_down', f'max_{window}d_down', f'min_{window}d_down' |
| | | ]) |
| | | |
| | | # è¶å¿ç¹å¾ |
| | | trend_cols = [] |
| | | for period in trend_periods: |
| | | trend_cols.extend([f'upstream_trend_{period}h', f'downstream_trend_{period}h']) |
| | | |
| | | # ååçç¹å¾ |
| | | change_rate_cols = [] |
| | | for period in trend_periods: |
| | | change_rate_cols.extend([f'upstream_change_rate_{period}h', f'downstream_change_rate_{period}h']) |
| | | |
| | | # ç度差å¼ç¹å¾ |
| | | salinity_diff_cols = ['salinity_diff'] + [f'salinity_diff_{period}h' for period in trend_periods] |
| | | |
| | | # ç度æ¯çç¹å¾ |
| | | salinity_ratio_cols = ['salinity_ratio'] + [f'salinity_ratio_{period}h' for period in trend_periods] |
| | | |
| | | # 交äºç¹å¾ |
| | | interaction_cols = ['up_down_interaction', 'up_down_ratio', 'up_down_diff'] |
| | | |
| | | # 卿æ§ç¹å¾ |
| | | cycle_cols = [] |
| | | for period in cycle_periods: |
| | | cycle_cols.extend([f'upstream_{period}h_cycle', f'downstream_{period}h_cycle']) |
| | | |
| | | # èªç¸å
³ç¹å¾ |
| | | autocorr_cols = [] |
| | | for window in autocorr_windows: |
| | | autocorr_cols.extend([f'upstream_autocorr_{window}h', f'downstream_autocorr_{window}h']) |
| | | |
| | | # äºç¸å
³ç¹å¾ |
| | | cross_corr_cols = [f'cross_corr_{window}h' for window in autocorr_windows] |
| | | |
| | | # æ£æ¥ææç¹å¾æ¯å¦åå¨ |
| | | all_feature_cols = stats_cols + trend_cols + change_rate_cols + salinity_diff_cols + \ |
| | | salinity_ratio_cols + interaction_cols + cycle_cols + autocorr_cols + cross_corr_cols |
| | | |
| | | for col in all_feature_cols: |
| | | if col not in sample_df.columns: |
| | | print(f"è¦å: 缺å°ç¹å¾ {col}ï¼å°ä½¿ç¨é»è®¤å¼å¡«å
") |
| | | sample_df[col] = 0 |
| | | |
| | | # æåç¹å¾æ°ç» |
| | | stats_feats = sample_df[stats_cols].values |
| | | trend_feats = sample_df[trend_cols].values |
| | | change_rate_feats = sample_df[change_rate_cols].values |
| | | salinity_diff_feats = sample_df[salinity_diff_cols].values |
| | | salinity_ratio_feats = sample_df[salinity_ratio_cols].values |
| | | interaction_feats = sample_df[interaction_cols].values |
| | | cycle_feats = sample_df[cycle_cols].values |
| | | autocorr_feats = sample_df[autocorr_cols].values |
| | | cross_corr_feats = sample_df[cross_corr_cols].values |
| | | |
| | | # 13. å¢å¼ºå¤é¨ç¹å¾ |
| | | external_feats = [] |
| | | |
| | | # æ·»å æ°´ä½ç¹å¾ |
| | | if 'water_level' in sample_df.columns: |
| | | try: |
| | | # æ£æ¥æ°´ä½æ°æ®æ¯å¦è¶³å¤å¯ç¨ |
| | | valid_water_level_pct = (~sample_df['water_level'].isna()).mean() * 100 |
| | | if valid_water_level_pct < 60: |
| | | print(f"æ°´ä½æ°æ®å¯ç¨æ¯ä¾ï¼{valid_water_level_pct:.1f}%ï¼è¿ä½ï¼è·³è¿æ°´ä½ç¹å¾") |
| | | else: |
| | | print(f"æ·»å æ°´ä½ç¹å¾ï¼æ°æ®å¯ç¨ç: {valid_water_level_pct:.1f}%") |
| | | |
| | | # ä½¿ç¨æ°´ä½å¹³æ»æ°æ®ä½ä¸ºç¹å¾ |
| | | if 'water_level_smooth' in sample_df.columns: |
| | | water_level_smooth = sample_df['water_level_smooth'].values.reshape(-1, 1) |
| | | water_level_smooth = np.nan_to_num(water_level_smooth, nan=sample_df['water_level_smooth'].mean()) |
| | | external_feats.append(water_level_smooth) |
| | | |
| | | # æ·»å æ°´ä½çªå£æ°æ® |
| | | if 'water_level_smooth' in df.columns and len(df) >= look_back: |
| | | water_level_array = df['water_level_smooth'].values |
| | | water_level_array = np.nan_to_num(water_level_array, nan=np.nanmean(water_level_array)) |
| | | window_water_level = sliding_window_view(water_level_array, window_shape=48)[:total_samples, :] |
| | | window_water_level = window_water_level[:, ::4] # æ¯4å°æ¶åä¸ä¸ªç¹ï¼å
±12ä¸ªç¹ |
| | | external_feats.append(window_water_level) |
| | | |
| | | # æ·»å æ°´ä½ç»è®¡ç¹å¾ |
| | | if all(col in sample_df.columns for col in ['mean_1d_water_level', 'mean_3d_water_level', 'std_1d_water_level']): |
| | | water_level_stats = sample_df[['mean_1d_water_level', 'mean_3d_water_level', 'std_1d_water_level']].values |
| | | water_level_stats = np.nan_to_num(water_level_stats, nan=0) |
| | | external_feats.append(water_level_stats) |
| | | |
| | | # æ·»å æ°´ä½ååçç¹å¾ |
| | | if all(col in sample_df.columns for col in ['water_level_change_1h', 'water_level_change_24h']): |
| | | water_level_changes = sample_df[['water_level_change_1h', 'water_level_change_24h']].values |
| | | water_level_changes = np.nan_to_num(water_level_changes, nan=0) |
| | | external_feats.append(water_level_changes) |
| | | |
| | | # æ·»å æ°´ä½ä¸ç度æ¯ç |
| | | if 'water_level_sal_ratio' in sample_df.columns: |
| | | water_level_ratio = sample_df['water_level_sal_ratio'].values.reshape(-1, 1) |
| | | water_level_ratio = np.nan_to_num(water_level_ratio, nan=1) |
| | | external_feats.append(water_level_ratio) |
| | | |
| | | # æ·»å æ°´ä½è¶å¿ç¹å¾ |
| | | if all(col in sample_df.columns for col in ['water_level_trend_1h', 'water_level_trend_24h']): |
| | | water_level_trends = sample_df[['water_level_trend_1h', 'water_level_trend_24h']].values |
| | | water_level_trends = np.nan_to_num(water_level_trends, nan=0) |
| | | external_feats.append(water_level_trends) |
| | | |
| | | print(f"å·²æ·»å æ°´ä½ç¸å
³ç¹å¾: {len(external_feats)}ç»") |
| | | except Exception as e: |
| | | print(f"æ·»å æ°´ä½ç¹å¾æ¶åºé: {e}") |
| | | |
| | | # æ·»å æµéç¹å¾ |
| | | if 'flow' in sample_df.columns: |
| | | try: |
| | | valid_flow_pct = (~sample_df['flow'].isna()).mean() * 100 |
| | | if valid_flow_pct < 60: |
| | | print(f"æµéæ°æ®å¯ç¨æ¯ä¾ï¼{valid_flow_pct:.1f}%ï¼è¿ä½ï¼è·³è¿æµéç¹å¾") |
| | | else: |
| | | print(f"æ·»å æµéç¹å¾ï¼æ°æ®å¯ç¨ç: {valid_flow_pct:.1f}%") |
| | | |
| | | # ä½¿ç¨æµéå¹³æ»æ°æ®ä½ä¸ºç¹å¾ |
| | | if 'flow_smooth' in sample_df.columns: |
| | | flow_smooth = sample_df['flow_smooth'].values.reshape(-1, 1) |
| | | flow_smooth = np.nan_to_num(flow_smooth, nan=sample_df['flow_smooth'].mean()) |
| | | external_feats.append(flow_smooth) |
| | | |
| | | # æ·»å æµéçªå£æ°æ® |
| | | if 'flow_smooth' in df.columns and len(df) >= look_back: |
| | | flow_array = df['flow_smooth'].values |
| | | flow_array = np.nan_to_num(flow_array, nan=np.nanmean(flow_array)) |
| | | window_flow = sliding_window_view(flow_array, window_shape=48)[:total_samples, :] |
| | | window_flow = window_flow[:, ::4] # æ¯4å°æ¶åä¸ä¸ªç¹ï¼å
±12ä¸ªç¹ |
| | | external_feats.append(window_flow) |
| | | |
| | | # æ·»å æµéç»è®¡ç¹å¾ |
| | | if all(col in sample_df.columns for col in ['mean_1d_flow', 'mean_3d_flow', 'std_1d_flow']): |
| | | flow_stats = sample_df[['mean_1d_flow', 'mean_3d_flow', 'std_1d_flow']].values |
| | | flow_stats = np.nan_to_num(flow_stats, nan=0) |
| | | external_feats.append(flow_stats) |
| | | |
| | | # æ·»å æµéååçç¹å¾ |
| | | if all(col in sample_df.columns for col in ['flow_change_1h', 'flow_change_24h']): |
| | | flow_changes = sample_df[['flow_change_1h', 'flow_change_24h']].values |
| | | flow_changes = np.nan_to_num(flow_changes, nan=0) |
| | | external_feats.append(flow_changes) |
| | | |
| | | # æ·»å æµéä¸ç度æ¯ç |
| | | if 'flow_sal_ratio' in sample_df.columns: |
| | | flow_ratio = sample_df['flow_sal_ratio'].values.reshape(-1, 1) |
| | | flow_ratio = np.nan_to_num(flow_ratio, nan=1) |
| | | external_feats.append(flow_ratio) |
| | | |
| | | # æ·»å æµéè¶å¿ç¹å¾ |
| | | if all(col in sample_df.columns for col in ['flow_trend_1h', 'flow_trend_24h']): |
| | | flow_trends = sample_df[['flow_trend_1h', 'flow_trend_24h']].values |
| | | flow_trends = np.nan_to_num(flow_trends, nan=0) |
| | | external_feats.append(flow_trends) |
| | | |
| | | print(f"å·²æ·»å æµéç¸å
³ç¹å¾: {len(external_feats)}ç»") |
| | | except Exception as e: |
| | | print(f"æ·»å æµéç¹å¾æ¶åºé: {e}") |
| | | |
| | | # æ·»å éé¨éç¹å¾ |
| | | if 'rainfall' in sample_df.columns: |
| | | try: |
| | | valid_rainfall_pct = (~sample_df['rainfall'].isna()).mean() * 100 |
| | | if valid_rainfall_pct < 60: |
| | | print(f"éé¨éæ°æ®å¯ç¨æ¯ä¾ï¼{valid_rainfall_pct:.1f}%ï¼è¿ä½ï¼è·³è¿éé¨éç¹å¾") |
| | | else: |
| | | print(f"æ·»å éé¨éç¹å¾ï¼æ°æ®å¯ç¨ç: {valid_rainfall_pct:.1f}%") |
| | | |
| | | # 使ç¨å¹³æ»åçéé¨éæ°æ® |
| | | if 'rainfall_smooth' in sample_df.columns: |
| | | rainfall_smooth = sample_df['rainfall_smooth'].values.reshape(-1, 1) |
| | | rainfall_smooth = np.nan_to_num(rainfall_smooth, nan=0) |
| | | external_feats.append(rainfall_smooth) |
| | | |
| | | # æ·»å 累计éé¨éç¹å¾ |
| | | if all(col in sample_df.columns for col in ['sum_1d_rainfall', 'sum_3d_rainfall']): |
| | | rainfall_sums = sample_df[['sum_1d_rainfall', 'sum_3d_rainfall']].values |
| | | rainfall_sums = np.nan_to_num(rainfall_sums, nan=0) |
| | | external_feats.append(rainfall_sums) |
| | | |
| | | # æ·»å éé¨å¼ºåº¦ç¹å¾ |
| | | if all(col in sample_df.columns for col in ['rainfall_intensity_1h', 'rainfall_intensity_6h']): |
| | | rainfall_intensity = sample_df[['rainfall_intensity_1h', 'rainfall_intensity_6h']].values |
| | | rainfall_intensity = np.nan_to_num(rainfall_intensity, nan=0) |
| | | external_feats.append(rainfall_intensity) |
| | | |
| | | # æ·»å éé¨éçªå£æ°æ®ï¼å¦æåå¨ï¼ |
| | | if 'rainfall_smooth' in df.columns and len(df) >= look_back: |
| | | rainfall_array = df['rainfall_smooth'].values |
| | | try: |
| | | # å¤çå¯è½çNaNå¼ |
| | | rainfall_array = np.nan_to_num(rainfall_array, nan=0) |
| | | |
| | | # æå»ºéé¨éçåå²çªå£æ°æ® |
| | | window_rainfall = sliding_window_view(rainfall_array, window_shape=24)[:total_samples, :] |
| | | # åªå24å°æ¶ä¸çå
³é®ç¹ä»¥åå°ç»´åº¦ |
| | | window_rainfall = window_rainfall[:, ::2] # æ¯2å°æ¶åä¸ä¸ªç¹ï¼å
±12ä¸ªç¹ |
| | | external_feats.append(window_rainfall) |
| | | except Exception as e: |
| | | print(f"å建éé¨éçªå£ç¹å¾æ¶åºé: {e}") |
| | | |
| | | print(f"已添å éé¨éç¸å
³ç¹å¾: {len(external_feats)}ç»") |
| | | except Exception as e: |
| | | print(f"æ·»å éé¨éç¹å¾æ¶åºé: {e}") |
| | | import traceback |
| | | traceback.print_exc() |
| | | |
| | | # æå°ææç¹å¾çå½¢ç¶ï¼ç¨äºè°è¯ |
| | | print(f"window_up shape: {window_up.shape}") |
| | | print(f"window_down shape: {window_down.shape}") |
| | | print(f"basic_time_feats shape: {basic_time_feats.shape}") |
| | | print(f"lunar_feats shape: {lunar_feats.shape}") |
| | | print(f"stats_feats shape: {stats_feats.shape}") |
| | | print(f"trend_feats shape: {trend_feats.shape}") |
| | | print(f"change_rate_feats shape: {change_rate_feats.shape}") |
| | | print(f"salinity_diff_feats shape: {salinity_diff_feats.shape}") |
| | | print(f"salinity_ratio_feats shape: {salinity_ratio_feats.shape}") |
| | | print(f"interaction_feats shape: {interaction_feats.shape}") |
| | | print(f"cycle_feats shape: {cycle_feats.shape}") |
| | | print(f"autocorr_feats shape: {autocorr_feats.shape}") |
| | | print(f"cross_corr_feats shape: {cross_corr_feats.shape}") |
| | | |
| | | # æ¼æ¥ææç¹å¾ |
| | | X = np.hstack([window_up, window_down, basic_time_feats, lunar_feats, |
| | | stats_feats, trend_feats, change_rate_feats, |
| | | salinity_diff_feats, salinity_ratio_feats, interaction_feats, |
| | | cycle_feats, autocorr_feats, cross_corr_feats]) |
| | | |
| | | if external_feats: |
| | | try: |
| | | # æå°å¤é¨ç¹å¾çå½¢ç¶ |
| | | for i, feat in enumerate(external_feats): |
| | | print(f"external_feat_{i} shape: {feat.shape}") |
| | | |
| | | X = np.hstack([X] + external_feats) |
| | | except Exception as e: |
| | | print(f"æ¼æ¥å¤é¨ç¹å¾æ¶åºé: {e}ï¼å°è·³è¿å¤é¨ç¹å¾") |
| | | import traceback |
| | | traceback.print_exc() |
| | | |
| | | # æç»æ£æ¥ï¼ç¡®ä¿æ²¡æNaNææ ç©·å¤§å¼ |
| | | if np.isnan(X).any() or np.isinf(X).any(): |
| | | print("è¦å: ç¹å¾ä¸åç°NaNææ ç©·å¤§å¼ï¼å°è¿è¡æ¿æ¢") |
| | | X = np.nan_to_num(X, nan=0, posinf=1e6, neginf=-1e6) |
| | | |
| | | # æé æ ç¾ - 忥颿µï¼åªåä¸ä¸ªå¼ |
| | | y = downstream_array[look_back:look_back + total_samples].reshape(-1, 1) |
| | | |
| | | global feature_columns |
| | | feature_columns = ["combined_vector_features"] |
| | | print(f"åéåç¹å¾å·¥ç¨å®æï¼ç¹å¾ç»´åº¦: {X.shape[1]}") |
| | | return X, y |
| | | |
| | | |
| | | |
| | | |
| | | # ------------------------------- |
| | | # è·å模ååç¡®åº¦ææ |
| | | # ------------------------------- |
def get_model_metrics():
    """Read the accuracy metrics stored alongside the pickled model cache.

    Returns:
        dict with keys 'rmse' and 'mae' (each possibly None) when the cache
        file exists and can be unpickled; None when the file is absent or
        loading fails.
    """
    cache_path = 'salinity_model.pkl'
    if not os.path.exists(cache_path):
        return None
    try:
        with open(cache_path, 'rb') as fh:
            payload = pickle.load(fh)
    except Exception as e:
        print(f"è·åæ¨¡åææ 失败: {e}")
        return None
    return {'rmse': payload.get('rmse', None), 'mae': payload.get('mae', None)}
| | | |
| | | # ------------------------------- |
| | | # 模åè®ç»ä¸é¢æµï¼å±ç¤ºéªè¯å确度ï¼RMSE, MAEï¼ |
| | | # ------------------------------- |
def train_and_predict(df, start_time, force_retrain=False):
    """Train (or reuse a cached) XGBoost model, then recursively forecast 5 daily values.

    Parameters:
        df: DataFrame with at least 'DateTime', 'upstream_smooth',
            'downstream_smooth' columns; optional water-level / flow /
            rainfall / delay columns are used when present.
        start_time: timestamp splitting history (strictly before) from the
            forecast origin.
        force_retrain: when True, deletes the on-disk model cache and retrains.

    Returns:
        (future_dates, predictions, model, metrics) on success, where
        metrics is a {'rmse', 'mae'} dict read back from the cache file
        (or None); (None, None, None, None) on any failure.
    """
    global cached_model, last_training_time
    model_cache_file = 'salinity_model.pkl'
    model_needs_training = True

    # Forced retrain: drop the pickled model so the cache-reuse paths below can't hit.
    if os.path.exists(model_cache_file) and force_retrain:
        try:
            os.remove(model_cache_file)
            print("å·²å 餿§æ¨¡åç¼åï¼å¼ºå¶éæ°è®ç»ï¼")
        except Exception as e:
            print("å é¤ç¼åå¼å¸¸:", e)

    # Only data strictly before the forecast origin is used for training.
    train_df = df[df['DateTime'] < start_time].copy()

    # Build features once just to learn the current feature dimension, so a
    # cached model with a different input width is detected and retrained.
    test_X, _ = create_features_vectorized(train_df, look_back=96, forecast_horizon=1)
    current_feature_dim = test_X.shape[1] if len(test_X) > 0 else 0
    print(f"å½åç¹å¾ç»´åº¦: {current_feature_dim}")

    cached_feature_dim = None

    # Reuse path 1: in-memory cached model, if trained on data at least as
    # recent as the training slice and with a matching feature width.
    if not force_retrain and cached_model is not None and last_training_time is not None:
        if last_training_time >= train_df['DateTime'].max():
            try:
                cached_feature_dim = cached_model.n_features_in_
                print(f"ç¼å模åç¹å¾ç»´åº¦: {cached_feature_dim}")

                if cached_feature_dim == current_feature_dim:
                    model_needs_training = False
                    print(f"使ç¨ç¼å模åï¼è®ç»æ¶é´: {last_training_time}")
                else:
                    print(f"ç¹å¾ç»´åº¦ä¸å¹éï¼ç¼å模å: {cached_feature_dim}ï¼å½å: {current_feature_dim}ï¼ï¼éè¦éæ°è®ç»")
            except Exception as e:
                print(f"æ£æ¥æ¨¡åç¹å¾ç»´åº¦å¤±è´¥: {e}")
    # Reuse path 2: model pickled on disk; same recency + width checks.
    elif not force_retrain and os.path.exists(model_cache_file):
        try:
            with open(model_cache_file, 'rb') as f:
                model_data = pickle.load(f)
                cached_model = model_data['model']
                last_training_time = model_data['training_time']

            try:
                cached_feature_dim = cached_model.n_features_in_
                print(f"æä»¶ç¼å模åç¹å¾ç»´åº¦: {cached_feature_dim}")

                if cached_feature_dim == current_feature_dim:
                    if last_training_time >= train_df['DateTime'].max():
                        model_needs_training = False
                        print(f"仿件å 载模åï¼è®ç»æ¶é´: {last_training_time}")
                else:
                    print(f"ç¹å¾ç»´åº¦ä¸å¹éï¼æä»¶æ¨¡å: {cached_feature_dim}ï¼å½å: {current_feature_dim}ï¼ï¼éè¦éæ°è®ç»")
            except Exception as e:
                print(f"æ£æ¥æ¨¡åç¹å¾ç»´åº¦å¤±è´¥: {e}")
        except Exception as e:
            print("å 载模å失败:", e)

    if model_needs_training:
        print("å¼å§è®ç»æ°æ¨¡å...")
        # Hard floor on training rows; below this, abort instead of fitting.
        if len(train_df) < 100:
            print("è®ç»æ°æ®ä¸è¶³")
            return None, None, None, None

        start_train = time()
        X, y = create_features_vectorized(train_df, look_back=96, forecast_horizon=1)
        if len(X) == 0 or len(y) == 0:
            print("æ ·æ¬çæä¸è¶³ï¼è®ç»ç»æ¢")
            return None, None, None, None
        print(f"è®ç»æ ·æ¬æ°é: {X.shape[0]}, ç¹å¾ç»´åº¦: {X.shape[1]}")

        # Time-ordered cross-validation (no shuffling) to avoid leakage.
        n_splits = 5
        tscv = TimeSeriesSplit(n_splits=n_splits)

        # Tuned regressor: many shallow-ish trees at a low learning rate,
        # with subsampling and both L1/L2 regularization.
        model = XGBRegressor(
            n_estimators=500,
            learning_rate=0.01,
            max_depth=6,
            min_child_weight=3,
            subsample=0.8,
            colsample_bytree=0.8,
            gamma=0.2,
            reg_alpha=0.3,
            reg_lambda=2.0,
            n_jobs=-1,
            random_state=42,
            tree_method='hist'  # histogram algorithm for faster training
        )

        try:
            # Fit once per CV fold with early stopping on the fold's
            # validation set.  NOTE(review): the same `model` object is
            # refit each fold, so the model kept afterwards is the one
            # from the LAST fold — confirm this is intended.
            cv_scores = []
            for train_idx, val_idx in tscv.split(X):
                X_train, X_val = X[train_idx], X[val_idx]
                y_train, y_val = y[train_idx], y[val_idx]

                model.fit(X_train, y_train,
                          eval_set=[(X_val, y_val)],
                          eval_metric=['rmse', 'mae'],
                          early_stopping_rounds=50,
                          verbose=False)

                # Fold-level validation RMSE / MAE.
                y_val_pred = model.predict(X_val)
                rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))
                mae = mean_absolute_error(y_val, y_val_pred)
                cv_scores.append((rmse, mae))

            # Average metrics across folds; also persisted to the cache below.
            avg_rmse = np.mean([score[0] for score in cv_scores])
            avg_mae = np.mean([score[1] for score in cv_scores])
            print(f"交åéªè¯å¹³å RMSE: {avg_rmse:.4f}, MAE: {avg_mae:.4f}")

            # --- Feature-importance report (diagnostic; removable once validated) ---
            feature_importance = model.feature_importances_
            sorted_idx = np.argsort(feature_importance)[::-1]

            # Reconstruct human-readable names positionally.  NOTE(review):
            # this list must stay in the exact order the feature matrix is
            # assembled in create_features_vectorized — verify when either side changes.
            feature_names = []
            # 96 hourly upstream history points
            for i in range(96):
                feature_names.append(f'upstream_t-{95-i}')
            # 24 hourly downstream history points
            for i in range(24):
                feature_names.append(f'downstream_t-{23-i}')
            # cyclic time-of-day / weekday / month encodings
            feature_names.extend(['hour_sin', 'hour_cos', 'weekday_sin', 'weekday_cos', 'month_sin', 'month_cos'])
            # lunar-phase (tide proxy) features
            feature_names.extend(['lunar_phase_sin', 'lunar_phase_cos', 'is_high_tide'])
            # rolling statistics
            feature_names.extend(['mean_1d_up', 'mean_3d_up', 'std_1d_up', 'max_1d_up', 'min_1d_up',
                                  'mean_1d_down', 'mean_3d_down', 'std_1d_down', 'max_1d_down', 'min_1d_down'])
            # trend features
            feature_names.extend(['upstream_trend_1h', 'upstream_trend_24h',
                                  'downstream_trend_1h', 'downstream_trend_24h'])
            # rate-of-change features
            feature_names.extend(['upstream_change_rate_1h', 'upstream_change_rate_24h',
                                  'downstream_change_rate_1h', 'downstream_change_rate_24h'])
            # salinity difference features
            feature_names.extend(['salinity_diff', 'salinity_diff_1h', 'salinity_diff_24h'])
            # salinity ratio features
            feature_names.extend(['salinity_ratio', 'salinity_ratio_1h', 'salinity_ratio_24h'])

            # Optional external-feature names, mirrored from the training columns.
            if 'water_level' in train_df.columns:
                feature_names.extend(['water_level_smooth', 'mean_1d_water_level', 'mean_3d_water_level',
                                      'std_1d_water_level', 'water_level_change_1h', 'water_level_change_24h',
                                      'water_level_sal_ratio', 'water_level_sal_ratio_1h', 'water_level_sal_ratio_24h',
                                      'water_level_sal_interaction', 'water_level_sal_interaction_1h', 'water_level_sal_interaction_24h'])

            if 'flow' in train_df.columns:
                feature_names.extend(['flow_smooth', 'mean_1d_flow', 'mean_3d_flow', 'std_1d_flow',
                                      'flow_change_1h', 'flow_change_24h', 'flow_sal_ratio',
                                      'flow_trend_1h', 'flow_trend_24h'])

            if 'rainfall' in train_df.columns:
                feature_names.extend(['rainfall_smooth', 'sum_1d_rainfall', 'sum_3d_rainfall',
                                      'rainfall_intensity_1h', 'rainfall_intensity_6h',
                                      'rainfall_trend_1h', 'rainfall_trend_24h'])

            # Console report of the top-20 features.
            print("\nç¹å¾éè¦æ§åæ:")
            print("Top 20 éè¦ç¹å¾:")
            for i in range(min(20, len(sorted_idx))):
                print(f"{i+1}. {feature_names[sorted_idx[i]]}: {feature_importance[sorted_idx[i]]:.6f}")

            # Bar chart of the same top-20, written to disk (figure closed, not shown).
            plt.figure(figsize=(12, 8))
            plt.bar(range(min(20, len(sorted_idx))),
                    feature_importance[sorted_idx[:20]])
            plt.xticks(range(min(20, len(sorted_idx))),
                       [feature_names[i] for i in sorted_idx[:20]],
                       rotation=45, ha='right')
            plt.title('Top 20 ç¹å¾éè¦æ§')
            plt.tight_layout()
            plt.savefig('feature_importance.png', dpi=300, bbox_inches='tight')
            plt.close()

            # Aggregate importance per feature category.
            # NOTE(review): the first two keys render identically here because the
            # file's encoding was mangled; in the pristine source they should be the
            # distinct labels for upstream vs downstream history — confirm, since
            # identical dict keys would silently collapse into one entry.
            feature_types = {
                '䏿¸¸åå²': [f for f in feature_names if f.startswith('upstream_t-')],
                '䏿¸¸åå²': [f for f in feature_names if f.startswith('downstream_t-')],
                'æ¶é´ç¹å¾': ['hour_sin', 'hour_cos', 'weekday_sin', 'weekday_cos', 'month_sin', 'month_cos'],
                'ååç¹å¾': ['lunar_phase_sin', 'lunar_phase_cos', 'is_high_tide'],
                'ç»è®¡ç¹å¾': ['mean_1d_up', 'mean_3d_up', 'std_1d_up', 'max_1d_up', 'min_1d_up',
                              'mean_1d_down', 'mean_3d_down', 'std_1d_down', 'max_1d_down', 'min_1d_down'],
                'è¶å¿ç¹å¾': ['upstream_trend_1h', 'upstream_trend_24h',
                              'downstream_trend_1h', 'downstream_trend_24h'],
                'ååçç¹å¾': ['upstream_change_rate_1h', 'upstream_change_rate_24h',
                                'downstream_change_rate_1h', 'downstream_change_rate_24h'],
                'ç度差å¼': ['salinity_diff', 'salinity_diff_1h', 'salinity_diff_24h'],
                'ç度æ¯ç': ['salinity_ratio', 'salinity_ratio_1h', 'salinity_ratio_24h']
            }

            if 'water_level' in train_df.columns:
                feature_types['æ°´ä½ç¹å¾'] = ['water_level_smooth', 'mean_1d_water_level', 'mean_3d_water_level',
                                              'std_1d_water_level', 'water_level_change_1h', 'water_level_change_24h',
                                              'water_level_sal_ratio', 'water_level_sal_ratio_1h', 'water_level_sal_ratio_24h',
                                              'water_level_sal_interaction', 'water_level_sal_interaction_1h', 'water_level_sal_interaction_24h']

            if 'flow' in train_df.columns:
                feature_types['æµéç¹å¾'] = ['flow_smooth', 'mean_1d_flow', 'mean_3d_flow', 'std_1d_flow',
                                              'flow_change_1h', 'flow_change_24h', 'flow_sal_ratio',
                                              'flow_trend_1h', 'flow_trend_24h']

            if 'rainfall' in train_df.columns:
                feature_types['éé¨éç¹å¾'] = ['rainfall_smooth', 'sum_1d_rainfall', 'sum_3d_rainfall',
                                                'rainfall_intensity_1h', 'rainfall_intensity_6h',
                                                'rainfall_trend_1h', 'rainfall_trend_24h']

            print("\næç¹å¾ç±»ååæéè¦æ§:")
            for feature_type, features in feature_types.items():
                type_importance = sum(feature_importance[feature_names.index(f)] for f in features)
                print(f"{feature_type}: {type_importance:.4f}")

            # Update the in-memory cache and persist model + metadata to disk.
            last_training_time = start_time
            cached_model = model

            with open(model_cache_file, 'wb') as f:
                pickle.dump({
                    'model': model,
                    'training_time': last_training_time,
                    'feature_columns': feature_columns,
                    'rmse': avg_rmse,
                    'mae': avg_mae,
                    'feature_dim': current_feature_dim,
                    'feature_importance': feature_importance,
                    'feature_names': feature_names
                }, f)
            print(f"模åè®ç»å®æï¼èæ¶: {time() - start_train:.2f}ç§ï¼ç¹å¾ç»´åº¦: {current_feature_dim}")
        except Exception as e:
            print("模åè®ç»å¼å¸¸:", e)
            return None, None, None, None
    else:
        model = cached_model

    # ------------------------------------------------------------------
    # Recursive one-day-ahead prediction: each predicted value is appended
    # to a working copy of df and fed back in as history for the next day.
    # ------------------------------------------------------------------
    try:
        future_dates = [start_time + timedelta(days=i) for i in range(5)]
        predictions = np.zeros(5)

        # Working copy so the caller's df is never mutated.
        temp_df = df.copy()

        for i in range(5):
            current_date = future_dates[i]
            print(f"颿µç¬¬ {i+1} 天: {current_date.strftime('%Y-%m-%d')}")

            # Latest 96h upstream / 24h downstream windows (last row of the
            # sliding-window view == most recent window).
            upstream_array = temp_df['upstream_smooth'].values
            window_up = np.lib.stride_tricks.sliding_window_view(upstream_array, window_shape=96)[-1, :]
            downstream_array = temp_df['downstream_smooth'].values
            window_down = np.lib.stride_tricks.sliding_window_view(downstream_array, window_shape=24)[-1, :]

            # Debug: confirm the inputs actually drift between steps.
            print(f"æ¥éª¤ {i+1} 䏿¸¸å¹³åå¼: {np.mean(window_up):.4f}")
            print(f"æ¥éª¤ {i+1} 䏿¸¸å¹³åå¼: {np.mean(window_down):.4f}")

            # Normalized time features; a small random jitter is added from
            # step 2 on so consecutive days don't produce identical inputs.
            hour_norm = current_date.hour / 24.0 + (np.random.normal(0, 0.05) if i > 0 else 0)
            weekday_norm = current_date.dayofweek / 7.0
            month_norm = current_date.month / 12.0
            basic_time_feats = np.array([hour_norm, weekday_norm, month_norm]).reshape(1, -1)

            # Lunar-calendar features (tide proxy).
            ld = LunarDate.fromSolarDate(current_date.year, current_date.month, current_date.day)
            lunar_feats = np.array([np.sin(2*np.pi*ld.day/15),
                                    np.cos(2*np.pi*ld.day/15),
                                    1 if (ld.day <=5 or (ld.day >=16 and ld.day<=20)) else 0]).reshape(1, -1)

            # Rolling statistics: prefer precomputed columns, fall back to
            # computing them from the raw series if absent.
            try:
                stats_up = temp_df[['mean_1d_up','mean_3d_up','std_1d_up','max_1d_up','min_1d_up']].iloc[-1:].values
                stats_down = temp_df[['mean_1d_down','mean_3d_down','std_1d_down','max_1d_down','min_1d_down']].iloc[-1:].values
            except KeyError:
                recent_up = temp_df['upstream'].values[-24:]
                stats_up = np.array([np.mean(recent_up),
                                     np.mean(temp_df['upstream'].values[-72:]),
                                     np.std(recent_up),
                                     np.max(recent_up),
                                     np.min(recent_up)]).reshape(1, -1)
                recent_down = temp_df['downstream_smooth'].values[-24:]
                stats_down = np.array([np.mean(recent_down),
                                       np.mean(temp_df['downstream_smooth'].values[-72:]),
                                       np.std(recent_down),
                                       np.max(recent_down),
                                       np.min(recent_down)]).reshape(1, -1)

            # Last row of every delay (lag) column.
            delay_cols = [col for col in temp_df.columns if col.startswith('upstream_delay_') or col.startswith('downstream_delay_')]
            delay_feats = temp_df[delay_cols].iloc[-1:].values

            # From step 2 on, perturb all inputs slightly (about 5% of each
            # block's std) so the model doesn't see near-identical vectors.
            if i > 0:
                window_up = window_up + np.random.normal(0, max(1.0, np.std(window_up)*0.05), window_up.shape)
                window_down = window_down + np.random.normal(0, max(0.5, np.std(window_down)*0.05), window_down.shape)
                stats_up = stats_up + np.random.normal(0, np.std(stats_up)*0.05, stats_up.shape)
                stats_down = stats_down + np.random.normal(0, np.std(stats_down)*0.05, stats_down.shape)
                delay_feats = delay_feats + np.random.normal(0, np.std(delay_feats)*0.05, delay_feats.shape)

            # Optional water-level features, collected best-effort.
            water_level_feats = []
            has_water_level = 'water_level' in temp_df.columns and 'water_level_smooth' in temp_df.columns
            if has_water_level:
                try:
                    # smoothed water level (scalar)
                    water_level_smooth = temp_df['water_level_smooth'].iloc[-1]
                    water_level_feats.append(np.array([water_level_smooth]).reshape(1, -1))

                    # rolling statistics
                    if all(col in temp_df.columns for col in ['mean_1d_water_level', 'mean_3d_water_level', 'std_1d_water_level']):
                        water_level_stats = temp_df[['mean_1d_water_level', 'mean_3d_water_level', 'std_1d_water_level']].iloc[-1:].values
                        water_level_feats.append(water_level_stats)

                    # change rates
                    if all(col in temp_df.columns for col in ['water_level_change_1h', 'water_level_change_24h']):
                        water_level_changes = temp_df[['water_level_change_1h', 'water_level_change_24h']].iloc[-1:].values
                        water_level_feats.append(water_level_changes)

                    # water-level / salinity ratio
                    if 'water_level_sal_ratio' in temp_df.columns:
                        water_level_ratio = temp_df['water_level_sal_ratio'].iloc[-1]
                        water_level_feats.append(np.array([water_level_ratio]).reshape(1, -1))

                    # lagged water-level columns
                    water_level_delay_cols = [col for col in temp_df.columns if col.startswith('water_level_delay_')]
                    if water_level_delay_cols:
                        water_level_delay_feats = temp_df[water_level_delay_cols].iloc[-1:].values
                        water_level_feats.append(water_level_delay_feats)

                    # 48h window downsampled to 12 points (every 4th sample),
                    # edge-padded if fewer than 12 remain.
                    if len(temp_df) >= 48:
                        recent_water_levels = temp_df['water_level_smooth'].values[-48:]
                        sampled_levels = recent_water_levels[::4]
                        if len(sampled_levels) < 12:
                            sampled_levels = np.pad(sampled_levels, (0, 12 - len(sampled_levels)), 'edge')
                        water_level_feats.append(sampled_levels.reshape(1, -1))
                except Exception as e:
                    print(f"æå»ºæ°´ä½ç¹å¾æ¶åºé: {e}")

            # Assemble the prediction row.
            X_pred = np.hstack([window_up.reshape(1, -1),
                                window_down.reshape(1, -1),
                                basic_time_feats, lunar_feats, stats_up, stats_down, delay_feats])

            # Append water-level blocks one by one (best-effort).
            if water_level_feats:
                try:
                    for feat in water_level_feats:
                        X_pred = np.hstack([X_pred, feat])
                except Exception as e:
                    print(f"æ·»å æ°´ä½ç¹å¾æ¶åºé: {e}")

            # Force the row to the model's expected width: zero-pad if
            # short, truncate if long.  NOTE(review): padding/truncation
            # silently misaligns features with the training layout — this
            # masks, rather than fixes, a mismatch upstream.
            expected_feature_dim = cached_feature_dim or current_feature_dim
            if X_pred.shape[1] != expected_feature_dim:
                print(f"è¦å: ç¹å¾ç»´åº¦ä¸å¹é! å½å: {X_pred.shape[1]}, ææ: {expected_feature_dim}")

                if X_pred.shape[1] < expected_feature_dim:
                    padding = np.zeros((1, expected_feature_dim - X_pred.shape[1]))
                    X_pred = np.hstack([X_pred, padding])
                    print(f"已填åç¹å¾è³æ£ç¡®ç»´åº¦: {X_pred.shape[1]}")
                elif X_pred.shape[1] > expected_feature_dim:
                    X_pred = X_pred[:, :expected_feature_dim]
                    print(f"å·²æªæç¹å¾è³æ£ç¡®ç»´åº¦: {X_pred.shape[1]}")

            # Last-chance sanitation of NaN / inf values.
            if np.isnan(X_pred).any() or np.isinf(X_pred).any():
                X_pred = np.nan_to_num(X_pred, nan=0.0, posinf=1e6, neginf=-1e6)

            # Hash of the raw bytes — debug aid to confirm per-day inputs differ.
            feature_hash = hash(X_pred.tobytes()) % 10000000
            print(f"æ¥éª¤ {i+1} ç¹å¾åå¸: {feature_hash}")

            # Reseed RNG per step so each prediction run varies.
            # NOTE(review): this makes forecasts non-reproducible by design — confirm intended.
            np.random.seed(int(time() * 1000) % 10000 + i)

            print(f"颿µç¹å¾å½¢ç¶: {X_pred.shape}, æ ·æ¬å¼: [{X_pred[0,0]:.4f}, {X_pred[0,50]:.4f}, {X_pred[0,100]:.4f}]")

            # Single-step model prediction for this day.
            single_pred = model.predict(X_pred)[0]

            # From step 2 on, add noise scaled to ~10% of the recent
            # downstream std so the forecast shows realistic variation.
            if i > 0:
                history_std = temp_df['downstream_smooth'].iloc[-10:].std()
                if np.isnan(history_std) or history_std < 0.5:
                    history_std = 0.5  # floor on the std estimate

                noise_level = history_std * 0.1
                random_change = np.random.normal(0, noise_level)
                single_pred = single_pred + random_change

                print(f"æ·»å éæºåå: {random_change:.4f}, å岿 åå·®: {history_std:.4f}")

            print(f"æ¥éª¤ {i+1} æç»é¢æµå¼: {single_pred:.4f}")
            predictions[i] = single_pred

            # Synthesize the next day's upstream movement: sinusoid (tide
            # proxy) plus noise.
            upstream_change = 3.0 * np.sin(i/5.0 * np.pi) + np.random.normal(0, 1.5)

            new_row = pd.DataFrame({
                'DateTime': [current_date],
                'upstream_smooth': [temp_df['upstream_smooth'].iloc[-1] + upstream_change],
                'downstream_smooth': [single_pred],
                'hour': [current_date.hour],
                'weekday': [current_date.dayofweek],
                'month': [current_date.month],
                'upstream': [temp_df['upstream'].iloc[-1] + upstream_change],
                'downstream': [single_pred],
                'lunar_phase_sin': [np.sin(2*np.pi*ld.day/15)],
                'lunar_phase_cos': [np.cos(2*np.pi*ld.day/15)],
                'is_high_tide': [1 if (ld.day <=5 or (ld.day >=16 and ld.day<=20)) else 0]
            })

            # Synthesize water-level columns for the new row when present.
            if has_water_level:
                try:
                    # small sinusoid + noise, mirroring the upstream synthesis above
                    water_level_change = 0.2 * np.sin(i/5.0 * np.pi) + np.random.normal(0, 0.05)
                    last_water_level = temp_df['water_level'].iloc[-1]
                    new_water_level = last_water_level + water_level_change

                    new_row['water_level'] = new_water_level
                    new_row['water_level_smooth'] = new_water_level

                    # Derived water-level statistics, only for columns that exist.
                    if 'mean_1d_water_level' in temp_df.columns:
                        new_row['mean_1d_water_level'] = temp_df['water_level_smooth'].iloc[-24:].mean()
                    if 'mean_3d_water_level' in temp_df.columns:
                        new_row['mean_3d_water_level'] = temp_df['water_level_smooth'].iloc[-72:].mean()
                    if 'std_1d_water_level' in temp_df.columns:
                        new_row['std_1d_water_level'] = temp_df['water_level_smooth'].iloc[-24:].std()
                    if 'water_level_change_1h' in temp_df.columns:
                        new_row['water_level_change_1h'] = new_water_level - temp_df['water_level_smooth'].iloc[-1]
                    if 'water_level_change_24h' in temp_df.columns:
                        new_row['water_level_change_24h'] = new_water_level - temp_df['water_level_smooth'].iloc[-24]
                    if 'water_level_sal_ratio' in temp_df.columns:
                        new_row['water_level_sal_ratio'] = new_water_level / single_pred if single_pred > 0 else 1.0
                except Exception as e:
                    print(f"为æ°è¡æ·»å æ°´ä½æ°æ®æ¶åºé: {e}")

            # Fill every remaining column so the new row matches temp_df's schema.
            for col in temp_df.columns:
                if col not in new_row.columns:
                    if col.startswith('upstream_delay_'):
                        # shift the lag chain by one: delay_N takes delay_(N-1)'s value
                        delay = int(col.split('_')[-1].replace('h', ''))
                        if delay <= 1:
                            new_row[col] = temp_df['upstream_smooth'].iloc[-1]
                        else:
                            prev_delay = delay - 1
                            prev_col = f'upstream_delay_{prev_delay}h'
                            if prev_col in temp_df.columns:
                                new_row[col] = temp_df[prev_col].iloc[-1]
                            else:
                                # fall back to the latest upstream value
                                new_row[col] = temp_df['upstream_smooth'].iloc[-1]
                    elif col.startswith('downstream_delay_'):
                        delay = int(col.split('_')[-1].replace('h', ''))
                        if delay <= 1:
                            new_row[col] = single_pred
                        else:
                            prev_delay = delay - 1
                            prev_col = f'downstream_delay_{prev_delay}h'
                            if prev_col in temp_df.columns:
                                new_row[col] = temp_df[prev_col].iloc[-1]
                            else:
                                # fall back to the current prediction
                                new_row[col] = single_pred
                    elif col.startswith('water_level_delay_') and has_water_level:
                        try:
                            delay = int(col.split('_')[-1].replace('h', ''))
                            if delay <= 1:
                                new_row[col] = new_row['water_level_smooth'].iloc[0]
                            else:
                                prev_delay = delay - 1
                                prev_col = f'water_level_delay_{prev_delay}h'
                                if prev_col in temp_df.columns:
                                    new_row[col] = temp_df[prev_col].iloc[-1]
                                else:
                                    new_row[col] = temp_df['water_level_smooth'].iloc[-1]
                        except Exception as e:
                            print(f"æ·»å æ°´ä½å»¶è¿ç¹å¾æ¶åºé: {e}")
                            new_row[col] = temp_df[col].iloc[-1] if col in temp_df.columns else 0
                    elif col == 'lunar_phase_sin':
                        # NOTE(review): uses the solar calendar day here, unlike
                        # the lunar day (ld.day) used above — confirm intended.
                        new_row[col] = np.sin(2*np.pi*current_date.day/15)
                    elif col == 'lunar_phase_cos':
                        new_row[col] = np.cos(2*np.pi*current_date.day/15)
                    elif col == 'is_high_tide':
                        new_row[col] = 1 if (current_date.day <=5 or (current_date.day >=16 and current_date.day<=20)) else 0
                    else:
                        # unhandled columns: carry the last value forward (or 0)
                        if col in temp_df.columns:
                            new_row[col] = temp_df[col].iloc[-1]
                        else:
                            new_row[col] = 0

            # Append the synthesized row to the working frame.
            temp_df = pd.concat([temp_df, new_row], ignore_index=True)

            # Recompute rolling statistics on the extended frame so the next
            # iteration's features reflect this step's prediction.
            temp_df_last = temp_df.iloc[-1:].copy()

            # upstream stats over the last 24h (and up to 72h for the 3-day mean)
            recent_upstream = temp_df['upstream_smooth'].iloc[-24:].values
            temp_df_last['mean_1d_up'] = np.mean(recent_upstream)
            temp_df_last['std_1d_up'] = np.std(recent_upstream)
            temp_df_last['max_1d_up'] = np.max(recent_upstream)
            temp_df_last['min_1d_up'] = np.min(recent_upstream)
            temp_df_last['mean_3d_up'] = np.mean(temp_df['upstream_smooth'].iloc[-min(72, len(temp_df)):].values)

            # downstream stats, same windows
            recent_downstream = temp_df['downstream_smooth'].iloc[-24:].values
            temp_df_last['mean_1d_down'] = np.mean(recent_downstream)
            temp_df_last['std_1d_down'] = np.std(recent_downstream)
            temp_df_last['max_1d_down'] = np.max(recent_downstream)
            temp_df_last['min_1d_down'] = np.min(recent_downstream)
            temp_df_last['mean_3d_down'] = np.mean(temp_df['downstream_smooth'].iloc[-min(72, len(temp_df)):].values)

            # write the recomputed stats back onto the last row
            temp_df.iloc[-1] = temp_df_last.iloc[0]

            # Refresh every lag column on the last row so the lag chain stays
            # consistent with the appended data (up to 120 hours of lags).
            for delay in range(1, 121):
                # upstream lag
                delay_col = f'upstream_delay_{delay}h'
                if delay_col in temp_df.columns:
                    if len(temp_df) > delay:
                        temp_df.loc[temp_df.index[-1], delay_col] = temp_df.iloc[-delay-1]['upstream_smooth']
                    else:
                        # not enough history: clamp to the oldest value
                        temp_df.loc[temp_df.index[-1], delay_col] = temp_df.iloc[0]['upstream_smooth']

                # downstream lag
                delay_col = f'downstream_delay_{delay}h'
                if delay_col in temp_df.columns:
                    if len(temp_df) > delay:
                        temp_df.loc[temp_df.index[-1], delay_col] = temp_df.iloc[-delay-1]['downstream_smooth']
                    else:
                        temp_df.loc[temp_df.index[-1], delay_col] = temp_df.iloc[0]['downstream_smooth']

                # water-level lag (only when water-level data exists)
                if has_water_level:
                    delay_col = f'water_level_delay_{delay}h'
                    if delay_col in temp_df.columns:
                        if len(temp_df) > delay:
                            temp_df.loc[temp_df.index[-1], delay_col] = temp_df.iloc[-delay-1]['water_level_smooth']
                        else:
                            temp_df.loc[temp_df.index[-1], delay_col] = temp_df.iloc[0]['water_level_smooth']

            # Debug: show the refreshed statistics feeding the next step.
            print(f"æ´æ°åmean_1d_down: {temp_df.iloc[-1]['mean_1d_down']:.4f}, mean_1d_up: {temp_df.iloc[-1]['mean_1d_up']:.4f}")

        print("éå½é¢æµå®æ")

        # Read the persisted accuracy metrics back for the caller / GUI.
        metrics = None
        if os.path.exists(model_cache_file):
            try:
                with open(model_cache_file, 'rb') as f:
                    model_data = pickle.load(f)
                metrics = {
                    'rmse': model_data.get('rmse', None),
                    'mae': model_data.get('mae', None)
                }
            except Exception as e:
                print(f"è·åæ¨¡åææ 失败: {e}")

        return future_dates, predictions, model, metrics
    except Exception as e:
        print("颿µè¿ç¨å¼å¸¸:", e)
        import traceback
        traceback.print_exc()
        return None, None, None, None
| | | |
| | | |
| | | |
| | | # ------------------------------- |
| | | # GUIçé¢é¨å |
| | | # ------------------------------- |
| | | def run_gui(): |
def configure_gui_fonts():
    """Point Tk's standard named fonts at the first usable CJK-capable family.

    Tries each candidate family in order and applies it to the three
    standard named fonts (default, text, fixed).  Returns True on the
    first family that configures successfully, False when none can be
    applied (e.g. when no Tk root window exists yet).
    """
    font_names = ['微软éé»', 'Microsoft YaHei', 'SimSun', 'SimHei']
    # The three Tk named fonts that control most widget text.
    named_fonts = ("TkDefaultFont", "TkTextFont", "TkFixedFont")
    for font_name in font_names:
        try:
            for named in named_fonts:
                tkfont.nametofont(named).configure(family=font_name)
            return True
        except Exception:
            # Family (or the font subsystem) unavailable — try the next one.
            continue
    return False
| | | |
| | | def on_predict(): |
| | | try: |
| | | predict_start = time() |
| | | status_label.config(text="颿µä¸...") |
| | | root.update() |
| | | start_time_dt = pd.to_datetime(entry.get()) |
| | | force_retrain = retrain_var.get() |
| | | future_dates, predictions, model, metrics = train_and_predict(df, start_time_dt, force_retrain) |
| | | if future_dates is None or predictions is None: |
| | | status_label.config(text="颿µå¤±è´¥") |
| | | return |
| | | |
| | | # è·åå¹¶æ¾ç¤ºæ¨¡ååç¡®åº¦ææ |
| | | if metrics: |
| | | metrics_text = f"模åå确度 - RMSE: {metrics['rmse']:.4f}, MAE: {metrics['mae']:.4f}" |
| | | metrics_label.config(text=metrics_text) |
| | | |
| | | # æ¸
é¤å¾å½¢å¹¶éæ°ç»å¶ |
| | | ax.clear() |
| | | |
| | | # å建åyè½´å¾è¡¨ |
| | | ax2 = None |
| | | has_water_level = 'water_level' in df.columns and 'water_level_smooth' in df.columns |
| | | if has_water_level: |
| | | try: |
| | | ax2 = ax.twinx() |
| | | except Exception as e: |
| | | print(f"å建åy轴失败: {e}") |
| | | ax2 = None |
| | | |
| | | # ç»å¶å岿°æ®ï¼æè¿ 120 å¤©ï¼ |
| | | history_end = min(start_time_dt, df['DateTime'].max()) |
| | | history_start = history_end - timedelta(days=120) |
| | | hist_data = df[(df['DateTime'] >= history_start) & (df['DateTime'] <= history_end)] |
| | | |
| | | # ç¡®ä¿æ°æ®ä¸ä¸ºç©º |
| | | if len(hist_data) == 0: |
| | | status_label.config(text="é误: æéæ¶é´èå´å
没æå岿°æ®") |
| | | return |
| | | |
| | | # ç»å¶åºæ¬æ°æ® |
| | | ax.plot(hist_data['DateTime'], hist_data['downstream_smooth'], |
| | | label='ä¸åæ°´(䏿¸¸)ç度', color='blue', linewidth=1.5) |
| | | ax.plot(hist_data['DateTime'], hist_data['upstream_smooth'], |
| | | label='é龿¸¯(䏿¸¸)ç度', color='purple', linewidth=1.5, alpha=0.7) |
| | | |
| | | # ç»å¶æ°´ä½æ°æ®ï¼å¦ææï¼ |
| | | if ax2 is not None and has_water_level: |
| | | try: |
| | | # æ£æ¥æ°´ä½æ°æ®æ¯å¦æè¶³å¤çéNaNå¼ |
| | | valid_water_level = hist_data['water_level_smooth'].dropna() |
| | | if len(valid_water_level) > 10: # è³å°æ10个ææå¼ |
| | | ax2.plot(hist_data['DateTime'], hist_data['water_level_smooth'], |
| | | label='é¿æ±æ°´ä½', color='green', linewidth=1.5, linestyle='--') |
| | | ax2.set_ylabel('æ°´ä½ (m)', color='green') |
| | | ax2.tick_params(axis='y', labelcolor='green') |
| | | else: |
| | | print("æ°´ä½æ°æ®ææå¼ä¸è¶³ï¼è·³è¿æ°´ä½å¾") |
| | | except Exception as e: |
| | | print(f"ç»å¶æ°´ä½æ°æ®æ¶åºé: {e}") |
| | | |
| | | if 'qinglong_lake_smooth' in hist_data.columns: |
| | | ax.plot(hist_data['DateTime'], hist_data['qinglong_lake_smooth'], |
| | | label='é龿¹ç度', color='green', linewidth=1.5, alpha=0.7) |
| | | |
| | | # ç»å¶é¢æµæ°æ® |
| | | if len(future_dates) > 0 and len(predictions) > 0: |
| | | ax.plot(future_dates, predictions, marker='o', linestyle='--', |
| | | label='éå½é¢æµç度', color='red', linewidth=2) |
| | | |
| | | # æ·»å 颿µç置信åºé´ |
| | | std_dev = hist_data['downstream_smooth'].std() * 0.5 |
| | | ax.fill_between(future_dates, predictions - std_dev, predictions + std_dev, |
| | | color='red', alpha=0.2) |
| | | |
| | | # ç»å¶å®é
æ°æ®(妿æ |
| | | actual_data = df[(df['DateTime'] >= start_time_dt) & (df['DateTime'] <= future_dates[-1])] |
| | | actual_values = None |
| | | |
| | | if not actual_data.empty: |
| | | actual_values = [] |
| | | # è·åä¸é¢æµæ¥æææ¥è¿çå®é
æ°æ® |
| | | for pred_date in future_dates: |
| | | closest_idx = np.argmin(np.abs(actual_data['DateTime'] - pred_date)) |
| | | actual_values.append(actual_data['downstream_smooth'].iloc[closest_idx]) |
| | | |
| | | # ç»å¶å®é
ç度æ²çº¿ |
| | | ax.plot(future_dates, actual_values, marker='s', linestyle='-', |
| | | label='å®é
ç度', color='orange', linewidth=2) |
| | | |
| | | # 设置å¾è¡¨æ é¢åæ ç¾ |
| | | ax.set_xlabel('æ¥æ') |
| | | ax.set_ylabel('ç度') |
| | | ax.set_title(f"ä» {start_time_dt.strftime('%Y-%m-%d %H:%M:%S')} å¼å§çéå½åæ¥çåº¦é¢æµ") |
| | | |
| | | # 设置å¾ä¾å¹¶åºç¨ç´§åå¸å± |
| | | if ax2 is not None: |
| | | try: |
| | | lines1, labels1 = ax.get_legend_handles_labels() |
| | | lines2, labels2 = ax2.get_legend_handles_labels() |
| | | if lines2: # ç¡®ä¿æ°´ä½æ°æ®å·²ç»å¶ |
| | | ax.legend(lines1 + lines2, labels1 + labels2, loc='best') |
| | | else: |
| | | ax.legend(loc='best') |
| | | except Exception as e: |
| | | print(f"å建å¾ä¾æ¶åºé: {e}") |
| | | ax.legend(loc='best') |
| | | else: |
| | | ax.legend(loc='best') |
| | | |
| | | fig.tight_layout() |
| | | |
| | | # 强å¶éç» - 使ç¨å¤ç§æ¹å¼ç¡®ä¿å¾å½¢æ¾ç¤º |
| | | plt.close(fig) # å
³éæ§ç |
| | | fig.canvas.draw() |
| | | fig.canvas.flush_events() |
| | | plt.draw() |
| | | |
| | | # æ´æ°é¢æµç»æææ¬ |
| | | predict_time = time() - predict_start |
| | | status_label.config(text=f"éå½é¢æµå®æ (èæ¶: {predict_time:.2f}ç§)") |
| | | |
| | | # æ¾ç¤ºé¢æµç»æ |
| | | result_text = "éå½åæ¥é¢æµç»æ:\n\n" |
| | | |
| | | # 妿æå®é
å¼ï¼è®¡ç®å·®å¼åç¾åæ¯è¯¯å·® |
| | | if actual_values is not None: |
| | | result_text += "æ¥æ 颿µå¼ å®é
å¼ å·®å¼\n" |
| | | result_text += "--------------------------------------\n" |
| | | for i, (date, pred, actual) in enumerate(zip(future_dates, predictions, actual_values)): |
| | | diff = pred - actual |
| | | # ç§»é¤ç¾åæ¯è¯¯å·®æ¾ç¤º |
| | | result_text += f"{date.strftime('%Y-%m-%d')} {pred:6.2f} {actual:6.2f} {diff:6.2f}\n" |
| | | |
| | | # # è®¡ç®æ´ä½è¯ä»·ææ |
| | | # mae = np.mean(np.abs(np.array(predictions) - np.array(actual_values))) |
| | | # rmse = np.sqrt(np.mean((np.array(predictions) - np.array(actual_values))**2)) |
| | | |
| | | # result_text += "\n颿µè¯ä¼°ææ :\n" |
| | | # result_text += f"å¹³åç»å¯¹è¯¯å·®(MAE): {mae:.4f}\n" |
| | | # result_text += f"åæ¹æ ¹è¯¯å·®(RMSE): {rmse:.4f}\n" |
| | | else: |
| | | result_text += "æ¥æ 颿µå¼\n" |
| | | result_text += "-------------------\n" |
| | | for i, (date, pred) in enumerate(zip(future_dates, predictions)): |
| | | result_text += f"{date.strftime('%Y-%m-%d')} {pred:6.2f}\n" |
| | | result_text += "\næ å®é
å¼è¿è¡å¯¹æ¯" |
| | | |
| | | update_result_text(result_text) |
| | | except Exception as e: |
| | | status_label.config(text=f"é误: {str(e)}") |
| | | import traceback |
| | | traceback.print_exc() |
| | | |
| | | def on_scroll(event): |
| | | xlim = ax.get_xlim() |
| | | ylim = ax.get_ylim() |
| | | zoom_factor = 1.1 |
| | | x_data = event.xdata if event.xdata is not None else (xlim[0]+xlim[1])/2 |
| | | y_data = event.ydata if event.ydata is not None else (ylim[0]+ylim[1])/2 |
| | | x_rel = (x_data - xlim[0]) / (xlim[1] - xlim[0]) |
| | | y_rel = (y_data - ylim[0]) / (ylim[1] - ylim[0]) |
| | | if event.step > 0: |
| | | new_width = (xlim[1]-xlim[0]) / zoom_factor |
| | | new_height = (ylim[1]-ylim[0]) / zoom_factor |
| | | x0 = x_data - x_rel * new_width |
| | | y0 = y_data - y_rel * new_height |
| | | ax.set_xlim([x0, x0+new_width]) |
| | | ax.set_ylim([y0, y0+new_height]) |
| | | else: |
| | | new_width = (xlim[1]-xlim[0]) * zoom_factor |
| | | new_height = (ylim[1]-ylim[0]) * zoom_factor |
| | | x0 = x_data - x_rel * new_width |
| | | y0 = y_data - y_rel * new_height |
| | | ax.set_xlim([x0, x0+new_width]) |
| | | ax.set_ylim([y0, y0+new_height]) |
| | | canvas.draw_idle() |
| | | |
| | | def update_cursor(event): |
| | | if event.inaxes == ax: |
| | | canvas.get_tk_widget().config(cursor="fleur") |
| | | else: |
| | | canvas.get_tk_widget().config(cursor="") |
| | | |
| | | def reset_view(): |
| | | display_history() |
| | | status_label.config(text="å¾è¡¨è§å¾å·²éç½®") |
| | | |
| | | root = tk.Tk() |
| | | root.title("é龿¸¯-éè¡çåº¦é¢æµç³»ç»") |
| | | try: |
| | | configure_gui_fonts() |
| | | except Exception as e: |
| | | print("åä½é
ç½®å¼å¸¸:", e) |
| | | |
| | | # æ¢å¤è¾å
¥æ¡åæ§å¶æé® |
| | | input_frame = ttk.Frame(root, padding="10") |
| | | input_frame.pack(fill=tk.X) |
| | | |
| | | ttk.Label(input_frame, text="è¾å
¥å¼å§æ¶é´ (YYYY-MM-DD HH:MM:SS)").pack(side=tk.LEFT) |
| | | entry = ttk.Entry(input_frame, width=25) |
| | | entry.pack(side=tk.LEFT, padx=5) |
| | | predict_button = ttk.Button(input_frame, text="颿µ", command=on_predict) |
| | | predict_button.pack(side=tk.LEFT) |
| | | status_label = ttk.Label(input_frame, text="æç¤º: ç¬¬ä¸æ¬¡è¿è¡è¯·å¾é'强å¶éæ°è®ç»æ¨¡å'") |
| | | status_label.pack(side=tk.LEFT, padx=10) |
| | | |
| | | control_frame = ttk.Frame(root, padding="5") |
| | | control_frame.pack(fill=tk.X) |
| | | retrain_var = tk.BooleanVar(value=False) |
| | | ttk.Checkbutton(control_frame, text="强å¶éæ°è®ç»æ¨¡å", variable=retrain_var).pack(side=tk.LEFT) |
| | | |
| | | # æ´æ°å¾ä¾è¯´æï¼å å
¥æ°´ä½æ°æ®ä¿¡æ¯ |
| | | if 'water_level' in df.columns: |
| | | legend_label = ttk.Label(control_frame, text="å¾ä¾: ç´«è²=é龿¸¯ä¸æ¸¸æ°æ®, èè²=ä¸åæ°´ä¸æ¸¸æ°æ®, 红è²=颿µå¼, 绿è²=é¿æ±æ°´ä½") |
| | | else: |
| | | legend_label = ttk.Label(control_frame, text="å¾ä¾: ç´«è²=é龿¸¯ä¸æ¸¸æ°æ®, èè²=ä¸åæ°´ä¸æ¸¸æ°æ®, 红è²=颿µå¼, æ©è²=å®é
å¼") |
| | | legend_label.pack(side=tk.LEFT, padx=10) |
| | | reset_button = ttk.Button(control_frame, text="éç½®è§å¾", command=reset_view) |
| | | reset_button.pack(side=tk.LEFT, padx=5) |
| | | |
| | | # æ·»å æ¾ç¤ºæ¨¡ååç¡®åº¦çæ ç¾ |
| | | metrics_frame = ttk.Frame(root, padding="5") |
| | | metrics_frame.pack(fill=tk.X) |
| | | model_metrics = get_model_metrics() |
| | | metrics_text = "模åå确度: æªç¥" if not model_metrics else f"模åå确度 - RMSE: {model_metrics['rmse']:.4f}, MAE: {model_metrics['mae']:.4f}" |
| | | metrics_label = ttk.Label(metrics_frame, text=metrics_text) |
| | | metrics_label.pack(side=tk.LEFT, padx=10) |
| | | |
| | | # ç»ææ¾ç¤ºåºå |
| | | result_frame = ttk.Frame(root, padding="10") |
| | | result_frame.pack(fill=tk.BOTH, expand=True) |
| | | |
| | | # 左侧æ¾ç½®å¾è¡¨ |
| | | plot_frame = ttk.Frame(result_frame, width=800, height=600) |
| | | plot_frame.pack(side=tk.LEFT, fill=tk.BOTH, expand=True) |
| | | plot_frame.pack_propagate(False) # ä¸å
è®¸æ¡æ¶æ ¹æ®å
å®¹è°æ´å¤§å° |
| | | |
| | | # å³ä¾§æ¾ç½®ææ¬ç»æ |
| | | text_frame = ttk.Frame(result_frame) |
| | | text_frame.pack(side=tk.RIGHT, fill=tk.Y) |
| | | |
| | | # 使ç¨ç宽å使¾ç¤ºç»æ |
| | | result_font = tkfont.Font(family="Courier New", size=10, weight="normal") |
| | | |
| | | # æ·»å ææ¬æ¡åæ»å¨æ¡ |
| | | result_text = tk.Text(text_frame, width=50, height=25, font=result_font, wrap=tk.NONE) |
| | | result_text.pack(side=tk.LEFT, fill=tk.BOTH) |
| | | result_scroll = ttk.Scrollbar(text_frame, orient="vertical", command=result_text.yview) |
| | | result_scroll.pack(side=tk.RIGHT, fill=tk.Y) |
| | | result_text.configure(yscrollcommand=result_scroll.set) |
| | | result_text.configure(state=tk.DISABLED) # åå§è®¾ä¸ºåªè¯» |
| | | |
| | | # æ´æ°ç»æææ¬ç彿° |
| | | def update_result_text(text): |
| | | result_text.configure(state=tk.NORMAL) |
| | | result_text.delete(1.0, tk.END) |
| | | result_text.insert(tk.END, text) |
| | | result_text.configure(state=tk.DISABLED) |
| | | |
| | | # å建æ´é«DPIçå¾å½¢ä»¥è·å¾æ´å¥½çæ¾ç¤ºè´¨é |
| | | fig, ax = plt.subplots(figsize=(10, 6), dpi=100) |
| | | fig.tight_layout(pad=3.0) # å¢å å
è¾¹è·ï¼é²æ¢æ ç¾è¢«æªæ |
| | | |
| | | # å建ç»å¸å¹¶æ·»å å°åºå®å¤§å°çæ¡æ¶ |
| | | canvas = FigureCanvasTkAgg(fig, master=plot_frame) |
| | | canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=True) |
| | | |
| | | # æ·»å å·¥å
·æ ï¼å
å«ç¼©æ¾ãä¿åçåè½ |
| | | toolbar_frame = ttk.Frame(plot_frame) |
| | | toolbar_frame.pack(side=tk.BOTTOM, fill=tk.X) |
| | | toolbar = NavigationToolbar2Tk(canvas, toolbar_frame) |
| | | toolbar.update() |
| | | |
| | | # å¯ç¨ç´§åå¸å±ï¼å¹¶è®¾ç½®èªå¨è°æ´ä»¥ä½¿å¾è¡¨å®å
¨æ¾ç¤º |
| | | def on_resize(event): |
| | | fig.tight_layout() |
| | | canvas.draw_idle() |
| | | |
| | | # æ·»å å¾è¡¨äº¤äºåè½ |
| | | canvas.mpl_connect('resize_event', on_resize) |
| | | canvas.mpl_connect('scroll_event', on_scroll) |
| | | canvas.mpl_connect('motion_notify_event', update_cursor) |
| | | |
| | | # æ·»å é¼ æ æå¨åè½ |
| | | def on_press(event): |
| | | if event.inaxes != ax: |
| | | return |
| | | canvas.get_tk_widget().config(cursor="fleur") |
| | | ax._pan_start = (event.x, event.y, event.xdata, event.ydata) |
| | | |
| | | def on_release(event): |
| | | ax._pan_start = None |
| | | canvas.get_tk_widget().config(cursor="") |
| | | canvas.draw_idle() |
| | | |
| | | def on_motion(event): |
| | | if not hasattr(ax, '_pan_start') or ax._pan_start is None: |
| | | return |
| | | if event.inaxes != ax: |
| | | return |
| | | |
| | | start_x, start_y, x_data, y_data = ax._pan_start |
| | | dx = event.x - start_x |
| | | dy = event.y - start_y |
| | | |
| | | # è·åå½åè§å¾ |
| | | xlim = ax.get_xlim() |
| | | ylim = ax.get_ylim() |
| | | |
| | | # 计ç®å¾è¡¨åæ ç³»ä¸çç§»å¨ |
| | | x_scale = (xlim[1] - xlim[0]) / canvas.get_tk_widget().winfo_width() |
| | | y_scale = (ylim[1] - ylim[0]) / canvas.get_tk_widget().winfo_height() |
| | | |
| | | # æ´æ°è§å¾ |
| | | ax.set_xlim(xlim[0] - dx * x_scale, xlim[1] - dx * x_scale) |
| | | ax.set_ylim(ylim[0] + dy * y_scale, ylim[1] + dy * y_scale) |
| | | |
| | | # æ´æ°æå¨èµ·ç¹ |
| | | ax._pan_start = (event.x, event.y, event.xdata, event.ydata) |
| | | |
| | | canvas.draw_idle() |
| | | |
| | | # è¿æ¥é¼ æ äºä»¶ |
| | | canvas.mpl_connect('button_press_event', on_press) |
| | | canvas.mpl_connect('button_release_event', on_release) |
| | | canvas.mpl_connect('motion_notify_event', on_motion) |
| | | |
| | | # ä¿®æ¹æ»è½®ç¼©æ¾å½æ°ï¼ä½¿å
¶æ´å¹³æ» |
| | | def on_scroll(event): |
| | | if event.inaxes != ax: |
| | | return |
| | | |
| | | # å½åè§å¾ |
| | | xlim = ax.get_xlim() |
| | | ylim = ax.get_ylim() |
| | | |
| | | # 缩æ¾å å |
| | | zoom_factor = 1.1 if event.step > 0 else 0.9 |
| | | |
| | | # è·åé¼ æ ä½ç½®ä½ä¸ºç¼©æ¾ä¸å¿ |
| | | x_data = event.xdata |
| | | y_data = event.ydata |
| | | |
| | | # è®¡ç®æ°è§å¾ç宽度åé«åº¦ |
| | | new_width = (xlim[1] - xlim[0]) * zoom_factor |
| | | new_height = (ylim[1] - ylim[0]) * zoom_factor |
| | | |
| | | # è®¡ç®æ°è§å¾çå·¦ä¸è§åæ ï¼ä»¥é¼ æ ä½ç½®ä¸ºä¸å¿ç¼©æ¾ |
| | | x_rel = (x_data - xlim[0]) / (xlim[1] - xlim[0]) |
| | | y_rel = (y_data - ylim[0]) / (ylim[1] - ylim[0]) |
| | | |
| | | x0 = x_data - x_rel * new_width |
| | | y0 = y_data - y_rel * new_height |
| | | |
| | | # æ´æ°è§å¾ |
| | | ax.set_xlim([x0, x0 + new_width]) |
| | | ax.set_ylim([y0, y0 + new_height]) |
| | | |
| | | canvas.draw_idle() |
| | | |
| | | # æ´æ°å岿°æ®æ¾ç¤ºå½æ° |
| | | def display_history(): |
| | | try: |
| | | ax.clear() |
| | | end_date = df['DateTime'].max() |
| | | start_date = max(df['DateTime'].min(), end_date - timedelta(days=60)) |
| | | hist_data = df[(df['DateTime'] >= start_date) & (df['DateTime'] <= end_date)] |
| | | |
| | | if len(hist_data) == 0: |
| | | status_label.config(text="è¦å: 没æå¯ç¨çå岿°æ®") |
| | | return |
| | | |
| | | # å建åyè½´å¾è¡¨ |
| | | ax2 = None |
| | | has_water_level = 'water_level' in hist_data.columns and 'water_level_smooth' in hist_data.columns |
| | | if has_water_level: |
| | | ax2 = ax.twinx() |
| | | |
| | | # ç»å¶æ°æ® |
| | | ax.plot(hist_data['DateTime'], hist_data['downstream_smooth'], |
| | | label='ä¸åæ°´(䏿¸¸)ç度', color='blue', linewidth=1.5) |
| | | ax.plot(hist_data['DateTime'], hist_data['upstream_smooth'], |
| | | label='é龿¸¯(䏿¸¸)ç度', color='purple', linewidth=1.5, alpha=0.7) |
| | | |
| | | # 设置边çï¼ç¡®ä¿æä¸è´çè§å¾ |
| | | y_min = min(hist_data['downstream_smooth'].min(), hist_data['upstream_smooth'].min()) * 0.9 |
| | | y_max = max(hist_data['downstream_smooth'].max(), hist_data['upstream_smooth'].max()) * 1.1 |
| | | ax.set_ylim(y_min, y_max) |
| | | |
| | | # å¦æææ°´ä½æ°æ®ï¼å¨ç¬¬äºä¸ªyè½´ä¸ç»å¶ |
| | | if ax2 is not None and has_water_level: |
| | | try: |
| | | # æ£æ¥æ°´ä½æ°æ®æ¯å¦æè¶³å¤çéNaNå¼ |
| | | valid_water_level = hist_data['water_level_smooth'].dropna() |
| | | if len(valid_water_level) > 10: # è³å°æ10个ææå¼ |
| | | ax2.plot(hist_data['DateTime'], hist_data['water_level_smooth'], |
| | | label='é¿æ±æ°´ä½', color='green', linewidth=1.5, linestyle='--') |
| | | ax2.set_ylabel('æ°´ä½ (m)', color='green') |
| | | ax2.tick_params(axis='y', labelcolor='green') |
| | | |
| | | # å建ç»åå¾ä¾ |
| | | lines1, labels1 = ax.get_legend_handles_labels() |
| | | lines2, labels2 = ax2.get_legend_handles_labels() |
| | | ax.legend(lines1 + lines2, labels1 + labels2, loc='best') |
| | | else: |
| | | print("æ°´ä½æ°æ®ææå¼ä¸è¶³ï¼è·³è¿æ°´ä½å¾") |
| | | ax.legend(loc='best') |
| | | except Exception as e: |
| | | print(f"ç»å¶æ°´ä½æ°æ®æ¶åºé: {e}") |
| | | ax.legend(loc='best') |
| | | else: |
| | | ax.legend(loc='best') |
| | | |
| | | # 设置æ ç¾åæ é¢ |
| | | ax.set_xlabel('æ¥æ') |
| | | ax.set_ylabel('ç度') |
| | | ax.set_title('å岿°æ®å¯¹æ¯') |
| | | |
| | | # 使ç¨ç´§åå¸å±å¹¶ç»å¶ |
| | | fig.tight_layout() |
| | | |
| | | # 使ç¨å¤ç§æ¹æ³ç¡®ä¿å¾åæ¾ç¤º |
| | | plt.close(fig) # å
³éæ§ç |
| | | fig.canvas.draw() |
| | | fig.canvas.flush_events() |
| | | plt.draw() |
| | | |
| | | except Exception as e: |
| | | status_label.config(text=f"æ¾ç¤ºå岿°æ®æ¶åºé: {str(e)}") |
| | | import traceback |
| | | traceback.print_exc() |
| | | |
| | | display_history() |
| | | root.mainloop() |
| | | |
| | | |
| | | |
| | | |
| | | # ------------------------------- |
# Main program entry: load data, add features, generate delay features, and launch the GUI
| | | # ------------------------------- |
def save_processed_data(df, filename='processed_data.pkl'):
    """Persist the fully processed DataFrame to a pickle file.

    Parameters:
        df: the processed pandas DataFrame to save.
        filename: target pickle path (default 'processed_data.pkl').

    Returns:
        True on success, False if writing failed (the error is printed).
    """
    try:
        df.to_pickle(filename)
        # FIX: the original printed the literal "(unknown)" instead of
        # interpolating the target filename into the f-string.
        print(f"å·²ä¿åå¤çåçæ°æ®å° {filename}")
        return True
    except Exception as e:
        print(f"ä¿åæ°æ®å¤±è´¥: {e}")
        return False
| | | |
def load_processed_data(filename='processed_data.pkl'):
    """Load a previously saved processed DataFrame from a pickle file.

    Parameters:
        filename: pickle path to read (default 'processed_data.pkl').

    Returns:
        The DataFrame on success; None when the file is missing or
        unreadable (the reason is printed).
    """
    try:
        if os.path.exists(filename):
            df = pd.read_pickle(filename)
            # FIX: the original printed the literal "(unknown)" instead of
            # interpolating the source filename into the f-string.
            print(f"å·²ä» {filename} å è½½å¤çåçæ°æ®")
            return df
        else:
            print(f"æ¾ä¸å°å¤çåçæ°æ®æä»¶ {filename}")
            return None
    except Exception as e:
        print(f"å è½½æ°æ®å¤±è´¥: {e}")
        return None
| | | |
# One-time cache invalidation: delete the stale processed-data cache (if any)
# so the data below is re-processed with the fixed feature code.
if os.path.exists('processed_data.pkl'):
    try:
        os.remove('processed_data.pkl')
        print("å·²å 餿§çå¤çæ°æ®ç¼åï¼å°ä½¿ç¨ä¿®å¤åç代ç éæ°å¤çæ°æ®")
    except Exception as e:
        print(f"å é¤ç¼åæä»¶å¤±è´¥: {e}")

# Likewise delete the stale trained-model file so the model is retrained.
if os.path.exists('salinity_model.pkl'):
    try:
        os.remove('salinity_model.pkl')
        print("å·²å 餿§ç模åæä»¶ï¼å°éæ°è®ç»æ¨¡å")
    except Exception as e:
        print(f"å 餿¨¡åæä»¶å¤±è´¥: {e}")

# Try the processed-data cache first; fall back to full reprocessing.
# NOTE(review): the cache file is unconditionally deleted above, so this load
# always misses and the CSVs are always re-processed — presumably a deliberate
# one-time migration step; confirm before removing either part.
processed_data = load_processed_data()
if processed_data is not None:
    df = processed_data
else:
    # Load the raw CSVs, including Yangtze water level, flow and rainfall.
    df = load_data('é龿¸¯1.csv', 'ä¸åæ°´.csv', 'é¿æ±æ¶²ä½.csv', '大鿵é.csv', 'éé¨é.csv')
    if df is not None:
        # Calendar/time features.
        df['hour'] = df['DateTime'].dt.hour
        df['weekday'] = df['DateTime'].dt.dayofweek
        df['month'] = df['DateTime'].dt.month

        # Lunar-calendar features.
        df = add_lunar_features(df)

        # Lagged (delay) features at the listed hour offsets.
        delay_hours = [1,2,3,4,6,12,24,36,48,60,72,84,96,108,120]
        df = batch_create_delay_features(df, delay_hours)

        # Rolling statistics of upstream salinity (24h and 72h windows).
        df['mean_1d_up'] = df['upstream_smooth'].rolling(window=24, min_periods=1).mean()
        df['mean_3d_up'] = df['upstream_smooth'].rolling(window=72, min_periods=1).mean()
        df['std_1d_up'] = df['upstream_smooth'].rolling(window=24, min_periods=1).std()
        df['max_1d_up'] = df['upstream_smooth'].rolling(window=24, min_periods=1).max()
        df['min_1d_up'] = df['upstream_smooth'].rolling(window=24, min_periods=1).min()

        # Upstream salinity rate-of-change features (1h and 24h).
        df['upstream_change_rate_1h'] = df['upstream_smooth'].pct_change(1)
        df['upstream_change_rate_24h'] = df['upstream_smooth'].pct_change(24)

        # Rolling statistics of downstream salinity (24h and 72h windows).
        df['mean_1d_down'] = df['downstream_smooth'].rolling(window=24, min_periods=1).mean()
        df['mean_3d_down'] = df['downstream_smooth'].rolling(window=72, min_periods=1).mean()
        df['std_1d_down'] = df['downstream_smooth'].rolling(window=24, min_periods=1).std()
        df['max_1d_down'] = df['downstream_smooth'].rolling(window=24, min_periods=1).max()
        df['min_1d_down'] = df['downstream_smooth'].rolling(window=24, min_periods=1).min()

        # Downstream salinity rate-of-change features (1h and 24h).
        df['downstream_change_rate_1h'] = df['downstream_smooth'].pct_change(1)
        df['downstream_change_rate_24h'] = df['downstream_smooth'].pct_change(24)

        # Upstream-minus-downstream salinity difference and its deltas.
        df['salinity_diff'] = df['upstream_smooth'] - df['downstream_smooth']
        df['salinity_diff_1h'] = df['salinity_diff'].diff(1)
        df['salinity_diff_24h'] = df['salinity_diff'].diff(24)

        # Upstream/downstream salinity ratio and its deltas.
        # NOTE(review): division — assumes downstream_smooth is never 0;
        # a zero would yield inf here. Confirm upstream data guarantees this.
        df['salinity_ratio'] = df['upstream_smooth'] / df['downstream_smooth']
        df['salinity_ratio_1h'] = df['salinity_ratio'].diff(1)
        df['salinity_ratio_24h'] = df['salinity_ratio'].diff(24)

        # Water-level statistical features (only when water-level data exists).
        if 'water_level' in df.columns:
            # Build the smoothed water-level series first if absent
            # (24h centered rolling mean, NaNs backfilled from the raw series).
            if 'water_level_smooth' not in df.columns:
                df['water_level_smooth'] = df['water_level'].rolling(window=24, min_periods=1, center=True).mean()
                df['water_level_smooth'] = df['water_level_smooth'].fillna(df['water_level'])

            # Water-level rolling statistics (24h and 72h windows).
            df['mean_1d_water_level'] = df['water_level_smooth'].rolling(window=24, min_periods=1).mean()
            df['mean_3d_water_level'] = df['water_level_smooth'].rolling(window=72, min_periods=1).mean()
            df['std_1d_water_level'] = df['water_level_smooth'].rolling(window=24, min_periods=1).std()
            df['max_1d_water_level'] = df['water_level_smooth'].rolling(window=24, min_periods=1).max()
            df['min_1d_water_level'] = df['water_level_smooth'].rolling(window=24, min_periods=1).min()

            # Water-level change over 1h and 24h.
            df['water_level_change_1h'] = df['water_level_smooth'].diff(1)
            df['water_level_change_24h'] = df['water_level_smooth'].diff(24)

            # Water-level-to-salinity ratio features and their deltas.
            df['water_level_sal_ratio'] = df['water_level_smooth'] / df['downstream_smooth']
            df['water_level_sal_ratio_1h'] = df['water_level_sal_ratio'].diff(1)
            df['water_level_sal_ratio_24h'] = df['water_level_sal_ratio'].diff(24)

            # Water-level x salinity interaction features and their deltas.
            df['water_level_sal_interaction'] = df['water_level_smooth'] * df['downstream_smooth']
            df['water_level_sal_interaction_1h'] = df['water_level_sal_interaction'].diff(1)
            df['water_level_sal_interaction_24h'] = df['water_level_sal_interaction'].diff(24)

            print("æ°´ä½ç¹å¾å·²æ·»å ")

        # Persist the processed data for the next run.
        save_processed_data(df)

if df is not None:
    run_gui()
else:
    print("æ°æ®å è½½å¤±è´¥ï¼æ æ³è¿è¡é¢æµã")