# XGBoost version (modified)
import os
import pickle
import pandas as pd
import numpy as np
import tkinter as tk
import tkinter.font as tkfont
from tkinter import ttk
from datetime import timedelta
from time import time
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg, NavigationToolbar2Tk
from xgboost import XGBRegressor
from lunardate import LunarDate
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Configure matplotlib to render Chinese text
matplotlib.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'SimSun', 'Arial Unicode MS']
matplotlib.rcParams['axes.unicode_minus'] = False
matplotlib.rcParams['font.family'] = 'sans-serif'

# Global caches and feature names
cached_model = None
last_training_time = None
feature_columns = None
current_view = {'xlim': None, 'ylim': None, 'ylim2': None}  # stores the current chart view
event_cids = []  # stores matplotlib event-connection IDs


# -------------------------------
# Data loading and preprocessing
# -------------------------------
def load_data(upstream_file, downstream_file, river_level_file=None, flow_file=None):
    """
    Load all related data files and apply data-quality processing.
    """
    try:
        # Read upstream and downstream salinity data
        upstream_df = pd.read_csv(upstream_file)
        downstream_df = pd.read_csv(downstream_file)
    except FileNotFoundError:
        print("File not found; please check the paths")
        return None

    # Normalize column names
    upstream_df.columns = ['DateTime', 'TagName', 'Value']
    downstream_df.columns = ['DateTime', 'TagName', 'Value']

    # Parse timestamps
    upstream_df['DateTime'] = pd.to_datetime(upstream_df['DateTime'])
    downstream_df['DateTime'] = pd.to_datetime(downstream_df['DateTime'])

    # Use DateTime as the index
    upstream_df.set_index('DateTime', inplace=True)
    downstream_df.set_index('DateTime', inplace=True)

    # Numeric conversion - robust against malformed values
    for df in [upstream_df, downstream_df]:
        df['Value'] = pd.to_numeric(df['Value'], errors='coerce')
        # Detect outliers with the IQR method
        Q1 = df['Value'].quantile(0.25)
        Q3 = df['Value'].quantile(0.75)
        IQR = Q3 - Q1
        lower_bound = Q1 - 1.5 * IQR
        upper_bound = Q3 + 1.5 * IQR
        # Clip outliers to the boundary values
        df.loc[df['Value'] < lower_bound, 'Value'] = lower_bound
        df.loc[df['Value'] > upper_bound, 'Value'] = upper_bound

    # Handle low salinity readings (< 5):
    # do not drop them outright; mark as NaN and repair by interpolation
    for df in [upstream_df, downstream_df]:
        # Mark low salinity values as NaN
        low_salinity_mask = df['Value'] < 5
        if low_salinity_mask.any():
            print(f"Found {low_salinity_mask.sum()} low salinity values (<5); repairing by interpolation")
            df.loc[low_salinity_mask, 'Value'] = np.nan

        # Linear interpolation for short gaps
        df['Value'] = df['Value'].interpolate(method='linear', limit=4)

        # Time-based interpolation for longer gaps
        df['Value'] = df['Value'].interpolate(method='time', limit=24)

        # Forward/backward fill for whatever remains
        df['Value'] = df['Value'].ffill().bfill()

        # Rolling-median smoothing of residual spikes
        df['Value'] = df['Value'].rolling(window=12, center=True, min_periods=1).median()

    # Rename the Value columns
    upstream_df = upstream_df.rename(columns={'Value': 'upstream'})[['upstream']]
    downstream_df = downstream_df.rename(columns={'Value': 'downstream'})[['downstream']]

    # Merge the two series
    merged_df = pd.merge(upstream_df, downstream_df, left_index=True, right_index=True, how='inner')

    # Load Yangtze water-level data (if provided)
    if river_level_file:
        try:
            river_level_df = pd.read_csv(river_level_file)
            print(f"Water-level file read successfully: {river_level_file}")

            # Normalize column names
            if len(river_level_df.columns) >= 3:
                river_level_df.columns = ['DateTime', 'TagName', 'Value']
            elif len(river_level_df.columns) == 2:
                river_level_df.columns = ['DateTime', 'Value']
                river_level_df['TagName'] = 'water_level'

            # Basic processing
            river_level_df['DateTime'] = pd.to_datetime(river_level_df['DateTime'])
            river_level_df.set_index('DateTime', inplace=True)
            river_level_df['Value'] = pd.to_numeric(river_level_df['Value'], errors='coerce')

            # Clip outliers with the IQR method
            Q1 = river_level_df['Value'].quantile(0.25)
            Q3 = river_level_df['Value'].quantile(0.75)
            IQR = Q3 - Q1
            lower_bound = Q1 - 1.5 * IQR
            upper_bound = Q3 + 1.5 * IQR
            river_level_df.loc[river_level_df['Value'] < lower_bound, 'Value'] = lower_bound
            river_level_df.loc[river_level_df['Value'] > upper_bound, 'Value'] = upper_bound

            # Rename and keep only the needed column
            river_level_df = river_level_df.rename(columns={'Value': 'water_level'})[['water_level']]

            # Merge into the main frame
            merged_df = pd.merge(merged_df, river_level_df, left_index=True, right_index=True, how='left')

            # Interpolate the water-level series
            merged_df['water_level'] = merged_df['water_level'].interpolate(method='time', limit=24)
            merged_df['water_level'] = merged_df['water_level'].ffill().bfill()

            # Smoothed water level
            merged_df['water_level_smooth'] = merged_df['water_level'].rolling(window=24, min_periods=1, center=True).mean()

            # Water-level trend features
            merged_df['water_level_trend_1h'] = merged_df['water_level_smooth'].diff(1)
            merged_df['water_level_trend_24h'] = merged_df['water_level_smooth'].diff(24)

            print(f"Water-level data loaded; range: {merged_df['water_level'].min()} - {merged_df['water_level'].max()}")
        except Exception as e:
            print(f"Failed to load water-level data: {str(e)}")

    # Load Datong flow data (if provided)
    if flow_file:
        try:
            flow_df = pd.read_csv(flow_file)
            print(f"Flow file read successfully: {flow_file}")

            # Normalize column names
            if len(flow_df.columns) >= 3:
                flow_df.columns = ['DateTime', 'TagName', 'Value']
            elif len(flow_df.columns) == 2:
                flow_df.columns = ['DateTime', 'Value']
                flow_df['TagName'] = 'flow'

            # Basic processing
            flow_df['DateTime'] = pd.to_datetime(flow_df['DateTime'])
            flow_df.set_index('DateTime', inplace=True)
            flow_df['Value'] = pd.to_numeric(flow_df['Value'], errors='coerce')

            # Clip outliers with the IQR method
            Q1 = flow_df['Value'].quantile(0.25)
            Q3 = flow_df['Value'].quantile(0.75)
            IQR = Q3 - Q1
            lower_bound = Q1 - 1.5 * IQR
            upper_bound = Q3 + 1.5 * IQR
            flow_df.loc[flow_df['Value'] < lower_bound, 'Value'] = lower_bound
            flow_df.loc[flow_df['Value'] > upper_bound, 'Value'] = upper_bound

            # Rename and keep only the needed column
            flow_df = flow_df.rename(columns={'Value': 'flow'})[['flow']]

            # Merge into the main frame
            merged_df = pd.merge(merged_df, flow_df, left_index=True, right_index=True, how='left')

            # Interpolate the flow series
            merged_df['flow'] = merged_df['flow'].interpolate(method='time', limit=24)
            merged_df['flow'] = merged_df['flow'].ffill().bfill()

            # Smoothed flow
            merged_df['flow_smooth'] = merged_df['flow'].rolling(window=24, min_periods=1, center=True).mean()

            # Flow trend features
            merged_df['flow_trend_1h'] = merged_df['flow_smooth'].diff(1)
            merged_df['flow_trend_24h'] = merged_df['flow_smooth'].diff(24)

            # Flow statistical features
            merged_df['mean_1d_flow'] = merged_df['flow_smooth'].rolling(window=24, min_periods=1).mean()
            merged_df['mean_3d_flow'] = merged_df['flow_smooth'].rolling(window=72, min_periods=1).mean()
            merged_df['std_1d_flow'] = merged_df['flow_smooth'].rolling(window=24, min_periods=1).std()

            # Flow change features
            merged_df['flow_change_1h'] = merged_df['flow_smooth'].diff(1)
            merged_df['flow_change_24h'] = merged_df['flow_smooth'].diff(24)

            # # Flow-to-salinity ratio (requires the downstream smoothed series)
            # if 'downstream_smooth' in merged_df.columns:
            #     merged_df['flow_sal_ratio'] = merged_df['flow_smooth'] / merged_df['downstream_smooth']
            # else:
            #     print("Warning: downstream smoothed series not yet created; skipping flow_sal_ratio")

            print(f"Flow data loaded; range: {merged_df['flow'].min()} - {merged_df['flow'].max()} m³/s")
        except Exception as e:
            print(f"Failed to load flow data: {str(e)}")

    # Interpolate and smooth the salinity series
    merged_df['upstream'] = merged_df['upstream'].interpolate(method='time', limit=24)
    merged_df['downstream'] = merged_df['downstream'].interpolate(method='time', limit=24)

    # Forward/backward fill any remaining NaN values
    merged_df['upstream'] = merged_df['upstream'].ffill().bfill()
    merged_df['downstream'] = merged_df['downstream'].ffill().bfill()

    # Smoothed salinity series
    merged_df['upstream_smooth'] = merged_df['upstream'].rolling(window=24, min_periods=1, center=True).mean()
    merged_df['downstream_smooth'] = merged_df['downstream'].rolling(window=24, min_periods=1, center=True).mean()

    # Trend features
    merged_df['upstream_trend_1h'] = merged_df['upstream_smooth'].diff(1)
    merged_df['upstream_trend_24h'] = merged_df['upstream_smooth'].diff(24)
    merged_df['downstream_trend_1h'] = merged_df['downstream_smooth'].diff(1)
    merged_df['downstream_trend_24h'] = merged_df['downstream_smooth'].diff(24)

    # Fill the NaNs introduced by diff()
    merged_df['upstream_trend_1h'] = merged_df['upstream_trend_1h'].fillna(0)
    merged_df['upstream_trend_24h'] = merged_df['upstream_trend_24h'].fillna(0)
    merged_df['downstream_trend_1h'] = merged_df['downstream_trend_1h'].fillna(0)
    merged_df['downstream_trend_24h'] = merged_df['downstream_trend_24h'].fillna(0)

    # Use a wider smoothing window on low-salinity segments (< 50)
    low_sal_mask = merged_df['upstream'] < 50
    if low_sal_mask.any():
        merged_df.loc[low_sal_mask, 'upstream_smooth'] = merged_df.loc[low_sal_mask, 'upstream']\
            .rolling(window=48, min_periods=1, center=True).mean()

    # Data validation and statistics
    print("\nData quality statistics:")
    print(f"Total rows: {len(merged_df)}")
    print(f"Upstream salinity range: {merged_df['upstream'].min():.2f} - {merged_df['upstream'].max():.2f}")
    print(f"Downstream salinity range: {merged_df['downstream'].min():.2f} - {merged_df['downstream'].max():.2f}")

    if 'water_level' in merged_df.columns:
        print(f"Water level range: {merged_df['water_level'].min():.2f} - {merged_df['water_level'].max():.2f}")
        print(f"Water level missing ratio: {merged_df['water_level'].isna().mean()*100:.2f}%")

    if 'flow' in merged_df.columns:
        print(f"Flow range: {merged_df['flow'].min():.2f} - {merged_df['flow'].max():.2f} m³/s")
        print(f"Flow missing ratio: {merged_df['flow'].isna().mean()*100:.2f}%")

    # Reset the index so DateTime becomes a column again
    merged_df = merged_df.reset_index()

    return merged_df


# Test
# df = load_data('青龙港1.csv', '一取水.csv')
# df.to_csv('merged_data.csv', index=False)
# print("Merged data saved to 'merged_data.csv' successfully")

# # Plot salinity over time
# plt.figure(figsize=(12, 6))
# plt.plot(df['DateTime'], df['upstream_smooth'], label='upstream salinity', color='blue')
# plt.plot(df['DateTime'], df['downstream_smooth'], label='downstream salinity', color='red')
# plt.xlabel('Time')
# plt.ylabel('Salinity')
# plt.title('Salinity over time')
# plt.legend()
# plt.grid(True)
# plt.tight_layout()
# plt.savefig('salinity_time_series.png', dpi=300)
# plt.show()


# ---------------------- Feature engineering ----------------------


# -------------------------------
# Lunar (tidal) features
# -------------------------------
def add_lunar_features(df):
    lunar_day, lunar_phase_sin, lunar_phase_cos, is_high_tide = [], [], [], []
    for dt in df['DateTime']:
        ld = LunarDate.fromSolarDate(dt.year, dt.month, dt.day)
        lunar_day.append(ld.day)
        # The 15-day period approximates the spring-neap (half synodic month) cycle
        lunar_phase_sin.append(np.sin(2 * np.pi * ld.day / 15))
        lunar_phase_cos.append(np.cos(2 * np.pi * ld.day / 15))
        # Spring tides cluster around the new moon (days 1-5) and full moon (days 16-20)
        is_high_tide.append(1 if (ld.day <= 5 or (ld.day >= 16 and ld.day <= 20)) else 0)
    df['lunar_day'] = lunar_day
    df['lunar_phase_sin'] = lunar_phase_sin
    df['lunar_phase_cos'] = lunar_phase_cos
    df['is_high_tide'] = is_high_tide
    return df
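
# Hedged usage sketch for add_lunar_features (illustrative only): the function
# reads just the DateTime column, so a tiny synthetic frame is enough to see
# the derived tidal features.
# demo_df = pd.DataFrame({'DateTime': pd.date_range('2023-06-01', periods=3, freq='D')})
# demo_df = add_lunar_features(demo_df)
# print(demo_df[['DateTime', 'lunar_day', 'lunar_phase_sin', 'is_high_tide']])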


# -------------------------------
# Delay features (vectorized via shift)
# -------------------------------
def batch_create_delay_features(df, delay_hours):
    """
    Create lagged copies of selected columns in the dataframe.
    """
    # Columns that receive delay features
    target_columns = ['upstream_smooth']

    # Create the lagged columns
    for column in target_columns:
        if column in df.columns:
            for delay in delay_hours:
                df[f'{column.split("_")[0]}_delay_{delay}h'] = df[column].shift(delay)
        else:
            print(f"Warning: column {column} does not exist; skipping its delay features")

    return df
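
# Minimal sketch of the shift-based delay features on synthetic data (assumes
# only an 'upstream_smooth' column, as in the real pipeline). Each delay
# column is the series shifted down by `delay` rows, so its first `delay`
# rows are NaN.
# toy = pd.DataFrame({'upstream_smooth': np.arange(10, dtype=float)})
# toy = batch_create_delay_features(toy, delay_hours=[2, 4])
# print(toy[['upstream_smooth', 'upstream_delay_2h', 'upstream_delay_4h']].head(6))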


# Generate additional features
def generate_features(df):
    """
    Generate additional features (smoothed history, calendar, statistics, and
    external water-level/flow features) and add them to the original DataFrame.
    """
    try:
        # Smoothed salinity series
        df['upstream_smooth'] = df['upstream'].rolling(window=24, min_periods=1, center=True).mean()
        df['downstream_smooth'] = df['downstream'].rolling(window=24, min_periods=1, center=True).mean()

        # Calendar features
        df['hour'] = df['DateTime'].dt.hour
        df['weekday'] = df['DateTime'].dt.dayofweek
        df['month'] = df['DateTime'].dt.month

        # Cyclical sin/cos encoding of the calendar features
        df['hour_sin'] = np.sin(2 * np.pi * df['hour'] / 24)
        df['hour_cos'] = np.cos(2 * np.pi * df['hour'] / 24)
        df['weekday_sin'] = np.sin(2 * np.pi * df['weekday'] / 7)
        df['weekday_cos'] = np.cos(2 * np.pi * df['weekday'] / 7)
        df['month_sin'] = np.sin(2 * np.pi * df['month'] / 12)
        df['month_cos'] = np.cos(2 * np.pi * df['month'] / 12)

        # Statistical features
        df['mean_1d_up'] = df['upstream_smooth'].rolling(window=24, min_periods=1).mean()
        df['mean_3d_up'] = df['upstream_smooth'].rolling(window=72, min_periods=1).mean()
        df['std_1d_up'] = df['upstream_smooth'].rolling(window=24, min_periods=1).std()

        df['mean_1d_down'] = df['downstream_smooth'].rolling(window=24, min_periods=1).mean()
        df['mean_3d_down'] = df['downstream_smooth'].rolling(window=72, min_periods=1).mean()
        df['std_1d_down'] = df['downstream_smooth'].rolling(window=24, min_periods=1).std()

        # Trend features
        df['trend_1h_up'] = df['upstream_smooth'].diff(1)
        df['trend_3h_up'] = df['upstream_smooth'].diff(3)
        df['trend_6h_up'] = df['upstream_smooth'].diff(6)
        df['trend_12h_up'] = df['upstream_smooth'].diff(12)
        df['trend_24h_up'] = df['upstream_smooth'].diff(24)

        df['trend_1h_down'] = df['downstream_smooth'].diff(1)
        df['trend_3h_down'] = df['downstream_smooth'].diff(3)
        df['trend_6h_down'] = df['downstream_smooth'].diff(6)
        df['trend_12h_down'] = df['downstream_smooth'].diff(12)
        df['trend_24h_down'] = df['downstream_smooth'].diff(24)

        # External features (water level and flow)
        if 'water_level_smooth' in df.columns:
            df['water_level_trend_1h'] = df['water_level_smooth'].diff(1)
            df['water_level_trend_24h'] = df['water_level_smooth'].diff(24)
            df['mean_1d_water_level'] = df['water_level_smooth'].rolling(window=24, min_periods=1).mean()
            df['mean_3d_water_level'] = df['water_level_smooth'].rolling(window=72, min_periods=1).mean()
            df['std_1d_water_level'] = df['water_level_smooth'].rolling(window=24, min_periods=1).std()

        if 'flow_smooth' in df.columns:
            df['flow_trend_1h'] = df['flow_smooth'].diff(1)
            df['flow_trend_24h'] = df['flow_smooth'].diff(24)
            df['mean_1d_flow'] = df['flow_smooth'].rolling(window=24, min_periods=1).mean()
            df['mean_3d_flow'] = df['flow_smooth'].rolling(window=72, min_periods=1).mean()
            df['std_1d_flow'] = df['flow_smooth'].rolling(window=24, min_periods=1).std()

        return df

    except Exception as e:
        print(f"Feature generation error: {e}")
        return df
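
# Quick illustration of why the cyclical sin/cos encoding above is used:
# hour 23 and hour 0 land next to each other on the unit circle, whereas the
# raw integers 23 and 0 are maximally far apart.
# hours = np.array([0, 6, 12, 23])
# print(np.sin(2 * np.pi * hours / 24).round(3))
# print(np.cos(2 * np.pi * hours / 24).round(3))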


# -------------------------------
# Sliding-window training samples (optimized feature engineering)
# -------------------------------
def create_features_vectorized(df, look_back=168, forecast_horizon=1):
    """
    Build training samples: each sample uses all raw data from the past 7 days
    (look_back hours) to predict the mean downstream salinity of the next day.
    """
    try:
        # Ensure chronological order
        df = df.sort_values('DateTime')

        # All numeric columns (excluding DateTime)
        numeric_columns = df.select_dtypes(include=[np.number]).columns.tolist()
        if 'DateTime' in numeric_columns:
            numeric_columns.remove('DateTime')

        # Feature and label containers
        features = []  # X inputs
        targets = []   # y outputs

        # Build samples with a sliding window
        for i in range(len(df) - look_back - forecast_horizon + 1):
            # 7-day feature window
            window = df.iloc[i:i+look_back]

            # Extract features from every raw numeric column
            window_features = []
            for col in numeric_columns:
                # Fill NaNs before flattening the column into the sample
                col_values = window[col].ffill().bfill().values
                window_features.extend(col_values)

            # Calendar features of the window's last timestamp
            current_date = window['DateTime'].iloc[-1]
            window_features.extend([
                current_date.month,
                current_date.day,
                current_date.weekday()
            ])

            # Target: mean downstream salinity over the next 24 hours
            next_day = df.iloc[i+look_back:i+look_back+24]
            target_values = next_day['downstream_smooth'].ffill().bfill().values
            target = np.mean(target_values)

            # Keep the sample only if features and target are valid
            if not np.any(np.isnan(window_features)) and not np.isnan(target) and not np.isinf(target):
                features.append(window_features)
                targets.append(target)

        if not features:
            print("Warning: no valid samples were generated")
            return np.array([]), np.array([])

        # Convert to numpy arrays
        X = np.array(features)
        y = np.array(targets)

        print(f"Feature matrix generated; shape: {X.shape}")
        return X, y

    except Exception as e:
        print(f"Feature creation error: {e}")
        return np.array([]), np.array([])
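
# Shape sketch for create_features_vectorized on synthetic data (illustrative
# assumption: two numeric columns). With N rows, look_back L and horizon F, at
# most N - L - F + 1 windows are produced, and each sample holds L values per
# numeric column plus 3 calendar features, i.e. X.shape[1] == L * n_cols + 3.
# rng = pd.date_range('2023-01-01', periods=400, freq='h')
# toy = pd.DataFrame({'DateTime': rng,
#                     'upstream_smooth': np.random.rand(400),
#                     'downstream_smooth': np.random.rand(400)})
# X_demo, y_demo = create_features_vectorized(toy, look_back=168, forecast_horizon=1)
# print(X_demo.shape)  # expected: (232, 168 * 2 + 3)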

def generate_prediction_features(df, current_date, look_back=168):
    """
    Generate features for prediction, mirroring the logic of
    create_features_vectorized.
    """
    try:
        # Ensure chronological order; reset the index so that the label-based
        # lookup below lines up with positional (iloc) slicing
        df = df.sort_values('DateTime').reset_index(drop=True)

        # All numeric columns (excluding DateTime)
        numeric_columns = df.select_dtypes(include=[np.number]).columns.tolist()
        if 'DateTime' in numeric_columns:
            numeric_columns.remove('DateTime')

        # Locate the current date in the data
        current_idx = df[df['DateTime'] <= current_date].index[-1]

        # A full 168-hour (7-day) window is required
        if current_idx < look_back:
            print(f"Insufficient data: need {look_back} hours but only have {current_idx+1}")
            return None

        window = df.iloc[current_idx-look_back+1:current_idx+1]

        # Extract features from every raw numeric column
        features = []
        for col in numeric_columns:
            # Use the raw column values directly as features
            features.extend(window[col].values)

        # Calendar features
        features.extend([
            current_date.month,
            current_date.day,
            current_date.weekday()
        ])

        return np.array(features)

    except Exception as e:
        print(f"Prediction feature generation error: {e}")
        return None


# -------------------------------
# Model accuracy metrics
# -------------------------------
def get_model_metrics():
    """Fetch the accuracy metrics stored in the model cache."""
    model_cache_file = 'salinity_model.pkl'
    if os.path.exists(model_cache_file):
        try:
            with open(model_cache_file, 'rb') as f:
                model_data = pickle.load(f)
            return {
                'rmse': model_data.get('rmse', None),
                'mae': model_data.get('mae', None)
            }
        except Exception as e:
            print(f"Failed to read model metrics: {e}")
    return None
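
# Usage sketch: returns a dict like {'rmse': ..., 'mae': ...} when
# 'salinity_model.pkl' exists (values may be None for older caches), or None
# when there is no cache yet.
# metrics = get_model_metrics()
# if metrics and metrics['rmse'] is not None:
#     print(f"cached RMSE={metrics['rmse']:.4f}, MAE={metrics['mae']:.4f}")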

# -------------------------------
# Model training and prediction (reports validation RMSE and MAE)
# -------------------------------
def train_and_predict(df, start_time, force_retrain=False):
    global cached_model, last_training_time
    model_cache_file = 'salinity_model.pkl'
    model_needs_training = True

    if os.path.exists(model_cache_file) and force_retrain:
        try:
            os.remove(model_cache_file)
            print("Old model cache deleted (forced retraining)")
        except Exception as e:
            print("Error deleting cache:", e)

    # Training data: everything strictly before the start time
    train_df = df[df['DateTime'] < start_time].copy()
    if len(train_df) < 100:
        print(f"Not enough training data: need at least 100 rows, have {len(train_df)}")
        return None, None, None, None

    print(f"Training on data from {train_df['DateTime'].min()} to {train_df['DateTime'].max()}")
    print(f"Total training rows: {len(train_df)}")

    # Build features once to check the current feature dimension
    test_X, test_y = create_features_vectorized(train_df, look_back=168, forecast_horizon=1)
    if test_X is None or test_y is None:
        print("Feature generation failed")
        return None, None, None, None

    current_feature_dim = test_X.shape[1] if len(test_X) > 0 else 0
    print(f"Current feature dimension: {current_feature_dim}")

    cached_feature_dim = None

    if not force_retrain and cached_model is not None and last_training_time is not None:
        if last_training_time >= train_df['DateTime'].max():
            try:
                cached_feature_dim = cached_model.n_features_in_
                print(f"Cached model feature dimension: {cached_feature_dim}")

                if cached_feature_dim == current_feature_dim:
                    model_needs_training = False
                    print(f"Using cached model; trained at: {last_training_time}")
                else:
                    print(f"Feature dimension mismatch (cached: {cached_feature_dim}, current: {current_feature_dim}); retraining required")
            except Exception as e:
                print(f"Failed to check model feature dimension: {e}")
    elif not force_retrain and os.path.exists(model_cache_file):
        try:
            with open(model_cache_file, 'rb') as f:
                model_data = pickle.load(f)
                cached_model = model_data['model']
                last_training_time = model_data['training_time']

                try:
                    cached_feature_dim = cached_model.n_features_in_
                    print(f"File-cached model feature dimension: {cached_feature_dim}")
                    if cached_feature_dim == current_feature_dim:
                        if last_training_time >= train_df['DateTime'].max():
                            model_needs_training = False
                            print(f"Model loaded from file; trained at: {last_training_time}")
                    else:
                        print(f"Feature dimension mismatch (file model: {cached_feature_dim}, current: {current_feature_dim}); retraining required")
                except Exception as e:
                    print(f"Failed to check model feature dimension: {e}")
        except Exception as e:
            print("Failed to load model:", e)

    if model_needs_training:
        print("Training a new model...")
        start_train = time()

        # Generate features
        X, y = create_features_vectorized(train_df, look_back=168, forecast_horizon=1)
        if X is None or y is None:
            print("Feature generation failed")
            return None, None, None, None

        if len(X) == 0 or len(y) == 0:
            print("Not enough samples; training aborted")
            return None, None, None, None

        print(f"Training samples: {X.shape[0]}, feature dimension: {X.shape[1]}")

        # Chronological train/validation split (last 10% as validation)
        split_idx = int(len(X) * 0.9)
        X_train, X_val = X[:split_idx], X[split_idx:]
        y_train, y_val = y[:split_idx], y[split_idx:]

        print(f"Train size: {len(X_train)}, validation size: {len(X_val)}")

        # Create the model; eval_metric is set in the constructor, which is
        # required by xgboost >= 2.0 (passing it to fit() was removed there)
        model = XGBRegressor(
            n_estimators=200,
            learning_rate=0.1,
            max_depth=6,
            min_child_weight=2,
            subsample=0.8,
            colsample_bytree=0.8,
            gamma=0.1,
            reg_alpha=0.1,
            reg_lambda=1.0,
            n_jobs=-1,
            random_state=42,
            eval_metric='rmse',
            early_stopping_rounds=10
        )

        try:
            model.fit(X_train, y_train,
                      eval_set=[(X_val, y_val)],
                      verbose=False)

            # Compute RMSE and MAE on the validation set
            y_val_pred = model.predict(X_val)
            rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))
            mae = mean_absolute_error(y_val, y_val_pred)
            print(f"Validation RMSE: {rmse:.4f}, MAE: {mae:.4f}")

            # Feature importance analysis
            feature_importance = model.feature_importances_
            sorted_idx = np.argsort(feature_importance)[::-1]

            # Build feature names
            feature_names = []
            numeric_columns = train_df.select_dtypes(include=[np.number]).columns.tolist()
            if 'DateTime' in numeric_columns:
                numeric_columns.remove('DateTime')

            # One lag name per hour per numeric column; the window values are
            # oldest-first, so position i corresponds to lag t-(167-i)
            for col in numeric_columns:
                feature_names.extend([f'{col}_t-{167 - i}' for i in range(168)])

            # Calendar feature names
            feature_names.extend(['month', 'day', 'weekday'])

            # Ensure the name list matches the importance array length
            if len(feature_names) != len(feature_importance):
                print(f"Warning: number of feature names ({len(feature_names)}) does not match importance array length ({len(feature_importance)})")
                feature_names = feature_names[:len(feature_importance)]

            # Print the 10 most important features
            print("\nTop 10 features:")
            for i in range(min(10, len(sorted_idx))):
                print(f"{i+1}. {feature_names[sorted_idx[i]]}: {feature_importance[sorted_idx[i]]:.6f}")

            last_training_time = start_time
            cached_model = model

            with open(model_cache_file, 'wb') as f:
                pickle.dump({
                    'model': model,
                    'training_time': last_training_time,
                    'feature_columns': feature_names,
                    'rmse': rmse,
                    'mae': mae,
                    'feature_dim': current_feature_dim
                }, f)
            print(f"Model training finished in {time() - start_train:.2f}s; feature dimension: {current_feature_dim}")
        except Exception as e:
            print("Model training error:", e)
            return None, None, None, None
    else:
        model = cached_model

    # Prediction
    try:
        # Five consecutive daily forecast dates starting at start_time
        future_dates = [start_time + timedelta(days=i) for i in range(5)]

        # Build the feature matrix for prediction
        X_pred = []
        for i in range(5):
            current_date = future_dates[i]
            features = generate_prediction_features(df, current_date, look_back=168)
            if features is None:
                print(f"Failed to generate prediction features: {current_date}")
                return None, None, None, None
            X_pred.append(features)

        # Batch prediction
        X_pred = np.array(X_pred)
        predictions = model.predict(X_pred)

        # Prediction confidence intervals
        if model_needs_training:
            # Use the residual standard deviation on the training split
            y_train_pred = model.predict(X_train)
            train_std = np.std(y_train - y_train_pred)
        else:
            # Fall back to the cached RMSE as the error estimate
            try:
                with open(model_cache_file, 'rb') as f:
                    model_data = pickle.load(f)
                    train_std = model_data.get('rmse', 1.0)
            except Exception:
                train_std = 1.0

        prediction_intervals = np.array([
            predictions - 1.96 * train_std,
            predictions + 1.96 * train_std
        ])

        return future_dates, predictions, model, prediction_intervals
    except Exception as e:
        print("Prediction error:", e)
        return None, None, None, None
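
# Usage sketch for train_and_predict (the start time below is hypothetical and
# assumes `df` was produced by the loading/feature pipeline at the bottom of
# this file). On success it returns 5 daily forecast dates, the predictions,
# the fitted model, and a 2 x 5 array with the 95% interval bounds.
# dates, preds, mdl, intervals = train_and_predict(df, pd.to_datetime('2023-06-01'), force_retrain=False)
# if preds is not None:
#     for d, p in zip(dates, preds):
#         print(d.strftime('%Y-%m-%d'), round(float(p), 2))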

# -------------------------------
# GUI
# -------------------------------
def run_gui():
    def configure_gui_fonts():
        font_names = ['微软雅黑', 'Microsoft YaHei', 'SimSun', 'SimHei']
        for font_name in font_names:
            try:
                default_font = tkfont.nametofont("TkDefaultFont")
                default_font.configure(family=font_name)
                text_font = tkfont.nametofont("TkTextFont")
                text_font.configure(family=font_name)
                fixed_font = tkfont.nametofont("TkFixedFont")
                fixed_font.configure(family=font_name)
                return True
            except Exception:
                continue
        return False

    def on_predict():
        try:
            predict_start = time()
            status_label.config(text="Predicting...")
            root.update()

            # Clear the previous plot and event connections
            ax.clear()
            # Disconnect all events
            for cid in event_cids:
                canvas.mpl_disconnect(cid)
            event_cids.clear()

            # Remove any secondary axis
            for ax_in_fig in fig.get_axes():
                if ax_in_fig != ax:
                    ax_in_fig.remove()

            start_time_dt = pd.to_datetime(entry.get())
            force_retrain = retrain_var.get()
            future_dates, predictions, model, prediction_intervals = train_and_predict(df, start_time_dt, force_retrain)
            if future_dates is None or predictions is None:
                status_label.config(text="Prediction failed")
                return

            # Fetch and display the model accuracy metrics
            model_metrics = get_model_metrics()
            if model_metrics:
                metrics_text = f"Model accuracy - RMSE: {model_metrics['rmse']:.4f}, MAE: {model_metrics['mae']:.4f}"
                metrics_label.config(text=metrics_text)

            # Create a twin y-axis when water-level data is available
            ax2 = None
            has_water_level = 'water_level' in df.columns and 'water_level_smooth' in df.columns
            if has_water_level:
                try:
                    ax2 = ax.twinx()
                except Exception as e:
                    print(f"Failed to create twin y-axis: {e}")
                    ax2 = None

            # Plot history (most recent 120 days)
            history_end = min(start_time_dt, df['DateTime'].max())
            history_start = history_end - timedelta(days=120)
            hist_data = df[(df['DateTime'] >= history_start) & (df['DateTime'] <= history_end)]

            # Make sure there is data to plot
            if len(hist_data) == 0:
                status_label.config(text="Error: no historical data in the selected range")
                return

            # Plot the salinity series
            ax.plot(hist_data['DateTime'], hist_data['downstream_smooth'],
                    label='一取水 (downstream) salinity', color='blue', linewidth=1.5)
            ax.plot(hist_data['DateTime'], hist_data['upstream_smooth'],
                    label='青龙港 (upstream) salinity', color='purple', linewidth=1.5, alpha=0.7)

            # Plot the water level (if available)
            if ax2 is not None and has_water_level:
                try:
                    # Require a minimum number of valid points
                    valid_water_level = hist_data['water_level_smooth'].dropna()
                    if len(valid_water_level) > 10:  # at least 10 valid values
                        # Only show the water level up to the input time
                        water_level_data = hist_data[hist_data['DateTime'] <= start_time_dt]
                        ax2.plot(water_level_data['DateTime'], water_level_data['water_level_smooth'],
                                 label='Yangtze water level', color='green', linewidth=1.5, linestyle='--')
                        ax2.set_ylabel('Water level', color='green')
                        ax2.tick_params(axis='y', labelcolor='green')
                    else:
                        print("Not enough valid water-level points; skipping the water-level plot")
                except Exception as e:
                    print(f"Error plotting water level: {e}")

            # Plot the forecast
            if len(future_dates) > 0 and len(predictions) > 0:
                ax.plot(future_dates, predictions, marker='o', linestyle='--',
                        label='Recursive salinity forecast', color='red', linewidth=2)

                # Shade the prediction interval
                if prediction_intervals is not None:
                    ax.fill_between(future_dates, prediction_intervals[0], prediction_intervals[1],
                                    color='red', alpha=0.2, label='95% confidence interval')

            # Plot actual values (if available)
            actual_data = df[(df['DateTime'] >= start_time_dt) & (df['DateTime'] <= future_dates[-1])]
            actual_values = None

            if not actual_data.empty:
                actual_values = []
                # Pick the actual value closest to each forecast date
                for pred_date in future_dates:
                    # Only within the available data range
                    if pred_date <= df['DateTime'].max():
                        closest_idx = np.argmin(np.abs(actual_data['DateTime'] - pred_date))
                        actual_values.append(actual_data['downstream_smooth'].iloc[closest_idx])
                    else:
                        # None marks dates beyond the data range
                        actual_values.append(None)

                # Plot the actual salinity (only the points that have values)
                valid_dates = [date for date, val in zip(future_dates, actual_values) if val is not None]
                valid_values = [val for val in actual_values if val is not None]
                if valid_dates and valid_values:
                    ax.plot(valid_dates, valid_values, marker='s', linestyle='-',
                            label='Actual salinity', color='orange', linewidth=2)

            # Title and axis labels
            ax.set_xlabel('Date')
            ax.set_ylabel('Salinity')
            ax.set_title(f"Recursive single-day salinity forecast starting {start_time_dt.strftime('%Y-%m-%d %H:%M:%S')}")

            # Legend and tight layout
            if ax2 is not None:
                try:
                    lines1, labels1 = ax.get_legend_handles_labels()
                    lines2, labels2 = ax2.get_legend_handles_labels()
                    if lines2:  # water level was actually plotted
                        # Merge the legends, avoiding duplicates
                        all_lines = lines1 + lines2
                        all_labels = labels1 + labels2
                        unique_labels = []
                        unique_lines = []
                        for label, line in zip(all_labels, all_lines):
                            if label not in unique_labels:
                                unique_labels.append(label)
                                unique_lines.append(line)
                        ax.legend(unique_lines, unique_labels, loc='best')
                    else:
                        ax.legend(loc='best')
                except Exception as e:
                    print(f"Error building legend: {e}")
                    ax.legend(loc='best')
            else:
                ax.legend(loc='best')

            fig.tight_layout()

            # Redraw the embedded canvas (plt.close/plt.draw would detach the
            # figure from the Tk canvas, so only the canvas itself is redrawn)
            fig.canvas.draw()
            fig.canvas.flush_events()

            # Save the default view ranges
            current_view['xlim'] = ax.get_xlim()
            current_view['ylim'] = ax.get_ylim()
            if ax2 is not None:
                current_view['ylim2'] = ax2.get_ylim()

            # Initialize the pan state
            ax._pan_start = None

            # Reconnect all events
            event_cids.append(canvas.mpl_connect('resize_event', on_resize))
            event_cids.append(canvas.mpl_connect('scroll_event', on_scroll))
            event_cids.append(canvas.mpl_connect('button_press_event', on_press))
            event_cids.append(canvas.mpl_connect('button_release_event', on_release))
            event_cids.append(canvas.mpl_connect('motion_notify_event', on_motion))

            # Update the status text
            predict_time = time() - predict_start
            status_label.config(text=f"Recursive forecast finished (took {predict_time:.2f}s)")

            # Show the forecast results
            result_text = "Recursive single-day forecast results:\n\n"

            # If actual values exist, show the difference as well
            if actual_values is not None:
                result_text += "Date         Predicted  Actual   Diff\n"
                result_text += "--------------------------------------\n"
                for i, (date, pred, actual) in enumerate(zip(future_dates, predictions, actual_values)):
                    if actual is not None:  # only show a diff when an actual value exists
                        diff = pred - actual
                        result_text += f"{date.strftime('%Y-%m-%d')}   {pred:6.2f}   {actual:6.2f}  {diff:6.2f}\n"
                    else:
                        result_text += f"{date.strftime('%Y-%m-%d')}   {pred:6.2f}       --      --\n"
            else:
                result_text += "Date         Predicted\n"
                result_text += "-------------------\n"
                for i, (date, pred) in enumerate(zip(future_dates, predictions)):
                    result_text += f"{date.strftime('%Y-%m-%d')}   {pred:6.2f}\n"
                result_text += "\nNo actual values available for comparison"

            update_result_text(result_text)
        except Exception as e:
            status_label.config(text=f"Error: {str(e)}")
            import traceback
            traceback.print_exc()

    def reset_view():
        # Find the secondary axis, if any
        current_ax2 = None
        for ax_in_fig in fig.get_axes():
            if ax_in_fig != ax and hasattr(ax_in_fig, 'get_shared_x_axes'):
                current_ax2 = ax_in_fig
                break

        # Restore the saved view ranges
        if current_view['xlim'] is not None:
            ax.set_xlim(current_view['xlim'])
            ax.set_ylim(current_view['ylim'])
            if current_ax2 is not None and current_view['ylim2'] is not None:
                current_ax2.set_ylim(current_view['ylim2'])

        # Tight layout and redraw
        fig.tight_layout()
        canvas.draw_idle()
        status_label.config(text="Chart view reset")

    def on_scroll(event):
        # Find the secondary axis, if any
        current_ax2 = None
        for ax_in_fig in fig.get_axes():
            if ax_in_fig != ax and hasattr(ax_in_fig, 'get_shared_x_axes'):
                current_ax2 = ax_in_fig
                break

        # Ignore scroll events outside both axes
        if event.inaxes != ax and (current_ax2 is None or event.inaxes != current_ax2):
            return

        xlim = ax.get_xlim()
        ylim = ax.get_ylim()
        zoom_factor = 1.1
        x_data = event.xdata if event.xdata is not None else (xlim[0]+xlim[1])/2
        y_data = event.ydata if event.ydata is not None else (ylim[0]+ylim[1])/2
        x_rel = (x_data - xlim[0]) / (xlim[1] - xlim[0])
        y_rel = (y_data - ylim[0]) / (ylim[1] - ylim[0])

        if event.step > 0:
            # Zoom in around the cursor
            new_width = (xlim[1]-xlim[0]) / zoom_factor
            new_height = (ylim[1]-ylim[0]) / zoom_factor
        else:
            # Zoom out around the cursor
            new_width = (xlim[1]-xlim[0]) * zoom_factor
            new_height = (ylim[1]-ylim[0]) * zoom_factor

        x0 = x_data - x_rel * new_width
        y0 = y_data - y_rel * new_height
        ax.set_xlim([x0, x0+new_width])
        ax.set_ylim([y0, y0+new_height])

        # Keep the secondary axis in sync
        if current_ax2 is not None:
            ylim2 = current_ax2.get_ylim()
            # Scale factor between the two y-axes
            y_scale2 = (ylim2[1] - ylim2[0]) / (ylim[1] - ylim[0])
            # New height and origin for the secondary axis
            new_height2 = new_height * y_scale2
            y02 = ylim2[0] + (y0 - ylim[0]) * y_scale2
            current_ax2.set_xlim([x0, x0+new_width])
            current_ax2.set_ylim([y02, y02+new_height2])

        canvas.draw_idle()

    root = tk.Tk()
    root.title("青龙港-陈行 Salinity Prediction System")
    try:
        configure_gui_fonts()
    except Exception as e:
        print("Font configuration error:", e)

    # Input box and control buttons
    input_frame = ttk.Frame(root, padding="10")
    input_frame.pack(fill=tk.X)

    ttk.Label(input_frame, text="Enter start time (YYYY-MM-DD HH:MM:SS)").pack(side=tk.LEFT)
    entry = ttk.Entry(input_frame, width=25)
    entry.pack(side=tk.LEFT, padx=5)
    predict_button = ttk.Button(input_frame, text="Predict", command=on_predict)
    predict_button.pack(side=tk.LEFT)
    status_label = ttk.Label(input_frame, text="Tip: check 'Force model retraining' on the first run")
    status_label.pack(side=tk.LEFT, padx=10)

    control_frame = ttk.Frame(root, padding="5")
    control_frame.pack(fill=tk.X)
    retrain_var = tk.BooleanVar(value=False)
    ttk.Checkbutton(control_frame, text="Force model retraining", variable=retrain_var).pack(side=tk.LEFT)

    # Legend note (mentions the water level when that data is present)
    if 'water_level' in df.columns:
        legend_label = ttk.Label(control_frame, text="Legend: purple = 青龙港 (upstream), blue = 一取水 (downstream), red = forecast, green = Yangtze water level")
    else:
        legend_label = ttk.Label(control_frame, text="Legend: purple = 青龙港 (upstream), blue = 一取水 (downstream), red = forecast, orange = actual")
    legend_label.pack(side=tk.LEFT, padx=10)
    reset_button = ttk.Button(control_frame, text="Reset view", command=reset_view)
    reset_button.pack(side=tk.LEFT, padx=5)

    # Label showing the model accuracy
    metrics_frame = ttk.Frame(root, padding="5")
    metrics_frame.pack(fill=tk.X)
    model_metrics = get_model_metrics()
    metrics_text = "Model accuracy: unknown" if not model_metrics else f"Model accuracy - RMSE: {model_metrics['rmse']:.4f}, MAE: {model_metrics['mae']:.4f}"
    metrics_label = ttk.Label(metrics_frame, text=metrics_text)
    metrics_label.pack(side=tk.LEFT, padx=10)

    # Results area
    result_frame = ttk.Frame(root, padding="10")
    result_frame.pack(fill=tk.BOTH, expand=True)

    # Chart on the left
    plot_frame = ttk.Frame(result_frame, width=800, height=600)
    plot_frame.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
    plot_frame.pack_propagate(False)  # keep the frame from resizing to its content

    # Text results on the right
    text_frame = ttk.Frame(result_frame)
    text_frame.pack(side=tk.RIGHT, fill=tk.Y)

    # Monospace font for the results table
    result_font = tkfont.Font(family="Courier New", size=10, weight="normal")

    # Text box with a scrollbar
    result_text = tk.Text(text_frame, width=50, height=25, font=result_font, wrap=tk.NONE)
    result_text.pack(side=tk.LEFT, fill=tk.BOTH)
    result_scroll = ttk.Scrollbar(text_frame, orient="vertical", command=result_text.yview)
    result_scroll.pack(side=tk.RIGHT, fill=tk.Y)
    result_text.configure(yscrollcommand=result_scroll.set)
    result_text.configure(state=tk.DISABLED)  # read-only initially

    # Helper to update the result text
    def update_result_text(text):
        result_text.configure(state=tk.NORMAL)
        result_text.delete(1.0, tk.END)
        result_text.insert(tk.END, text)
        result_text.configure(state=tk.DISABLED)

    # Higher-DPI figure for better rendering quality
    fig, ax = plt.subplots(figsize=(10, 6), dpi=100)
    fig.tight_layout(pad=3.0)  # extra padding so labels are not clipped

    # Canvas inside the fixed-size frame
    canvas = FigureCanvasTkAgg(fig, master=plot_frame)
    canvas.get_tk_widget().pack(side=tk.TOP, fill=tk.BOTH, expand=True)

    # Toolbar with zoom/save tools
    toolbar_frame = ttk.Frame(plot_frame)
    toolbar_frame.pack(side=tk.BOTTOM, fill=tk.X)
    toolbar = NavigationToolbar2Tk(canvas, toolbar_frame)
    toolbar.update()

    # Re-apply the tight layout on resize so the chart always fits
    def on_resize(event):
        fig.tight_layout()
        canvas.draw_idle()

    # Chart interaction
    canvas.mpl_connect('resize_event', on_resize)

    # Mouse panning
    def on_press(event):
        # Find the secondary axis, if any
        current_ax2 = None
        for ax_in_fig in fig.get_axes():
            if ax_in_fig != ax and hasattr(ax_in_fig, 'get_shared_x_axes'):
                current_ax2 = ax_in_fig
                break

        # Start panning only when the press is inside one of the axes
        if event.inaxes == ax or (current_ax2 is not None and event.inaxes == current_ax2):
            canvas.get_tk_widget().config(cursor="fleur")
            ax._pan_start = (event.x, event.y, event.xdata, event.ydata)
        else:
            ax._pan_start = None

    def on_release(event):
        ax._pan_start = None
        canvas.get_tk_widget().config(cursor="")
        canvas.draw_idle()

    def on_motion(event):
        if not hasattr(ax, '_pan_start') or ax._pan_start is None:
            # Find the secondary axis, if any
            current_ax2 = None
            for ax_in_fig in fig.get_axes():
                if ax_in_fig != ax and hasattr(ax_in_fig, 'get_shared_x_axes'):
                    current_ax2 = ax_in_fig
                    break

            # Show the pan cursor while hovering over either axis
            if event.inaxes == ax or (current_ax2 is not None and event.inaxes == current_ax2):
                canvas.get_tk_widget().config(cursor="fleur")
            else:
                canvas.get_tk_widget().config(cursor="")
            return

        # Find the secondary axis, if any
        current_ax2 = None
        for ax_in_fig in fig.get_axes():
            if ax_in_fig != ax and hasattr(ax_in_fig, 'get_shared_x_axes'):
                current_ax2 = ax_in_fig
                break

        # Ignore motion outside both axes
        if event.inaxes != ax and (current_ax2 is None or event.inaxes != current_ax2):
            return

        start_x, start_y, x_data, y_data = ax._pan_start
        dx = event.x - start_x
        dy = event.y - start_y

        # Current view
        xlim = ax.get_xlim()
        ylim = ax.get_ylim()

        # Convert the pixel deltas into data-coordinate deltas
        x_scale = (xlim[1] - xlim[0]) / canvas.get_tk_widget().winfo_width()
        y_scale = (ylim[1] - ylim[0]) / canvas.get_tk_widget().winfo_height()

        # Update the view
        new_xlim = (xlim[0] - dx * x_scale, xlim[1] - dx * x_scale)
        new_ylim = (ylim[0] + dy * y_scale, ylim[1] + dy * y_scale)

        ax.set_xlim(new_xlim)
        ax.set_ylim(new_ylim)

        # Keep the secondary axis in sync
        if current_ax2 is not None:
            # Current range of the secondary axis
            ylim2 = current_ax2.get_ylim()
            # Scale factor between the two y-axes
            y_scale2 = (ylim2[1] - ylim2[0]) / (ylim[1] - ylim[0])
            # Shift the secondary axis by the equivalent amount
            new_ylim2 = (ylim2[0] + dy * y_scale * y_scale2, ylim2[1] + dy * y_scale * y_scale2)
            current_ax2.set_xlim(new_xlim)
            current_ax2.set_ylim(new_ylim2)

        # Update the pan origin
        ax._pan_start = (event.x, event.y, event.xdata, event.ydata)

        canvas.draw_idle()

    # Connect the mouse events
    canvas.mpl_connect('button_press_event', on_press)
    canvas.mpl_connect('button_release_event', on_release)
    canvas.mpl_connect('motion_notify_event', on_motion)

    # Initialize the plot
    def init_plot():
        try:
            ax.clear()
            ax.set_xlabel('Date')
            ax.set_ylabel('Salinity')
            ax.set_title('Salinity forecast')
            fig.tight_layout()
            canvas.draw()
        except Exception as e:
            status_label.config(text=f"Error initializing the plot: {str(e)}")

    init_plot()
    root.mainloop()


def resample_to_hourly(df):
    """
    Resample the data to hourly means.
    """
    try:
        # Ensure DateTime is the index
        if 'DateTime' in df.columns:
            df = df.set_index('DateTime')

        # All numeric columns
        numeric_columns = df.select_dtypes(include=[np.number]).columns.tolist()

        # Resample to hourly means ('h' is the modern pandas alias for 'H')
        hourly_df = df[numeric_columns].resample('h').mean()

        # Reset the index so DateTime becomes a column again
        hourly_df = hourly_df.reset_index()

        print(f"Data resampled from minute to hourly resolution; original rows: {len(df)}, resampled rows: {len(hourly_df)}")
        return hourly_df

    except Exception as e:
        print(f"Resampling error: {e}")
        return df
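
# Minimal sketch of resample_to_hourly on synthetic minute-level data:
# 120 one-minute samples collapse into 2 hourly means.
# rng = pd.date_range('2023-01-01', periods=120, freq='min')
# toy = pd.DataFrame({'DateTime': rng, 'upstream': np.random.rand(120)})
# print(len(resample_to_hourly(toy)))  # expected: 2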


# -------------------------------
# Main entry: load data, add features, generate delay features, launch the GUI
# -------------------------------
def save_processed_data(df, filename='processed_data.pkl'):
    try:
        df.to_pickle(filename)
        print(f"Processed data saved to {filename}")
        return True
    except Exception as e:
        print(f"Failed to save data: {e}")
        return False

def load_processed_data(filename='processed_data.pkl'):
    try:
        if os.path.exists(filename):
            df = pd.read_pickle(filename)
            print(f"Processed data loaded from {filename}")
            return df
        else:
            print(f"Processed data file {filename} not found")
            return None
    except Exception as e:
        print(f"Failed to load data: {e}")
        return None

# # Delete the old processed-data cache (if present) so the fixed code reprocesses the data
# if os.path.exists('processed_data.pkl'):
#     try:
#         os.remove('processed_data.pkl')
#         print("Old processed-data cache deleted; data will be reprocessed with the fixed code")
#     except Exception as e:
#         print(f"Failed to delete the cache file: {e}")

# # Delete the old model file (if present)
# if os.path.exists('salinity_model.pkl'):
#     try:
#         os.remove('salinity_model.pkl')
#         print("Old model file deleted; the model will be retrained")
#     except Exception as e:
#         print(f"Failed to delete the model file: {e}")

# Try to load the processed data; reprocess from scratch if it is missing
processed_data = load_processed_data()
if processed_data is not None:
    df = processed_data
else:
    # Pass the Yangtze level and Datong flow files as extra inputs
    df = load_data('青龙港1.csv', '一取水.csv', '长江液位.csv', '大通流量.csv')
    if df is not None:
        # Calendar features
        df['hour'] = df['DateTime'].dt.hour
        df['weekday'] = df['DateTime'].dt.dayofweek
        df['month'] = df['DateTime'].dt.month

        # Lunar (tidal) features
        df = add_lunar_features(df)

        # Delay features: the upstream-to-downstream travel time is roughly
        # 3-5 days; 12-hour steps for now, to be tuned against results
        # delay_hours = [1,2,3,4,6,12,24,36,48,60,72,84,96,108,120]
        delay_hours = [72, 84, 96, 108, 120]
        df = batch_create_delay_features(df, delay_hours)

        # Statistical features
        df['mean_1d_up'] = df['upstream_smooth'].rolling(window=24, min_periods=1).mean()
        df['mean_3d_up'] = df['upstream_smooth'].rolling(window=72, min_periods=1).mean()
        df['std_1d_up'] = df['upstream_smooth'].rolling(window=24, min_periods=1).std()
        df['max_1d_up'] = df['upstream_smooth'].rolling(window=24, min_periods=1).max()
        df['min_1d_up'] = df['upstream_smooth'].rolling(window=24, min_periods=1).min()

        df['mean_1d_down'] = df['downstream_smooth'].rolling(window=24, min_periods=1).mean()
        df['mean_3d_down'] = df['downstream_smooth'].rolling(window=72, min_periods=1).mean()
        df['std_1d_down'] = df['downstream_smooth'].rolling(window=24, min_periods=1).std()
        df['max_1d_down'] = df['downstream_smooth'].rolling(window=24, min_periods=1).max()
        df['min_1d_down'] = df['downstream_smooth'].rolling(window=24, min_periods=1).min()

        # Water-level statistical features (if water-level data exists)
        if 'water_level' in df.columns:
            # Create the smoothed water level first
            if 'water_level_smooth' not in df.columns:
                df['water_level_smooth'] = df['water_level'].rolling(window=24, min_periods=1, center=True).mean()
                df['water_level_smooth'] = df['water_level_smooth'].fillna(df['water_level'])

            # Water-level statistics
            df['mean_1d_water_level'] = df['water_level_smooth'].rolling(window=24, min_periods=1).mean()
            df['mean_3d_water_level'] = df['water_level_smooth'].rolling(window=72, min_periods=1).mean()
            df['std_1d_water_level'] = df['water_level_smooth'].rolling(window=24, min_periods=1).std()
            df['max_1d_water_level'] = df['water_level_smooth'].rolling(window=24, min_periods=1).max()
            df['min_1d_water_level'] = df['water_level_smooth'].rolling(window=24, min_periods=1).min()

            # Water-level change rates
            df['water_level_change_1h'] = df['water_level_smooth'].diff()
            df['water_level_change_24h'] = df['water_level_smooth'].diff(24)

            # Water-level-to-salinity ratio
            df['water_level_sal_ratio'] = df['water_level_smooth'] / df['downstream_smooth']

            print("Water-level features added")

        # Remaining features
        df = generate_features(df)

        # Resample to hourly resolution
        df = resample_to_hourly(df)

        # Save the processed data
        df.to_csv('merged_data_hour.csv', index=False)
        print("Merged data saved to 'merged_data_hour.csv' successfully")
        save_processed_data(df)

if df is not None:
    run_gui()
else:
    print("Data loading failed; cannot run the prediction.")