import pandas as pd
import numpy as np
import tkinter as tk
from tkinter import ttk
from datetime import timedelta
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg, NavigationToolbar2Tk
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
import pickle
import os
from time import time
import matplotlib

# Configure matplotlib for Chinese text rendering,
# using fonts that ship with the operating system
matplotlib.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'SimSun', 'Arial Unicode MS']  # preferred CJK fonts, in order
matplotlib.rcParams['axes.unicode_minus'] = False  # render minus signs correctly with CJK fonts
matplotlib.rcParams['font.family'] = 'sans-serif'
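
# Note: the rcParams above only take effect if one of the listed fonts is
# actually installed. A minimal sketch (standard matplotlib font_manager API;
# the helper name is our addition, it is not called anywhere in this script)
# that reports which of the preferred CJK fonts are available:
def check_cjk_fonts(preferred=('SimHei', 'Microsoft YaHei', 'SimSun', 'Arial Unicode MS')):
    """Return the subset of `preferred` fonts that matplotlib can find."""
    from matplotlib import font_manager
    installed = {f.name for f in font_manager.fontManager.ttflist}
    return [name for name in preferred if name in installed]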

# Module-level cache for the trained model and its preprocessing artifacts
cached_model = None
last_training_time = None
feature_columns = None
feature_scaler = None
target_scaler = None
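
# train_and_predict (below) invalidates this cache by deleting the files on
# disk. A small convenience sketch that performs the same reset in one call;
# the helper is hypothetical and unused, but the default file names match the
# ones used later in this script:
def clear_model_cache(model_file='salinity_lstm_model.h5', scaler_file='salinity_scalers.pkl'):
    """Delete the cached model/scaler files so the next run retrains from scratch."""
    for path in (model_file, scaler_file):
        if os.path.exists(path):
            os.remove(path)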

# Data loading
def load_data(upstream_file, downstream_file, qinglong_lake_file=None):
    try:
        # Files are comma-separated
        upstream_df = pd.read_csv(upstream_file)
        downstream_df = pd.read_csv(downstream_file)
        if qinglong_lake_file:
            qinglong_lake_df = pd.read_csv(qinglong_lake_file)
    except FileNotFoundError:
        print("File not found; please check the file paths")
        return None

    # Normalize the column names to 'DateTime', 'TagName', 'Value'
    upstream_df.columns = ['DateTime', 'TagName', 'Value']
    downstream_df.columns = ['DateTime', 'TagName', 'Value']
    if qinglong_lake_file:
        qinglong_lake_df.columns = ['DateTime', 'TagName', 'Value']

    # Parse the 'DateTime' column into timestamps
    upstream_df['DateTime'] = pd.to_datetime(upstream_df['DateTime'])
    downstream_df['DateTime'] = pd.to_datetime(downstream_df['DateTime'])
    if qinglong_lake_file:
        qinglong_lake_df['DateTime'] = pd.to_datetime(qinglong_lake_df['DateTime'])

    # Coerce 'Value' to numeric before outlier handling
    upstream_df['Value'] = pd.to_numeric(upstream_df['Value'], errors='coerce')
    downstream_df['Value'] = pd.to_numeric(downstream_df['Value'], errors='coerce')
    if qinglong_lake_file:
        qinglong_lake_df['Value'] = pd.to_numeric(qinglong_lake_df['Value'], errors='coerce')

    # Drop salinity readings below 5
    upstream_df = upstream_df[upstream_df['Value'] >= 5]
    downstream_df = downstream_df[downstream_df['Value'] >= 5]
    if qinglong_lake_file:
        qinglong_lake_df = qinglong_lake_df[qinglong_lake_df['Value'] >= 5]

    # Replace zero readings with NaN so they are interpolated later
    # (redundant after the >= 5 filter above; kept as a safeguard)
    upstream_df.loc[upstream_df['Value'] == 0, 'Value'] = np.nan
    downstream_df.loc[downstream_df['Value'] == 0, 'Value'] = np.nan
    if qinglong_lake_file:
        qinglong_lake_df.loc[qinglong_lake_df['Value'] == 0, 'Value'] = np.nan

    # Flag outliers with the 3-sigma rule and replace them with NaN
    for frame in [upstream_df, downstream_df]:
        mean = frame['Value'].mean()
        std = frame['Value'].std()
        lower_bound = mean - 3 * std
        upper_bound = mean + 3 * std
        frame.loc[(frame['Value'] < lower_bound) | (frame['Value'] > upper_bound), 'Value'] = np.nan

    if qinglong_lake_file:
        mean = qinglong_lake_df['Value'].mean()
        std = qinglong_lake_df['Value'].std()
        lower_bound = mean - 3 * std
        upper_bound = mean + 3 * std
        qinglong_lake_df.loc[(qinglong_lake_df['Value'] < lower_bound) | (qinglong_lake_df['Value'] > upper_bound), 'Value'] = np.nan

    # Rename 'Value' to 'upstream' / 'downstream' / 'qinglong_lake'
    upstream_df = upstream_df.rename(columns={'Value': 'upstream'})[['DateTime', 'upstream']]
    downstream_df = downstream_df.rename(columns={'Value': 'downstream'})[['DateTime', 'downstream']]
    if qinglong_lake_file:
        qinglong_lake_df = qinglong_lake_df.rename(columns={'Value': 'qinglong_lake'})[['DateTime', 'qinglong_lake']]

    # Merge the series on timestamp
    merged_df = pd.merge(upstream_df, downstream_df, on='DateTime', how='inner')
    if qinglong_lake_file:
        merged_df = pd.merge(merged_df, qinglong_lake_df, on='DateTime', how='left')

    # Handle NaN and invalid values
    print(f"Rows before cleaning: {len(merged_df)}")

    # Index by DateTime so time-weighted interpolation is available
    merged_df = merged_df.set_index('DateTime')

    # Fill NaNs with a cascade of interpolation strategies:
    # 1. Linear interpolation for short gaps (up to 4 steps)
    merged_df['upstream'] = merged_df['upstream'].interpolate(method='linear', limit=4)
    merged_df['downstream'] = merged_df['downstream'].interpolate(method='linear', limit=4)
    if qinglong_lake_file:
        merged_df['qinglong_lake'] = merged_df['qinglong_lake'].interpolate(method='linear', limit=4)

    # 2. Time-weighted interpolation for longer gaps (up to 24 steps)
    merged_df['upstream'] = merged_df['upstream'].interpolate(method='time', limit=24)
    merged_df['downstream'] = merged_df['downstream'].interpolate(method='time', limit=24)
    if qinglong_lake_file:
        merged_df['qinglong_lake'] = merged_df['qinglong_lake'].interpolate(method='time', limit=24)

    # 3. Forward/backward fill for anything that remains
    merged_df['upstream'] = merged_df['upstream'].ffill().bfill()
    merged_df['downstream'] = merged_df['downstream'].ffill().bfill()
    if qinglong_lake_file:
        merged_df['qinglong_lake'] = merged_df['qinglong_lake'].ffill().bfill()

    # 4. Smooth the series with a centered moving average
    merged_df['upstream_smooth'] = merged_df['upstream'].rolling(window=24, min_periods=1, center=True).mean()
    merged_df['downstream_smooth'] = merged_df['downstream'].rolling(window=24, min_periods=1, center=True).mean()
    if qinglong_lake_file:
        merged_df['qinglong_lake_smooth'] = merged_df['qinglong_lake'].rolling(window=24, min_periods=1, center=True).mean()

    # Apply extra smoothing to 青龙港 (upstream) readings below 50
    low_salinity_mask = merged_df['upstream'] < 50
    if low_salinity_mask.any():
        # Use a wider window over the low-salinity stretch
        merged_df.loc[low_salinity_mask, 'upstream_smooth'] = merged_df.loc[low_salinity_mask, 'upstream'].rolling(
            window=48, min_periods=1, center=True).mean()

    # Drop the remaining NaN and infinite values
    merged_df = merged_df.dropna()
    merged_df = merged_df[np.isfinite(merged_df['upstream'])]
    merged_df = merged_df[np.isfinite(merged_df['downstream'])]
    if qinglong_lake_file:
        merged_df = merged_df[np.isfinite(merged_df['qinglong_lake'])]

    # Restore DateTime as a regular column
    merged_df = merged_df.reset_index()

    # Final sanity checks
    print(f"Rows after cleaning: {len(merged_df)}")
    print(f"Upstream salinity range: {merged_df['upstream'].min()} - {merged_df['upstream'].max()}")
    print(f"Downstream salinity range: {merged_df['downstream'].min()} - {merged_df['downstream'].max()}")
    if qinglong_lake_file:
        print(f"Qinglong Lake salinity range: {merged_df['qinglong_lake'].min()} - {merged_df['qinglong_lake'].max()}")

    # Make sure rows are in chronological order
    merged_df = merged_df.sort_values('DateTime')
    return merged_df
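
# A minimal usage sketch for load_data. The file names are the same ones used
# at the bottom of this script; the summary helper itself is an illustrative
# addition, not part of the original pipeline:
def summarize_salinity(df):
    """Print per-column coverage and value ranges for a cleaned DataFrame."""
    for col in df.columns.drop('DateTime'):
        print(f"{col}: n={df[col].notna().sum()}, "
              f"min={df[col].min():.2f}, max={df[col].max():.2f}")
# Example:
#   df = load_data('青龙港1.csv', '一取水.csv')
#   if df is not None:
#       summarize_salinity(df)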

# Feature engineering - LSTM sequence version
def create_sequences(df, look_back=96, forecast_horizon=5):
    print("Starting feature engineering (LSTM sequence mode)...")
    start_time = time()

    # Main feature series
    upstream = df['upstream'].values
    downstream = df['downstream'].values

    # Precompute normalized time features
    date_features = np.array([
        [x.hour/24, x.dayofweek/7, x.month/12]
        for x in df['DateTime']
    ])

    # Input and target sequences
    X = []
    y = []

    # Number of usable samples
    total_samples = len(df) - look_back - forecast_horizon
    if total_samples <= 0:
        print("Not enough data to build sequences")
        return np.array([]), np.array([])

    print(f"Building sequences, total samples: {total_samples}")

    # Process in batches so progress reporting stays cheap
    batch_size = 1000
    for batch_start in range(0, total_samples, batch_size):
        batch_end = min(batch_start + batch_size, total_samples)
        print(f"Processing batch: {batch_start}-{batch_end}/{total_samples}")

        for i in range(batch_start, batch_end):
            # Current window indices
            end_idx = i + look_back

            # Raw sequence features
            upstream_seq = upstream[i:end_idx]
            downstream_seq = downstream[i:end_idx]
            time_seq = date_features[i:end_idx]

            # Skip windows that contain NaN
            if np.isnan(upstream_seq).any() or np.isnan(downstream_seq).any():
                continue

            # Stack features into [timesteps, features]; each timestep holds:
            # upstream salinity, downstream salinity, time features (hour, weekday, month)
            input_seq = np.column_stack([
                upstream_seq,
                downstream_seq,
                time_seq
            ])

            # Target: downstream salinity over the next forecast_horizon steps
            target_seq = downstream[end_idx:end_idx+forecast_horizon]

            # Keep the pair only if the target is NaN-free
            if not np.isnan(target_seq).any():
                X.append(input_seq)
                y.append(target_seq)

    X = np.array(X)
    y = np.array(y)

    if len(X) == 0 or len(y) == 0:
        print("Warning: no valid feature/label pairs could be generated")
        return np.array([]), np.array([])

    end_time = time()
    print(f"Feature engineering done. Valid samples: {len(X)}, X shape: {X.shape}, y shape: {y.shape}, elapsed: {end_time-start_time:.2f}s")

    # Remember the feature layout for later use
    global feature_columns
    feature_columns = ['upstream', 'downstream', 'hour', 'day_of_week', 'month']

    return X, y
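
# The per-sample Python loop above costs O(total_samples * look_back). A sketch
# of an equivalent vectorized construction using numpy's sliding_window_view
# (requires numpy >= 1.20). It is unused by the pipeline; `features` is assumed
# to be the full [rows, features] matrix, i.e.
# np.column_stack([upstream, downstream, date_features]):
def create_sequences_fast(features, target, look_back=96, forecast_horizon=5):
    """Vectorized window construction mirroring create_sequences' indexing."""
    from numpy.lib.stride_tricks import sliding_window_view
    n = len(target) - look_back - forecast_horizon
    if n <= 0:
        return np.array([]), np.array([])
    # [n, look_back, n_features] input windows
    X = sliding_window_view(features, look_back, axis=0)[:n].transpose(0, 2, 1)
    # [n, forecast_horizon] targets, starting right after each window
    y = sliding_window_view(target, forecast_horizon)[look_back:look_back + n]
    # Drop windows containing NaN, as the loop version does
    valid = ~(np.isnan(X).any(axis=(1, 2)) | np.isnan(y).any(axis=1))
    return X[valid], y[valid]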

# Build the LSTM model
def build_lstm_model(input_shape, output_length):
    model = Sequential([
        LSTM(units=64, return_sequences=True, input_shape=input_shape),
        Dropout(0.2),
        LSTM(units=32),
        Dropout(0.2),
        Dense(output_length)
    ])

    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model
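
# A quick shape check for the model above. With the defaults used in this
# script (look_back=96 timesteps, 5 features per step, forecast_horizon=5),
# a sketch of how the pieces fit together; the helper is illustrative only:
def demo_model_shapes():
    model = build_lstm_model(input_shape=(96, 5), output_length=5)
    model.summary()  # expect (None, 96, 64) -> (None, 32) -> (None, 5)
    dummy = np.zeros((1, 96, 5), dtype=np.float32)
    print(model.predict(dummy).shape)  # (1, 5)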

# Training and prediction
def train_and_predict(df, start_time, force_retrain=False):
    global cached_model, last_training_time, feature_scaler, target_scaler

    # Cached model files and state
    model_cache_file = 'salinity_lstm_model.h5'
    scaler_cache_file = 'salinity_scalers.pkl'
    model_needs_training = True

    # If a retrain is forced (e.g. the feature layout changed), drop stale caches
    if os.path.exists(model_cache_file) and force_retrain:
        try:
            os.remove(model_cache_file)
            if os.path.exists(scaler_cache_file):
                os.remove(scaler_cache_file)
            print("Deleted stale model cache before retraining")
        except OSError:
            pass

    # Train only on data strictly before the forecast start time
    train_df = df[df['DateTime'] < start_time].copy()

    # Reuse the in-memory model if there is one and no retrain was requested
    if not force_retrain and cached_model is not None and last_training_time is not None and feature_scaler is not None and target_scaler is not None:
        # If no new data arrived since the last training, keep the cached model
        if last_training_time >= train_df['DateTime'].max():
            model_needs_training = False
            print(f"Using cached model (last trained: {last_training_time})")
    # Otherwise try to load a previously saved model from disk
    elif not force_retrain and os.path.exists(model_cache_file) and os.path.exists(scaler_cache_file):
        try:
            cached_model = load_model(model_cache_file)
            with open(scaler_cache_file, 'rb') as f:
                scalers = pickle.load(f)
                feature_scaler = scalers['feature_scaler']
                target_scaler = scalers['target_scaler']
                last_training_time = scalers['training_time']

            # If no new data arrived since the last training, keep the loaded model
            if last_training_time >= train_df['DateTime'].max():
                model_needs_training = False
                print(f"Loaded model from disk (last trained: {last_training_time})")
        except Exception as e:
            print(f"Failed to load cached model: {e}")

    if model_needs_training:
        print("Training a new model...")

        if len(train_df) < 10:  # require a minimum amount of training data
            print("Not enough training data")
            return None, None, None

        # Inspect training data quality
        print(f"Training data range: {train_df['DateTime'].min()} to {train_df['DateTime'].max()}")
        print(f"NaN counts in training data:\nupstream: {train_df['upstream'].isna().sum()}\ndownstream: {train_df['downstream'].isna().sum()}")

        # Start the training timer
        start_time_training = time()

        # Feature engineering
        X, y = create_sequences(train_df)

        # Bail out if no usable samples were produced
        if len(X) == 0 or len(y) == 0:
            print("No valid samples available for training")
            return None, None, None

        print(f"Training samples: {len(X)}")

        # Fit the feature scaler - LSTM inputs are scaled to [0, 1]
        feature_scaler = MinMaxScaler(feature_range=(0, 1))

        # Scale every timestep of every sample:
        # flatten [samples, timesteps, features] -> [samples*timesteps, features]
        n_samples, n_timesteps, n_features = X.shape
        X_reshaped = X.reshape(n_samples * n_timesteps, n_features)
        X_scaled = feature_scaler.fit_transform(X_reshaped)
        X_scaled = X_scaled.reshape(n_samples, n_timesteps, n_features)

        # Scale the targets as well
        target_scaler = MinMaxScaler(feature_range=(0, 1))
        y_scaled = target_scaler.fit_transform(y)

        # Split into training and validation sets
        X_train, X_val, y_train, y_val = train_test_split(X_scaled, y_scaled, test_size=0.2, random_state=42)

        # Build the LSTM model
        input_shape = (X_train.shape[1], X_train.shape[2])
        output_length = y_train.shape[1]
        model = build_lstm_model(input_shape, output_length)

        # Train
        try:
            print("Starting model training...")

            # Early stopping on validation loss
            early_stopping = EarlyStopping(
                monitor='val_loss',
                patience=20,
                restore_best_weights=True
            )

            history = model.fit(
                X_train, y_train,
                validation_data=(X_val, y_val),
                epochs=10,  # NOTE: tune this for real runs
                batch_size=32,
                callbacks=[early_stopping],
                verbose=1
            )

            # Validation performance
            val_pred_scaled = model.predict(X_val)
            val_pred = target_scaler.inverse_transform(val_pred_scaled)
            y_val_inv = target_scaler.inverse_transform(y_val)
            rmse = np.sqrt(np.mean((val_pred - y_val_inv) ** 2))
            print(f"Validation RMSE: {rmse:.4f}")

            # Record the training cutoff (the forecast start time)
            last_training_time = start_time

            # Keep the model in memory
            cached_model = model

            # Persist the model and scalers to disk
            model.save(model_cache_file)
            with open(scaler_cache_file, 'wb') as f:
                pickle.dump({
                    'feature_scaler': feature_scaler,
                    'target_scaler': target_scaler,
                    'training_time': last_training_time,
                    'feature_columns': feature_columns,
                    'rmse': rmse
                }, f)

            print(f"Model training finished in {time()-start_time_training:.2f}s")

        except Exception as e:
            print(f"Model training failed: {e}")
            return None, None, None
    else:
        # Reuse the cached model
        model = cached_model

    # Prepare the most recent window for prediction
    try:
        print("Preparing prediction input...")
        look_back = 96  # must match the value used during training

        # Take the most recent look_back rows before the forecast start
        latest_data = df[df['DateTime'] < start_time].tail(look_back).copy()
        if len(latest_data) < look_back:  # require a full history window
            print("Not enough history for prediction")
            return None, None, None

        # Fill any residual NaNs
        if latest_data['upstream'].isna().any() or latest_data['downstream'].isna().any():
            latest_data['upstream'] = latest_data['upstream'].ffill().bfill()
            latest_data['downstream'] = latest_data['downstream'].ffill().bfill()

        # Feature sequences
        upstream_seq = latest_data['upstream'].values
        downstream_seq = latest_data['downstream'].values

        # Time features
        time_seq = np.array([
            [x.hour/24, x.dayofweek/7, x.month/12]
            for x in latest_data['DateTime']
        ])

        # Stack features with the same layout as training
        input_seq = np.column_stack([
            upstream_seq,
            downstream_seq,
            time_seq
        ])

        # Guard against invalid values
        if np.isnan(input_seq).any() or np.isinf(input_seq).any():
            print("Prediction features contain invalid values")
            input_seq = np.nan_to_num(input_seq, nan=0.0, posinf=1e6, neginf=-1e6)

        # Scale, then add the batch dimension
        input_seq_scaled = feature_scaler.transform(input_seq)
        input_seq_scaled = input_seq_scaled.reshape(1, look_back, -1)

        # Predict
        print("Running prediction...")
        predictions_scaled = model.predict(input_seq_scaled)

        # Undo the target scaling
        predictions = target_scaler.inverse_transform(predictions_scaled)[0]

        # Future dates for the forecast
        forecast_horizon = 5  # must match the value used during training
        future_dates = [start_time + timedelta(days=i) for i in range(forecast_horizon)]

        print("Prediction finished")
        return future_dates, predictions, model

    except Exception as e:
        print(f"Prediction failed: {e}")
        return None, None, None
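
# The cache-loading branch above is buried inside train_and_predict. A small
# standalone sketch of the same load path, useful for offline inspection of a
# previously trained run; the helper is our addition, but the default file
# names match the ones used above:
def load_cached_artifacts(model_file='salinity_lstm_model.h5', scaler_file='salinity_scalers.pkl'):
    """Return (model, scalers_dict) from disk, or (None, None) if unavailable."""
    if not (os.path.exists(model_file) and os.path.exists(scaler_file)):
        return None, None
    model = load_model(model_file)
    with open(scaler_file, 'rb') as f:
        scalers = pickle.load(f)
    return model, scalers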

# GUI
def run_gui():
    # Configure tkinter fonts for Chinese text
    def configure_gui_fonts():
        # Try fonts that support Chinese until one works
        font_names = ['微软雅黑', 'Microsoft YaHei', 'SimSun', 'SimHei']
        for font_name in font_names:
            try:
                default_font = tkfont.nametofont("TkDefaultFont")
                default_font.configure(family=font_name)
                text_font = tkfont.nametofont("TkTextFont")
                text_font.configure(family=font_name)
                fixed_font = tkfont.nametofont("TkFixedFont")
                fixed_font.configure(family=font_name)
                return True
            except Exception:
                continue
        return False

    def on_predict():
        try:
            predict_start_time = time()
            status_label.config(text="Predicting...")
            root.update()

            start_time = pd.to_datetime(entry.get())
            # Check the model cache
            cache_exists = os.path.exists('salinity_lstm_model.h5')
            if cache_exists and not retrain_var.get():
                try:
                    with open('salinity_scalers.pkl', 'rb') as f:
                        scalers = pickle.load(f)
                    # Verify that the cached model's feature layout matches
                    model_features = scalers.get('feature_columns', [])
                    expected_features = ['upstream', 'downstream', 'hour', 'day_of_week', 'month']

                    if len(model_features) != len(expected_features):
                        status_label.config(text="Feature layout changed; please tick 'Force model retraining'")
                        return
                except Exception:
                    pass

            force_retrain = retrain_var.get()
            future_dates, predictions, model = train_and_predict(df, start_time, force_retrain)

            if future_dates is None or predictions is None:
                status_label.config(text="Prediction failed")
                return

            # Clear the previous plot
            ax.clear()

            # Plot historical data
            history_end = min(start_time, df['DateTime'].max())
            history_start = history_end - timedelta(days=120)  # show the last 120 days of history
            history_data = df[(df['DateTime'] >= history_start) & (df['DateTime'] <= history_end)]

            # 一取水 (downstream) history
            ax.plot(history_data['DateTime'], history_data['downstream'],
                    label='一取水 (downstream) salinity', color='blue', linewidth=1.5)

            # 青龙港 (upstream) history - smoothed series
            ax.plot(history_data['DateTime'], history_data['upstream_smooth'],
                    label='青龙港 (upstream) salinity', color='purple', linewidth=1.5, alpha=0.7)

            # 青龙湖 history - smoothed series, if present
            if 'qinglong_lake_smooth' in history_data.columns:
                ax.plot(history_data['DateTime'], history_data['qinglong_lake_smooth'],
                        label='青龙湖 salinity', color='green', linewidth=1.5, alpha=0.7)

            # Actual values over the forecast window, if available
            actual_data = df[(df['DateTime'] >= start_time) &
                             (df['DateTime'] <= future_dates[-1])]

            # Plot the forecast
            ax.plot(future_dates, predictions, marker='o', linestyle='--',
                    label='Predicted salinity', color='red', linewidth=2)

            # Plot actuals when we have them
            if not actual_data.empty:
                ax.plot(actual_data['DateTime'], actual_data['downstream'],
                        marker='s', linestyle='-', label='Actual salinity',
                        color='orange', linewidth=2)

                # Forecast error: match each forecast date to the closest actual reading
                actual_values = []
                for pred_date in future_dates:
                    # Find the closest timestamp
                    closest_idx = (actual_data['DateTime'] - pred_date).abs().idxmin()
                    actual_values.append(actual_data.loc[closest_idx, 'downstream'])

                if len(actual_values) == len(predictions):
                    mse = np.mean((np.array(actual_values) - predictions) ** 2)
                    rmse = np.sqrt(mse)
                    mae = np.mean(np.abs(np.array(actual_values) - predictions))

                    # Show the error metrics on the plot
                    error_text = f"RMSE: {rmse:.2f}, MAE: {mae:.2f}"
                    ax.text(0.02, 0.05, error_text, transform=ax.transAxes,
                            bbox=dict(facecolor='white', alpha=0.8))

            # Rough confidence band: half the historical standard deviation
            # as an estimate of forecast uncertainty
            std_dev = history_data['downstream'].std() * 0.5
            ax.fill_between(future_dates,
                            predictions - std_dev,
                            predictions + std_dev,
                            color='red', alpha=0.2)

            # Title, labels, legend
            ax.set_xlabel('Date')
            ax.set_ylabel('Salinity')
            ax.set_title(f"Salinity forecast from {start_time.strftime('%Y-%m-%d %H:%M:%S')}")
            ax.legend(loc='upper left')

            # Avoid clipped labels
            fig.tight_layout()

            # Refresh the canvas
            canvas.draw()

            # Total prediction time
            predict_time = time() - predict_start_time

            # Update the status line
            status_label.config(text=f"Prediction finished ({predict_time:.2f}s)")

            # Show the numeric forecast
            result_text = "Forecast:\n"
            for i, (date, pred) in enumerate(zip(future_dates, predictions)):
                result_text += f"Day {i+1} ({date.strftime('%Y-%m-%d')}): {pred:.2f}\n"

                # Append actuals when available
                if not actual_data.empty and i < len(actual_values):
                    result_text += f"  Actual salinity: {actual_values[i]:.2f}\n"
                    result_text += f"  Error: {abs(actual_values[i] - pred):.2f}\n"

            result_label.config(text=result_text)

        except Exception as e:
            status_label.config(text=f"Error: {str(e)}")

    def on_scroll(event):
        # Current axis limits
        xlim = ax.get_xlim()
        ylim = ax.get_ylim()

        # Zoom increment per scroll step
        zoom_factor = 1.1

        # Mouse position in data coordinates
        x_data = event.xdata
        y_data = event.ydata

        # Fall back to the axis center when the cursor is outside the axes
        if x_data is None:
            x_data = (xlim[0] + xlim[1]) / 2
        if y_data is None:
            y_data = (ylim[0] + ylim[1]) / 2

        # Relative position of the cursor inside the current view
        x_rel = (x_data - xlim[0]) / (xlim[1] - xlim[0])
        y_rel = (y_data - ylim[0]) / (ylim[1] - ylim[0])

        # event.step is positive when scrolling up (zoom in) and negative when
        # scrolling down (zoom out) - more reliable than event.button here
        if event.step > 0:
            scale = 1 / zoom_factor  # zoom in
        else:
            scale = zoom_factor      # zoom out

        # Resize the view, keeping the cursor's relative position fixed
        new_width = (xlim[1] - xlim[0]) * scale
        new_height = (ylim[1] - ylim[0]) * scale
        x0 = x_data - x_rel * new_width
        y0 = y_data - y_rel * new_height
        ax.set_xlim([x0, x0 + new_width])
        ax.set_ylim([y0, y0 + new_height])

        # Refresh the canvas
        canvas.draw_idle()

    # Mouse-drag panning
    def on_mouse_press(event):
        if event.button == 1:  # left button
            canvas.mpl_disconnect(hover_cid)
            canvas._pan_start = (event.x, event.y)
            canvas._xlim = ax.get_xlim()
            canvas._ylim = ax.get_ylim()
            # Store the connection id so on_mouse_release can disconnect it
            move_cid[0] = canvas.mpl_connect('motion_notify_event', on_mouse_move)

    def on_mouse_release(event):
        nonlocal hover_cid
        if event.button == 1:  # left button
            if move_cid[0] is not None:
                canvas.mpl_disconnect(move_cid[0])
                move_cid[0] = None
            hover_cid = canvas.mpl_connect('motion_notify_event', update_cursor)

    def on_mouse_move(event):
        if event.button == 1 and hasattr(canvas, '_pan_start'):
            dx = event.x - canvas._pan_start[0]
            dy = event.y - canvas._pan_start[1]

            # Data spans of the view captured at press time
            x_span = canvas._xlim[1] - canvas._xlim[0]
            y_span = canvas._ylim[1] - canvas._ylim[0]

            # Conversion factors from pixels to data units
            width, height = canvas.get_width_height()
            x_scale = x_span / width
            y_scale = y_span / height

            # New limits shifted by the drag distance
            xlim = [canvas._xlim[0] - dx * x_scale,
                    canvas._xlim[1] - dx * x_scale]
            ylim = [canvas._ylim[0] + dy * y_scale,
                    canvas._ylim[1] + dy * y_scale]

            # Apply the new limits
            ax.set_xlim(xlim)
            ax.set_ylim(ylim)
            canvas.draw_idle()

    # Cursor style feedback
    def update_cursor(event):
        if event.inaxes == ax:
            canvas.get_tk_widget().config(cursor="fleur")  # move cursor signals the plot is draggable
        else:
            canvas.get_tk_widget().config(cursor="")

    # Reset the plot view
    def reset_view():
        display_history()
        status_label.config(text="Plot view reset")

    root = tk.Tk()
    root.title("青龙港-陈行 Salinity Forecast System")

    # Try to configure fonts that can render Chinese labels
    try:
        import tkinter.font as tkfont
        configure_gui_fonts()
    except Exception:
        print("Could not configure GUI fonts; Chinese text may not render correctly")

    # Frames
    input_frame = ttk.Frame(root, padding="10")
    input_frame.pack(fill=tk.X)

    control_frame = ttk.Frame(root, padding="5")
    control_frame.pack(fill=tk.X)

    result_frame = ttk.Frame(root, padding="10")
    result_frame.pack(fill=tk.BOTH, expand=True)

    # Input field and predict button
    ttk.Label(input_frame, text="Start time (YYYY-MM-DD HH:MM:SS)").pack(side=tk.LEFT)
    entry = ttk.Entry(input_frame, width=25)
    entry.pack(side=tk.LEFT, padx=5)
    predict_button = ttk.Button(input_frame, text="Predict", command=on_predict)
    predict_button.pack(side=tk.LEFT)

    # Status label
    status_label = ttk.Label(input_frame, text="Hint: tick 'Force model retraining' on the first run")
    status_label.pack(side=tk.LEFT, padx=10)

    # Controls
    retrain_var = tk.BooleanVar(value=False)
    ttk.Checkbutton(control_frame, text="Force model retraining", variable=retrain_var).pack(side=tk.LEFT)

    # Legend hint
    legend_label = ttk.Label(control_frame, text="Legend: purple = 青龙港 upstream, blue = 一取水 downstream, red = forecast, orange = actual")
    legend_label.pack(side=tk.LEFT, padx=10)

    # Result label
    result_label = ttk.Label(result_frame, text="", justify=tk.LEFT)
    result_label.pack(side=tk.RIGHT, fill=tk.Y)

    # Plot area - higher dpi for a sharper figure
    fig, ax = plt.subplots(figsize=(10, 5), dpi=100)

    # Canvas with navigation toolbar
    canvas = FigureCanvasTkAgg(fig, master=result_frame)
    canvas.get_tk_widget().pack(side=tk.LEFT, fill=tk.BOTH, expand=True)

    # Toolbar
    toolbar_frame = ttk.Frame(result_frame)
    toolbar_frame.pack(side=tk.BOTTOM, fill=tk.X)
    toolbar = NavigationToolbar2Tk(canvas, toolbar_frame)
    toolbar.update()

    # Custom reset button
    reset_button = ttk.Button(control_frame, text="Reset view", command=reset_view)
    reset_button.pack(side=tk.LEFT, padx=5)

    # Wire up mouse events
    canvas.mpl_connect('button_press_event', on_mouse_press)
    canvas.mpl_connect('button_release_event', on_mouse_release)
    canvas.mpl_connect('scroll_event', on_scroll)

    # Connection ids shared between the handlers above
    move_cid = [None]
    hover_cid = canvas.mpl_connect('motion_notify_event', update_cursor)

    # Plot historical data on startup
    def display_history():
        # Clear the previous plot
        ax.clear()

        # Show the full history, capped at the most recent 60 days
        end_date = df['DateTime'].max()
        start_date = max(df['DateTime'].min(), end_date - timedelta(days=60))
        display_data = df[(df['DateTime'] >= start_date) & (df['DateTime'] <= end_date)]

        # 一取水 (downstream) history
        ax.plot(display_data['DateTime'], display_data['downstream'],
                label='一取水 (downstream) salinity', color='blue', linewidth=1.5)

        # 青龙港 (upstream) history - smoothed series
        ax.plot(display_data['DateTime'], display_data['upstream_smooth'],
                label='青龙港 (upstream) salinity', color='purple', linewidth=1.5, alpha=0.7)

        # # 青龙湖 history - smoothed series
        # if 'qinglong_lake_smooth' in display_data.columns:
        #     ax.plot(display_data['DateTime'], display_data['qinglong_lake_smooth'],
        #             label='青龙湖 salinity', color='green', linewidth=1.5, alpha=0.7)

        # Title, labels, legend
        ax.set_xlabel('Date')
        ax.set_ylabel('Salinity')
        ax.set_title('Historical salinity comparison')
        ax.legend()

        # Avoid clipped labels
        fig.tight_layout()

        # Refresh the canvas
        canvas.draw()

    display_history()

    # Start the GUI event loop
    root.mainloop()
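
# For batch or headless use (no Tk display), the training and forecasting path
# can be driven directly. A minimal sketch, assuming the same CSV files used
# below; the helper and its placeholder start timestamp are illustrative:
def predict_headless(start='2024-01-01 00:00:00', force_retrain=False):
    data = load_data('青龙港1.csv', '一取水.csv')
    if data is None:
        return None
    dates, preds, _ = train_and_predict(data, pd.to_datetime(start), force_retrain)
    if dates is None:
        return None
    return list(zip(dates, preds))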

# Entry point
if __name__ == '__main__':
    df = load_data('青龙港1.csv', '一取水.csv')

    # Launch the GUI only if the data loaded successfully
    if df is not None:
        run_gui()
    else:
        print("Data loading failed; cannot run predictions.")