import sys
|
import pandas as pd
|
import numpy as np
|
import matplotlib.pyplot as plt
|
from sklearn.preprocessing import MinMaxScaler
|
from sklearn.metrics import mean_absolute_error, mean_squared_error
|
|
# Check numpy version
|
if np.__version__ < '1.23.5':
|
print(f"Warning: Current numpy version {np.__version__} may cause compatibility issues.")
|
print("Please upgrade numpy to version 1.23.5 or higher.")
|
|
try:
|
import tensorflow as tf
|
print(f"TensorFlow version: {tf.__version__}")
|
except ImportError as e:
|
print("Error importing TensorFlow:", e)
|
sys.exit(1)
|
|
# Check TensorFlow version
|
if tf.__version__ < '2.10.0':
|
print(f"Warning: Current TensorFlow version {tf.__version__} may cause compatibility issues.")
|
print("Please upgrade TensorFlow to version 2.10.0 or higher.")
|
|
from tensorflow.keras.models import Model
|
from tensorflow.keras.layers import Input, Conv1D, LSTM, Dense, Layer
|
from tensorflow.keras.callbacks import EarlyStopping
|
|
# ================================
|
# 1. 数据预处理模块
|
# ================================
|
class DataPreprocessor:
|
def __init__(self, file_path, target_col='Value'):
|
# 使用正确的编码和列名
|
self.df = pd.read_csv(file_path, encoding='utf-8-sig', parse_dates=['DateTime'], index_col='DateTime')
|
self.target_col = target_col
|
self.scaler = MinMaxScaler(feature_range=(0, 1))
|
|
def preprocess(self, resample_freq='h'): # 使用小写的'h'代替大写的'H'
|
"""数据重采样与归一化"""
|
# 只选择Value列进行处理
|
value_series = self.df[self.target_col]
|
|
# 处理非等间隔采样
|
df_resampled = value_series.resample(resample_freq).mean()
|
df_filled = df_resampled.fillna(method='ffill').fillna(method='bfill') # 双向填充
|
|
# 归一化处理
|
self.scaled_data = self.scaler.fit_transform(df_filled.values.reshape(-1, 1))
|
self.dates = df_filled.index
|
return self.scaled_data, self.dates
|
|
def create_sequences(self, data, look_back=72, pred_steps=120):
|
"""创建监督学习序列"""
|
X, Y = [], []
|
for i in range(len(data) - look_back - pred_steps):
|
X.append(data[i:(i + look_back)])
|
Y.append(data[(i + look_back):(i + look_back + pred_steps)])
|
return np.array(X), np.array(Y)
|
|
# ================================
|
# 2. 模型构建模块
|
# ================================
|
class TemporalAttention(Layer):
|
"""时间注意力机制层"""
|
def __init__(self, units):
|
super(TemporalAttention, self).__init__()
|
self.W1 = Dense(units)
|
self.W2 = Dense(units)
|
self.V = Dense(1)
|
|
def call(self, encoder_output, lstm_output):
|
lstm_output = tf.expand_dims(lstm_output, 1)
|
score = self.V(tf.nn.tanh(
|
self.W1(encoder_output) + self.W2(lstm_output)))
|
attention_weights = tf.nn.softmax(score, axis=1)
|
context_vector = attention_weights * encoder_output
|
context_vector = tf.reduce_sum(context_vector, axis=1)
|
return context_vector, attention_weights
|
|
class SalinityPredictor:
|
def __init__(self, look_back=72, pred_steps=120):
|
self.look_back = look_back # 历史窗口(小时)
|
self.pred_steps = pred_steps # 预测步长(5天=120小时)
|
|
def build_model(self):
|
"""构建CNN-LSTM-Attention混合模型"""
|
inputs = Input(shape=(self.look_back, 1))
|
|
# CNN特征提取
|
cnn = Conv1D(64, 3, activation='relu', padding='same')(inputs)
|
cnn = Conv1D(32, 3, activation='relu', padding='same')(cnn)
|
|
# LSTM时序建模
|
lstm_out = LSTM(128, return_sequences=True)(cnn)
|
lstm_out = LSTM(64, return_sequences=False)(lstm_out)
|
|
# 注意力机制
|
context_vector, _ = TemporalAttention(64)(cnn, lstm_out)
|
|
# 输出层
|
outputs = Dense(self.pred_steps)(context_vector)
|
|
self.model = Model(inputs=inputs, outputs=outputs)
|
self.model.compile(optimizer='adam', loss='mse')
|
return self.model
|
|
def dynamic_split(self, data, dates, cutoff_date):
|
"""动态划分训练集"""
|
cutoff_idx = np.where(dates <= cutoff_date)[0][-self.look_back]
|
train_data = data[:cutoff_idx]
|
return train_data
|
|
def train(self, X_train, y_train, epochs=200, batch_size=32):
|
"""模型训练"""
|
early_stop = EarlyStopping(monitor='val_loss', patience=20)
|
history = self.model.fit(
|
X_train, y_train,
|
epochs=epochs,
|
batch_size=batch_size,
|
validation_split=0.2,
|
callbacks=[early_stop],
|
verbose=1
|
)
|
return history
|
|
def predict(self, last_sequence):
|
"""递归多步预测"""
|
predictions = []
|
current_seq = last_sequence.copy()
|
|
for _ in range(self.pred_steps):
|
pred = self.model.predict(current_seq[np.newaxis, :, :], verbose=0)
|
predictions.append(pred[0][0]) # 只取第一个预测值
|
current_seq = np.roll(current_seq, -1, axis=0)
|
current_seq[-1] = pred[0][0] # 使用单个预测值更新序列
|
|
return np.array(predictions)
|
|
# ================================
|
# 3. 完整流程执行
|
# ================================
|
if __name__ == "__main__":
|
# 参数配置
|
DATA_PATH = 'D:\opencv\.venv\一取水.csv'
|
CUTOFF_DATE = '2024-12-20 00:00' # 用户指定分割时间点
|
LOOK_BACK = 72 # 3天历史数据
|
PRED_STEPS = 120 # 预测5天
|
|
# 数据预处理
|
preprocessor = DataPreprocessor(DATA_PATH)
|
scaled_data, dates = preprocessor.preprocess()
|
X, Y = preprocessor.create_sequences(scaled_data, LOOK_BACK, PRED_STEPS)
|
|
# 模型构建
|
predictor = SalinityPredictor(LOOK_BACK, PRED_STEPS)
|
model = predictor.build_model()
|
model.summary()
|
|
# 动态训练
|
train_data = predictor.dynamic_split(scaled_data, dates, pd.to_datetime(CUTOFF_DATE))
|
X_train, y_train = preprocessor.create_sequences(train_data, LOOK_BACK, PRED_STEPS)
|
history = predictor.train(X_train, y_train)
|
|
# 预测验证
|
cutoff_idx = np.where(dates <= pd.to_datetime(CUTOFF_DATE))[0][-1]
|
last_seq = scaled_data[cutoff_idx-LOOK_BACK:cutoff_idx] # 使用分割点前的数据作为输入
|
scaled_pred = predictor.predict(last_seq)
|
predictions = preprocessor.scaler.inverse_transform(scaled_pred.reshape(-1, 1))
|
|
# 结果可视化
|
true_dates = pd.date_range(start=pd.to_datetime(CUTOFF_DATE), periods=PRED_STEPS, freq='h')
|
plt.figure(figsize=(15, 6))
|
|
# 绘制分割点前的历史数据
|
plt.plot(dates[cutoff_idx-PRED_STEPS:cutoff_idx],
|
preprocessor.scaler.inverse_transform(scaled_data[cutoff_idx-PRED_STEPS:cutoff_idx]),
|
'b-', label='历史数据(分割点前)')
|
|
# 绘制分割点后的实际数据
|
plt.plot(dates[cutoff_idx:cutoff_idx+PRED_STEPS],
|
preprocessor.scaler.inverse_transform(scaled_data[cutoff_idx:cutoff_idx+PRED_STEPS]),
|
'g-', label='实际数据(分割点后)')
|
|
# 绘制预测数据
|
plt.plot(true_dates, predictions, 'r--', label='预测数据')
|
|
# 添加分割线
|
plt.axvline(x=pd.to_datetime(CUTOFF_DATE), color='k', linestyle='--', label='分割时间点')
|
|
plt.title(f'盐度预测对比({CUTOFF_DATE}后5天)')
|
plt.xlabel('时间')
|
plt.ylabel('盐度值')
|
plt.legend()
|
plt.grid(True)
|
plt.xticks(rotation=45)
|
plt.tight_layout()
|
plt.show()
|
|
# 性能指标
|
true_values = preprocessor.scaler.inverse_transform(scaled_data[cutoff_idx:cutoff_idx+PRED_STEPS])
|
mae = mean_absolute_error(true_values, predictions)
|
rmse = np.sqrt(mean_squared_error(true_values, predictions))
|
print(f'验证指标 => MAE: {mae:.3f}, RMSE: {rmse:.3f}')
|
|
# ================================
|
# 4. 模型保存与加载(可选)
|
# ================================
|
# model.save('salinity_predictor.h5')
|
# loaded_model = tf.keras.models.load_model('salinity_predictor.h5', custom_objects={'TemporalAttention': TemporalAttention})
|