rp
3 天以前 3ff98c7953b710f91eb7c9812717019f96697822
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error
 
# Check numpy version
if np.__version__ < '1.23.5':
    print(f"Warning: Current numpy version {np.__version__} may cause compatibility issues.")
    print("Please upgrade numpy to version 1.23.5 or higher.")
 
try:
    import tensorflow as tf
    print(f"TensorFlow version: {tf.__version__}")
except ImportError as e:
    print("Error importing TensorFlow:", e)
    sys.exit(1)
 
# Check TensorFlow version
if tf.__version__ < '2.10.0':
    print(f"Warning: Current TensorFlow version {tf.__version__} may cause compatibility issues.")
    print("Please upgrade TensorFlow to version 2.10.0 or higher.")
 
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv1D, LSTM, Dense, Layer
from tensorflow.keras.callbacks import EarlyStopping
 
# ================================
# 1. 数据预处理模块
# ================================
class DataPreprocessor:
    def __init__(self, file_path, target_col='Value'):
        # 使用正确的编码和列名
        self.df = pd.read_csv(file_path, encoding='utf-8-sig', parse_dates=['DateTime'], index_col='DateTime')
        self.target_col = target_col
        self.scaler = MinMaxScaler(feature_range=(0, 1))
        
    def preprocess(self, resample_freq='h'):  # 使用小写的'h'代替大写的'H'
        """数据重采样与归一化"""
        # 只选择Value列进行处理
        value_series = self.df[self.target_col]
        
        # 处理非等间隔采样
        df_resampled = value_series.resample(resample_freq).mean()
        df_filled = df_resampled.fillna(method='ffill').fillna(method='bfill')  # 双向填充
        
        # 归一化处理
        self.scaled_data = self.scaler.fit_transform(df_filled.values.reshape(-1, 1))
        self.dates = df_filled.index
        return self.scaled_data, self.dates
    
    def create_sequences(self, data, look_back=72, pred_steps=120):
        """创建监督学习序列"""
        X, Y = [], []
        for i in range(len(data) - look_back - pred_steps):
            X.append(data[i:(i + look_back)])
            Y.append(data[(i + look_back):(i + look_back + pred_steps)])
        return np.array(X), np.array(Y)
 
# ================================
# 2. 模型构建模块
# ================================
class TemporalAttention(Layer):
    """时间注意力机制层"""
    def __init__(self, units):
        super(TemporalAttention, self).__init__()
        self.W1 = Dense(units)
        self.W2 = Dense(units)
        self.V = Dense(1)
 
    def call(self, encoder_output, lstm_output):
        lstm_output = tf.expand_dims(lstm_output, 1)
        score = self.V(tf.nn.tanh(
            self.W1(encoder_output) + self.W2(lstm_output)))
        attention_weights = tf.nn.softmax(score, axis=1)
        context_vector = attention_weights * encoder_output
        context_vector = tf.reduce_sum(context_vector, axis=1)
        return context_vector, attention_weights
 
class SalinityPredictor:
    def __init__(self, look_back=72, pred_steps=120):
        self.look_back = look_back    # 历史窗口(小时)
        self.pred_steps = pred_steps  # 预测步长(5天=120小时)
        
    def build_model(self):
        """构建CNN-LSTM-Attention混合模型"""
        inputs = Input(shape=(self.look_back, 1))
        
        # CNN特征提取
        cnn = Conv1D(64, 3, activation='relu', padding='same')(inputs)
        cnn = Conv1D(32, 3, activation='relu', padding='same')(cnn)
        
        # LSTM时序建模
        lstm_out = LSTM(128, return_sequences=True)(cnn)
        lstm_out = LSTM(64, return_sequences=False)(lstm_out)
        
        # 注意力机制
        context_vector, _ = TemporalAttention(64)(cnn, lstm_out)
        
        # 输出层
        outputs = Dense(self.pred_steps)(context_vector)
        
        self.model = Model(inputs=inputs, outputs=outputs)
        self.model.compile(optimizer='adam', loss='mse')
        return self.model
    
    def dynamic_split(self, data, dates, cutoff_date):
        """动态划分训练集"""
        cutoff_idx = np.where(dates <= cutoff_date)[0][-self.look_back]
        train_data = data[:cutoff_idx]
        return train_data
    
    def train(self, X_train, y_train, epochs=200, batch_size=32):
        """模型训练"""
        early_stop = EarlyStopping(monitor='val_loss', patience=20)
        history = self.model.fit(
            X_train, y_train,
            epochs=epochs,
            batch_size=batch_size,
            validation_split=0.2,
            callbacks=[early_stop],
            verbose=1
        )
        return history
    
    def predict(self, last_sequence):
        """递归多步预测"""
        predictions = []
        current_seq = last_sequence.copy()
        
        for _ in range(self.pred_steps):
            pred = self.model.predict(current_seq[np.newaxis, :, :], verbose=0)
            predictions.append(pred[0][0])  # 只取第一个预测值
            current_seq = np.roll(current_seq, -1, axis=0)
            current_seq[-1] = pred[0][0]  # 使用单个预测值更新序列
        
        return np.array(predictions)
 
# ================================
# 3. 完整流程执行
# ================================
if __name__ == "__main__":
    # 参数配置
    DATA_PATH = 'D:\opencv\.venv\一取水.csv'
    CUTOFF_DATE = '2024-12-20 00:00'  # 用户指定分割时间点
    LOOK_BACK = 72    # 3天历史数据
    PRED_STEPS = 120  # 预测5天
    
    # 数据预处理
    preprocessor = DataPreprocessor(DATA_PATH)
    scaled_data, dates = preprocessor.preprocess()
    X, Y = preprocessor.create_sequences(scaled_data, LOOK_BACK, PRED_STEPS)
    
    # 模型构建
    predictor = SalinityPredictor(LOOK_BACK, PRED_STEPS)
    model = predictor.build_model()
    model.summary()
    
    # 动态训练
    train_data = predictor.dynamic_split(scaled_data, dates, pd.to_datetime(CUTOFF_DATE))
    X_train, y_train = preprocessor.create_sequences(train_data, LOOK_BACK, PRED_STEPS)
    history = predictor.train(X_train, y_train)
    
    # 预测验证
    cutoff_idx = np.where(dates <= pd.to_datetime(CUTOFF_DATE))[0][-1]
    last_seq = scaled_data[cutoff_idx-LOOK_BACK:cutoff_idx]  # 使用分割点前的数据作为输入
    scaled_pred = predictor.predict(last_seq)
    predictions = preprocessor.scaler.inverse_transform(scaled_pred.reshape(-1, 1))
    
    # 结果可视化
    true_dates = pd.date_range(start=pd.to_datetime(CUTOFF_DATE), periods=PRED_STEPS, freq='h')
    plt.figure(figsize=(15, 6))
    
    # 绘制分割点前的历史数据
    plt.plot(dates[cutoff_idx-PRED_STEPS:cutoff_idx], 
             preprocessor.scaler.inverse_transform(scaled_data[cutoff_idx-PRED_STEPS:cutoff_idx]), 
             'b-', label='历史数据(分割点前)')
    
    # 绘制分割点后的实际数据
    plt.plot(dates[cutoff_idx:cutoff_idx+PRED_STEPS], 
             preprocessor.scaler.inverse_transform(scaled_data[cutoff_idx:cutoff_idx+PRED_STEPS]), 
             'g-', label='实际数据(分割点后)')
    
    # 绘制预测数据
    plt.plot(true_dates, predictions, 'r--', label='预测数据')
    
    # 添加分割线
    plt.axvline(x=pd.to_datetime(CUTOFF_DATE), color='k', linestyle='--', label='分割时间点')
    
    plt.title(f'盐度预测对比({CUTOFF_DATE}后5天)')
    plt.xlabel('时间')
    plt.ylabel('盐度值')
    plt.legend()
    plt.grid(True)
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()
    
    # 性能指标
    true_values = preprocessor.scaler.inverse_transform(scaled_data[cutoff_idx:cutoff_idx+PRED_STEPS])
    mae = mean_absolute_error(true_values, predictions)
    rmse = np.sqrt(mean_squared_error(true_values, predictions))
    print(f'验证指标 => MAE: {mae:.3f}, RMSE: {rmse:.3f}')
 
# ================================
# 4. 模型保存与加载(可选)
# ================================
# model.save('salinity_predictor.h5') 
# loaded_model = tf.keras.models.load_model('salinity_predictor.h5', custom_objects={'TemporalAttention': TemporalAttention})