简单的基于LSTM的黄金价格预测(Python)

avatar
作者
猴君
阅读量:0
# Numerical and tabular data handling.
import numpy as np
import pandas as pd

# Plotting.
import matplotlib.pyplot as plt
import plotly.express as px

# Scaling, splitting and evaluation helpers.
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_percentage_error

# Deep-learning stack.
import tensorflow as tf
from keras import Model
from keras.layers import Input, Dense, Dropout, LSTM

# Load the raw price table; the path is relative, so the CSV must be in the
# current working directory.
df = pd.read_csv('Gold Price (2013-2023).csv')

# Notebook-style display of the frame (has no effect when run as a script).
df

# Print column names, non-null counts and dtypes of the raw table.
df.info()
# Output recorded in the original article:
# <class 'pandas.core.frame.DataFrame'>
# RangeIndex: 2583 entries, 0 to 2582
# Data columns (total 7 columns):
#  #   Column    Non-Null Count  Dtype
# ---  ------    --------------  -----
#  0   Date      2583 non-null   object
#  1   Price     2583 non-null   object
#  2   Open      2583 non-null   object
#  3   High      2583 non-null   object
#  4   Low       2583 non-null   object
#  5   Vol.      2578 non-null   object
#  6   Change %  2583 non-null   object
# dtypes: object(7)
# memory usage: 141.4+ KB

# 'Vol.' and 'Change %' are not used anywhere later in the analysis, so they
# are removed in place.
df.drop(columns=['Vol.', 'Change %'], inplace=True)

# Notebook-style display of the frame (has no effect when run as a script).
df

# Convert the 'Date' column to datetime
df['Date'] = pd.to_datetime(df['Date'])

# Sort the DataFrame by the 'Date' column in ascending order
df.sort_values(by='Date', ascending=True, inplace=True)

# Reset the index of the DataFrame so row positions follow the sorted order
df.reset_index(drop=True, inplace=True)

# Every column except 'Date' was read as text (dtype object); strip the ','
# characters from the values and cast them to float64.
numCols = df.columns.drop('Date')
df[numCols] = df[numCols].replace({',': ''}, regex=True)
df[numCols] = df[numCols].astype('float64')

# Notebook-style sanity checks (bare expressions: displayed in a notebook,
# no effect when run as a script).
df.head()
df.duplicated().sum()
df.isnull().sum()
# Output recorded in the original article for df.isnull().sum():
# Date     0
# Price    0
# Open     0
# High     0
# Low      0
# dtype: int64

import plotly.express as px

# Interactive line chart of the price over time.
fig = px.line(y=df['Price'], x=df['Date'])
fig.update_traces(line_color='black')
fig.update_layout(
    xaxis_title='Date',
    yaxis_title='Price',
    title={
        'text': 'Gold Price Data',
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    },
    plot_bgcolor='rgba(255,223,0,0.9)',
)
fig.show()

# Number of rows dated 2022; used below as the size of the test set.
# NOTE(review): the later code takes the LAST test_size rows as the test set,
# which corresponds to 2022 only if 2022 is the final year present in the
# data -- confirm against the CSV.
test_size = df[df.Date.dt.year == 2022].shape[0]
print(test_size)

260

import matplotlib.pyplot as plt

# Static chart showing which part of the series is used for training (black)
# and which part is held out for testing (red).
plt.figure(figsize=(15, 6), dpi=150)

# These settings are global matplotlib state; they apply to the axes created
# by the first plt.plot call below and to any later figures.
plt.rcParams['axes.facecolor'] = 'cyan'
plt.rc('axes', edgecolor='white')

# Everything before the last test_size rows is the training span ...
plt.plot(df.Date[:-test_size], df.Price[:-test_size], color='black', lw=2)
# ... and the last test_size rows are the test span.
plt.plot(df.Date[-test_size:], df.Price[-test_size:], color='red', lw=2)

plt.title('Gold Price Train and Test', fontsize=15)
plt.xlabel('Date', fontsize=12)
plt.ylabel('Price', fontsize=12)
plt.legend(['Train Set', 'Test Set'], loc='upper left', prop={'size': 15})
plt.grid(color='white')

plt.show()

import tensorflow as tf

# Number of consecutive past prices the model sees for each prediction.
window_size = 60

# ---------------------------------------------------------------------------
# Scaling
# ---------------------------------------------------------------------------
# The scaler is fitted on the training span only, so the min/max used for
# normalisation contain no information from the test period.
scaler = MinMaxScaler()
train_data = scaler.fit_transform(
    df.Price[:-test_size].values.reshape(-1, 1)
)

# The test slice is extended backwards by window_size rows so that the first
# test target already has a full window of history in front of it.
test_data = scaler.transform(
    df.Price[-test_size - window_size:].values.reshape(-1, 1)
)


# ---------------------------------------------------------------------------
# Windowing
# ---------------------------------------------------------------------------
def _make_windows(scaled, window):
    """Turn a scaled (n, 1) series into supervised LSTM samples.

    Args:
        scaled: 2-D array of shape (n, 1) holding the scaled series.
        window: number of consecutive values used as input for one sample.

    Returns:
        A tuple (X, y): X has shape (n - window, window, 1) and holds the
        input windows, y has shape (n - window, 1) and holds the value that
        immediately follows each window. Both are empty when n <= window.
    """
    inputs = np.array(
        [scaled[start:start + window, 0]
         for start in range(len(scaled) - window)],
        dtype=scaled.dtype,
    )
    targets = scaled[window:, 0]
    return inputs.reshape(-1, window, 1), targets.reshape(-1, 1)


X_train, y_train = _make_windows(train_data, window_size)
X_test, y_test = _make_windows(test_data, window_size)

print('X_train shape:', X_train.shape)
print('y_train shape:', y_train.shape)
print('X_test shape:', X_test.shape)
print('y_test shape:', y_test.shape)
# Output recorded in the original article:
# X_train shape: (2263, 60, 1)
# y_train shape: (2263, 1)
# X_test shape: (260, 60, 1)
# y_test shape: (260, 1)


# ---------------------------------------------------------------------------
# Model
# ---------------------------------------------------------------------------
def define_model():
    """Build, compile and summarise the stacked-LSTM regression model.

    The network takes inputs of shape (window_size, 1), passes them through
    three 64-unit LSTM layers (each followed by 20% dropout), a 32-unit dense
    layer and a single-unit linear output. It is compiled with mean squared
    error loss and the Nadam optimizer, and its summary is printed.

    Returns:
        The compiled tf.keras model.
    """
    layers = tf.keras.layers

    input1 = tf.keras.Input(shape=(window_size, 1))
    x = layers.LSTM(units=64, return_sequences=True)(input1)
    x = layers.Dropout(0.2)(x)
    x = layers.LSTM(units=64, return_sequences=True)(x)
    x = layers.Dropout(0.2)(x)
    # The last LSTM returns only its final state, collapsing the time axis.
    x = layers.LSTM(units=64)(x)
    x = layers.Dropout(0.2)(x)
    # ReLU on the hidden dense layer: a softmax here would force the 32
    # activations to sum to 1, which suits class probabilities, not the
    # feature layer of a regression network.
    x = layers.Dense(32, activation='relu')(x)
    dnn_output = layers.Dense(1)(x)

    model = tf.keras.models.Model(inputs=input1, outputs=dnn_output)
    model.compile(
        loss='mean_squared_error',
        optimizer=tf.keras.optimizers.Nadam(),
    )
    model.summary()
    return model


model = define_model()
# Model summary recorded in the original article:
# Model: "model_3"
# _________________________________________________________________
#  Layer (type)                Output Shape              Param #
# =================================================================
#  input_4 (InputLayer)        [(None, 60, 1)]           0
#  lstm_9 (LSTM)               (None, 60, 64)            16896
#  dropout_9 (Dropout)         (None, 60, 64)            0
#  lstm_10 (LSTM)              (None, 60, 64)            33024
#  dropout_10 (Dropout)        (None, 60, 64)            0
#  lstm_11 (LSTM)              (None, 64)                33024
#  dropout_11 (Dropout)        (None, 64)                0
#  dense_6 (Dense)             (None, 32)                2080
#  dense_7 (Dense)             (None, 1)                 33
# =================================================================
# Total params: 85057 (332.25 KB)
# Trainable params: 85057 (332.25 KB)
# Non-trainable params: 0 (0.00 Byte)
# _________________________________________________________________

# ---------------------------------------------------------------------------
# Training
# ---------------------------------------------------------------------------
# validation_split=0.1 holds out the last 10% of the training samples for
# validation during fitting.
history = model.fit(
    X_train,
    y_train,
    epochs=150,
    batch_size=32,
    validation_split=0.1,
    verbose=1,
)

# ---------------------------------------------------------------------------
# Evaluation
# ---------------------------------------------------------------------------
# Loss on the test windows, measured on the scaled values.
result = model.evaluate(X_test, y_test)
y_pred = model.predict(X_test)

# Map targets and predictions back to price units before computing MAPE:
# MAPE is relative to the magnitude of the true value, so computing it on
# min-max scaled values does not describe the error on actual prices.
y_test_true = scaler.inverse_transform(y_test.reshape(-1, 1)).flatten()
y_test_pred = scaler.inverse_transform(y_pred.reshape(-1, 1)).flatten()

MAPE = mean_absolute_percentage_error(y_test_true, y_test_pred)
Accuracy = 1 - MAPE

print('Test Loss:', result)
print('Test MAPE:', MAPE)
print('Test Accuracy:', Accuracy)
# Output recorded in the original article. Those figures came from the
# article's original setup (softmax hidden layer, MAPE computed on scaled
# values), so a fresh run of this code will print different numbers:
# Test Loss: 0.0008509838371537626
# Test MAPE: 0.0319030650799213
# Test Accuracy: 0.9680969349200788

# ---------------------------------------------------------------------------
# Actual vs predicted prices
# ---------------------------------------------------------------------------
plt.figure(figsize=(15, 6), dpi=150)
plt.rcParams['axes.facecolor'] = 'cyan'
plt.rc('axes', edgecolor='white')

# Training span, then the true test prices, then the model's predictions for
# the same dates.
plt.plot(df.Date[:-test_size], df.Price[:-test_size], color='black', lw=2)
plt.plot(df.Date[-test_size:], y_test_true, color='blue', lw=2)
plt.plot(df.Date[-test_size:], y_test_pred, color='red', lw=2)

plt.title('Gold Price: Actual vs Predicted', fontsize=15)
plt.xlabel('Date', fontsize=12)
plt.ylabel('Price', fontsize=12)
plt.legend(
    ['Train Set', 'Test Set (actual)', 'Test Set (predicted)'],
    loc='upper left',
    prop={'size': 15},
)
plt.grid(color='white')

plt.show()

知乎学术咨询:https://www.zhihu.com/consult/people/792359672131756032?isMe=1

担任《Mechanical Systems and Signal Processing》审稿专家,担任《中国电机工程学报》、《控制与决策》等EI期刊审稿专家,擅长领域:现代信号处理,机器学习,深度学习,数字孪生,时间序列分析,设备缺陷检测、设备异常检测、设备智能故障诊断与健康管理PHM等。

广告一刻

为您即时展示最新活动产品广告消息,让您随时掌握产品活动新动态!