本篇仅用于教学目的，教大家编写一个有趣的深度学习算法及其应用。要真正实现可工业化的股票和加密货币市场预测 AI 模型，还需要多得多的数据和更好的模型。
导入依赖库
import pandas as pd
import numpy as np
from numpy import newaxis
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')
通过 Quandl 获取以太坊历史价格
import quandl
# Fetch the "BITFINEX/ETHUSD" dataset from Quandl and preview its first rows.
eth_data = quandl.get("BITFINEX/ETHUSD"); eth_data.head()
# Keep only the 'Last' column as the price series and plot it.
eth_price = eth_data['Last']
eth_price.plot()
# Write the series to eth_price.csv with header and index passed as None;
# load_data() below reads this file back as one value per line.
eth_price.to_csv('eth_price.csv',header = None, index=None)
导入深度学习集成框架 Keras
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential
训练窗口期标准化
def normalise_windows(window_data):
    """Rescale every window relative to its own first entry.

    Each entry ``p`` of a window is replaced by ``p / window[0] - 1``, i.e.
    the fractional change from the first entry of that window. Entries are
    passed through ``float()``, so numeric strings are accepted.

    Returns a new list of lists; ``window_data`` itself is left untouched.
    """
    return [
        [float(value) / float(series[0]) - 1 for value in series]
        for series in window_data
    ]
导入以太坊价格数据，并在切分窗口后选取训练集和测试集
def load_data(filename, seq_len, normalise_window):
    """Load a one-value-per-line price file and build train/test windows.

    The prices are cut into overlapping windows of ``seq_len + 1`` values:
    the first ``seq_len`` values of a window are the model input and the
    last value is the target.

    Args:
        filename: Path of a text file holding one numeric value per line.
            Blank lines at the end of the file are ignored.
        seq_len: Number of input steps per sample.
        normalise_window: When true, every window is rescaled with
            ``normalise_windows`` before the split.

    Returns:
        ``[x_train, y_train, x_test, y_test]``. ``x_*`` have shape
        ``(samples, seq_len, 1)`` and ``y_*`` have shape ``(samples,)``.
        The training set is the first 80% of the windows in shuffled order;
        the test set is the last 150 windows in chronological order. With
        fewer than roughly 750 windows the two sets overlap.

    Raises:
        ValueError: If a line is not a number, or if the file holds fewer
            than ``seq_len + 1`` values.
    """
    with open(filename, 'r') as f:
        lines = f.read().splitlines()
    # Drop trailing blank lines so a file gives the same windows whether or
    # not it ends with a newline.
    while lines and not lines[-1].strip():
        lines.pop()
    # Parse once up front so the arrays are numeric even without normalisation.
    prices = [float(line) for line in lines]

    sequence_length = seq_len + 1
    window_count = len(prices) - sequence_length + 1
    if window_count < 1:
        raise ValueError(
            'need at least %d values to build one window, got %d'
            % (sequence_length, len(prices)))

    result = [prices[start:start + sequence_length]
              for start in range(window_count)]
    if normalise_window:
        result = normalise_windows(result)
    result = np.array(result)

    row = int(round(0.8 * result.shape[0]))
    # Shuffle a copy: shuffling the slice itself would reorder ``result`` in
    # place and scramble any rows shared with the test slice taken below.
    train = result[:row, :].copy()
    np.random.shuffle(train)
    x_train = train[:, :-1]
    y_train = train[:, -1]
    # The test set is always the most recent 150 windows (or all of them).
    x_test = result[-150:, :-1]
    y_test = result[-150:, -1]

    # Add the trailing feature axis: (samples, steps) -> (samples, steps, 1).
    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))
    return [x_train, y_train, x_test, y_test]
# Build 50-step windows from the saved prices, normalised per window.
X_train, y_train, X_test, y_test = load_data('eth_price.csv', 50, True)
X_train.shape
# Pasted notebook output of the line above: (samples, steps per window, 1).
(658, 50, 1)
建立深度学习模型
# Two stacked LSTM layers followed by a single linear output unit.
# NOTE(review): input_dim / output_dim are the Keras 1 argument names; they
# are kept as written because the installed Keras version is not visible here.
model = Sequential()
# First LSTM: one feature per time step, 50 units, returns the full sequence
# for the next recurrent layer.
model.add(LSTM(
    input_dim=1,
    output_dim=50,
    return_sequences=True))
model.add(Dropout(0.2))
# Second LSTM: 100 units, returns only its final output.
model.add(LSTM(
    output_dim=100,
    return_sequences=False))
model.add(Dropout(0.2))
# Regression head: one value per window, no squashing non-linearity.
model.add(Dense(
    output_dim=1))
model.add(Activation('linear'))
# A Keras model must be compiled before fit() can be called. Mean squared
# error matches the single real-valued target; rmsprop is a common optimizer
# choice for recurrent networks.
model.compile(loss='mse', optimizer='rmsprop')
训练模型
# Train for 10 epochs, holding out 5% of the training samples for validation.
model.fit(X_train, y_train, nb_epoch = 10, validation_split = 0.05)
预测结果
根据选定区预测下一天的价格
def predict_point_by_point(model, data):
    """Run ``model.predict`` on ``data`` and return the result as a 1-D array.

    Every window in ``data`` is predicted independently from its own true
    inputs; the model output is flattened to shape ``(output.size,)``.
    """
    raw_output = model.predict(data)
    flat_shape = (raw_output.size,)
    return np.reshape(raw_output, flat_shape)
# Independent one-step prediction for every test window.
predictions1 = predict_point_by_point(model, X_test)
def plot_results_single(predicted_data, true_data):
    """Plot the true series and one predicted series on the same axes.

    The true data is drawn first with the label 'True Data', then the
    prediction with the label 'Prediction'; a legend is added and the
    figure is shown.
    """
    figure = plt.figure(facecolor='white')
    axes = figure.add_subplot(111)
    labelled_series = ((true_data, 'True Data'), (predicted_data, 'Prediction'))
    for series, caption in labelled_series:
        axes.plot(series, label=caption)
    axes.legend()
    plt.show()
# Compare the point-by-point predictions with the true test targets.
plot_results_single(predictions1, y_test)
根据选定区预测整个测试数据的价格
def predict_sequence_full(model, data, window_size):
    """Predict ``len(data)`` steps by feeding each prediction back as input.

    Only the first window of ``data`` is taken from real values. After every
    step the oldest entry of the window is dropped and the newest prediction
    is appended, so later predictions are built on earlier predictions.

    Args:
        model: Object whose ``predict`` accepts an array of shape
            ``(1, window_size, 1)`` and returns an array indexable as
            ``[0, 0]``.
        data: Array of shape ``(samples, window_size, 1)``.
        window_size: Length of each window; must equal ``data.shape[1]`` so
            the new value lands at the end of the shifted window.

    Returns:
        A list with one predicted value per row of ``data`` (empty when
        ``data`` is empty).
    """
    if len(data) == 0:
        return []
    curr_frame = data[0]
    predicted = []
    # range() instead of xrange() so this runs on Python 2 and Python 3.
    for _ in range(len(data)):
        predicted.append(model.predict(curr_frame[newaxis, :, :])[0, 0])
        # Slide the window: drop the oldest step, append the newest prediction.
        curr_frame = curr_frame[1:]
        curr_frame = np.insert(curr_frame, [window_size - 1], predicted[-1], axis=0)
    return predicted
# Roll one prediction chain across the whole test set, seeded by its first window.
predictions2 = predict_sequence_full(model, X_test, 50)
plot_results_single(predictions2, y_test)
根据每个选定区预测之后 20 天的价格
def predict_sequences_multiple(model, data, window_size, prediction_len):
    """Predict several independent runs of ``prediction_len`` steps each.

    The data is visited in strides of ``prediction_len`` windows. Each run
    starts from the real window at the stride start and then feeds its own
    predictions back in for the remaining steps.

    Args:
        model: Object whose ``predict`` accepts an array of shape
            ``(1, window_size, 1)`` and returns an array indexable as
            ``[0, 0]``.
        data: Array of shape ``(samples, window_size, 1)``.
        window_size: Length of each window; must equal ``data.shape[1]``.
        prediction_len: Number of steps predicted per run.

    Returns:
        A list of ``len(data) // prediction_len`` lists, each holding
        ``prediction_len`` predicted values.
    """
    prediction_seqs = []
    # Floor division keeps the run count an integer on Python 3 as well.
    for i in range(len(data) // prediction_len):
        curr_frame = data[i * prediction_len]
        predicted = []
        for _ in range(prediction_len):
            predicted.append(model.predict(curr_frame[newaxis, :, :])[0, 0])
            # Slide the window: drop the oldest step, append the newest prediction.
            curr_frame = curr_frame[1:]
            curr_frame = np.insert(curr_frame, [window_size - 1], predicted[-1], axis=0)
        prediction_seqs.append(predicted)
    return prediction_seqs
# Start a fresh 20-step prediction run at every 20th test window.
predictions3 = predict_sequences_multiple(model, X_test, 50, 20)
def plot_results_multiple(predicted_data, true_data, prediction_len):
    """Plot the true series with each predicted run drawn at its own offset.

    Args:
        predicted_data: Sequence of prediction runs, each a sequence of
            values.
        true_data: The true series, drawn first with the label 'True Data'.
        prediction_len: Number of steps between the starts of two
            consecutive runs; run ``i`` is shifted right by
            ``i * prediction_len`` points.
    """
    fig = plt.figure(facecolor='white')
    ax = fig.add_subplot(111)
    ax.plot(true_data, label='True Data')
    for i, data in enumerate(predicted_data):
        # Left-pad with None so the run is drawn at its position on the x axis.
        padding = [None] * (i * prediction_len)
        plt.plot(padding + list(data), label='Prediction')
    plt.show()
# Overlay the 20-step prediction runs on the true test targets.
plot_results_multiple(predictions3,y_test, 20)