# 平稳性
from numpy import cumsum , log, polyfit, sqrt, std, subtract
from numpy.random import randn
def hurst(ts, max_lag=100):
    """Estimate the Hurst exponent of a time series.

    For every lag in ``range(2, max_lag)`` the square root of the standard
    deviation of the lagged differences ``ts[lag:] - ts[:-lag]`` is computed.
    A straight line is then fitted to ``log(tau)`` against ``log(lag)`` and
    twice its slope is returned.

    Conventionally a result near 0.5 indicates a random walk, a result
    below 0.5 a mean-reverting series and a result above 0.5 a trending
    series.

    Args:
        ts: Sequence of values (list, NumPy array or pandas Series).
        max_lag: Exclusive upper bound of the lags used in the fit.
            Defaults to 100, the value that was previously hard-coded.

    Returns:
        Twice the slope of the fitted line.

    Raises:
        ValueError: If ``max_lag`` leaves fewer than two lags to fit, or if
            the series is too short to provide at least two differences at
            the largest lag.
    """
    from numpy import asarray  # local import keeps this block self-contained

    # Work on a plain positional array.  A pandas Series would be re-aligned
    # on its index labels by ``subtract``, so the two slices would no longer
    # be compared position by position.
    values = asarray(ts, dtype=float)

    if max_lag < 4:
        raise ValueError("max_lag must be at least 4 to fit a line through two lags")
    if len(values) <= max_lag:
        raise ValueError(
            "time series needs more than max_lag=%d observations, got %d"
            % (max_lag, len(values))
        )

    lags = range(2, max_lag)
    tau = [sqrt(std(subtract(values[lag:], values[:-lag]))) for lag in lags]

    # Slope of the log-log regression; index 0 holds the linear coefficient.
    poly = polyfit(log(lags), log(tau), 1)
    return poly[0] * 2.0
# Synthetic log-series used to sanity-check hurst().
gdm = log(1000 + cumsum(randn(100000)))  # constructed Brownian motion
mr = log(1000 + randn(100000))  # constructed mean-reverting series
tr = log(1000 + cumsum(1 + randn(100000)))  # constructed trending series

# Print the estimate for each synthetic series.
print("hurst(GBM): %s" % (hurst(gdm),))
print("hurst(MR): %s" % (hurst(mr),))
print("hurst(TR): %s" % (hurst(tr),))

# Random walk of a stock price.
# NOTE(review): `amzn` is not defined in this section; presumably a price
# table loaded elsewhere with an 'AdjClose' column - confirm.
print("hurst(AMZN): %s" % (hurst(amzn['AdjClose']),))
# 协整-配对交易
# 对于单只股票来讲,均值回复交易策略几乎无效,因此均值回复交易策略应用于投资组合
def plot_price_series(df, ts1, ts2):
    """Plot two columns of ``df`` against its index on one date axis.

    Args:
        df: Table whose index supplies the x values and which contains the
            columns named by ``ts1`` and ``ts2``.
        ts1: Name of the first column; also used as its legend label.
        ts2: Name of the second column; also used as its legend label.

    NOTE(review): relies on ``plt``, ``mdates`` and ``datetime`` being
    imported at file level (presumably matplotlib.pyplot, matplotlib.dates
    and the stdlib module) - confirm.
    """
    months = mdates.MonthLocator()
    fig, ax = plt.subplots()
    ax.plot(df.index, df[ts1], label=ts1)
    ax.plot(df.index, df[ts2], label=ts2)

    # Major ticks come from the month locator, labelled e.g. "Jan 2012".
    ax.xaxis.set_major_locator(months)
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))

    # The visible window is fixed to calendar year 2012.
    ax.set_xlim(datetime.datetime(2012, 1, 1), datetime.datetime(2013, 1, 1))
    ax.grid(True)
    fig.autofmt_xdate()

    plt.xlabel('Month/Year')
    plt.ylabel('Price($)')
    plt.title('%s and %s Daily Prices' % (ts1, ts2))
    plt.legend()
    plt.show()
def plot_scatter_series(df, ts1, ts2):
    """Show a scatter plot of column ``ts1`` (x) against column ``ts2`` (y).

    Args:
        df: Table containing the columns named by ``ts1`` and ``ts2``.
        ts1: Column plotted on the x axis; also used in the x label.
        ts2: Column plotted on the y axis; also used in the y label.
    """
    x_caption = '%s Price($)' % ts1
    plt.xlabel(x_caption)

    y_caption = '%s Price($)' % ts2
    plt.ylabel(y_caption)

    heading = '%s and %s Price Scatterplot' % (ts1, ts2)
    plt.title(heading)

    x_values, y_values = df[ts1], df[ts2]
    plt.scatter(x_values, y_values)
    plt.show()
def plot_residuals(df):
    """Plot the ``'res'`` column of ``df`` against its index on a date axis.

    Args:
        df: Table whose index supplies the x values and which contains a
            ``'res'`` column.

    NOTE(review): relies on ``plt``, ``mdates`` and ``datetime`` being
    imported at file level (presumably matplotlib.pyplot, matplotlib.dates
    and the stdlib module) - confirm.
    """
    months = mdates.MonthLocator()
    fig, ax = plt.subplots()
    ax.plot(df.index, df['res'], label='Residuals')

    # Major ticks come from the month locator, labelled e.g. "Jan 2012".
    ax.xaxis.set_major_locator(months)
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))

    # The visible window is fixed to calendar year 2012.
    ax.set_xlim(datetime.datetime(2012, 1, 1), datetime.datetime(2013, 1, 1))
    ax.grid(True)
    fig.autofmt_xdate()

    plt.xlabel('Month/Year')
    plt.ylabel('Price($)')
    plt.title('Residual Plot')
    plt.legend()
    # The series is drawn once, above; plotting it a second time here would
    # overlay an unlabeled copy on top of the labeled line.
    plt.show()
if __name__ == "__main__":
    # Pairs-trading example: download two price series, regress one on the
    # other, and run an ADF test on the residuals.
    # NOTE(review): `web`, `pd`, `sm`, `ts` and `pprint` are not imported in
    # this section; presumably pandas_datareader.data, pandas,
    # statsmodels.api and statsmodels.tsa.stattools - confirm.
    start = datetime.datetime(2012, 1, 1)
    end = datetime.datetime(2013, 1, 1)

    # Sort both downloads by index so the rows line up when combined.
    arex = web.DataReader("AREX", "quandl", start, end).sort_index()
    wll = web.DataReader("WLL", "quandl", start, end).sort_index()

    # Collect both adjusted-close columns in one frame on AREX's index.
    df = pd.DataFrame(index=arex.index)
    df["AREX"] = arex["AdjClose"]
    df["WLL"] = wll["AdjClose"]

    plot_price_series(df, "AREX", "WLL")
    plot_scatter_series(df, "AREX", "WLL")

    # Regress WLL on AREX (no intercept term is added) to get the hedge ratio.
    y = df['WLL']
    x = df['AREX']
    res = sm.OLS(y, x).fit()
    beta_hr = res.params.AREX

    # Residuals of the fitted linear combination.
    df["res"] = df['WLL'] - beta_hr * df["AREX"]
    plot_residuals(df)

    # Run the ADF test on the residuals and print the result.
    cadf = ts.adfuller(df["res"])
    pprint.pprint(cadf)