"""Batch market-model regression over a folder of daily-return workbooks.

For every Excel file in ``path``: fit, per stock column, an ordinary
least-squares line of the stock's return on the ``'000300'`` column using the
first ``ESTIMATION_WINDOW_ROWS`` rows, predict the remaining rows, and write
two workbooks with the same file name -- one holding the predicted (expected)
returns, one holding actual minus predicted (abnormal) returns.
"""
import os

import pandas as pd
from sklearn import linear_model

path = r'D:新数据每日收益率'
expected_dir = 'D:新数据日预期收益率'   # output folder: expected returns
abnormal_dir = 'D:新数据日超额收益率'   # output folder: abnormal returns

MARKET_COLUMN = '000300'       # CSI 300 index column (the regressor)
ESTIMATION_WINDOW_ROWS = 110   # leading rows used to fit; the rest is the event window

filenames = os.listdir(path)
for filename in filenames:
    print(filename)

for i in filenames:
    # os.path.join supplies the separator; the original literals ended in a
    # backslash that escaped their own closing quote and could not be parsed.
    excel_path = os.path.join(path, i)

    # Context manager so the handle is released after each workbook is read.
    with open(excel_path, 'rb') as f:
        data = pd.read_excel(f)

    # Everything below is the identical per-file processing.
    # Keep the index as read as a 'time' column, then switch to positional rows.
    # NOTE(review): with read_excel's default arguments this index is just the
    # row number -- confirm whether a date column was meant to be the index.
    data['time'] = data.index
    data = data.reset_index(drop=True)

    data1 = data.iloc[:ESTIMATION_WINDOW_ROWS]   # actual returns, estimation window
    data2 = data.iloc[ESTIMATION_WINDOW_ROWS:]   # actual returns, event window

    # Every column except the time stamp and the market index is a stock.
    feature = data.columns.tolist()
    feature.remove('time')
    feature.remove(MARKET_COLUMN)

    dfR = pd.DataFrame(data2['time'])    # expected returns, one column per stock
    dfAR = pd.DataFrame(data2['time'])   # abnormal returns, one column per stock

    # The market regressor is the same for every stock: build it once per file.
    market_fit = data1[MARKET_COLUMN].values.reshape(-1, 1)
    market_event = data2[MARKET_COLUMN].values.reshape(-1, 1)

    for m in feature:
        regr = linear_model.LinearRegression()
        # 1-D target so predictions come back 1-D and assign cleanly to a column.
        regr.fit(market_fit, data1[m].values)
        y_pred1 = regr.predict(market_event)   # expected return, event window
        AR = data2[m].values - y_pred1         # actual - expected = abnormal return
        dfR[m] = y_pred1
        dfAR[m] = AR

    save_path1 = os.path.join(expected_dir, i)
    save_path2 = os.path.join(abnormal_dir, i)
    dfR.to_excel(save_path1, index=False)
    dfAR.to_excel(save_path2, index=False)