#!/usr/bin/env python import pandas as pd import os import threading import time import csv def dowork(df_only_name): for index in df_only_name.index: print(df_only_name[index]) split_file = minute_pathdir_split + "\" + str(df_only_name[index][0:2]) + "\" + df_only_name[index]+".csv" #print(split_file) pd_min_add = pd_min[pd_min["a_name"] == df_only_name[index]] if not os.path.isfile(split_file): pd_min_add.to_csv(split_file,sep=",",encoding="utf-8", index=False,columns=column_list) else: #追加模式: pd_min_add = pd_min_add[column_list] with open(split_file, "a+", newline='') as file1: # 处理csv读写时不同换行符 linux: windows: mac: csv_file = csv.writer(file1) datas = pd_min_add.values csv_file.writerows(datas) #非追加模式 #old_pd = pd.read_csv(split_file) #old_pd = old_pd.append(pd_min_add) #old_pd = old_pd.drop_duplicates(subset="date",keep='last', inplace=False) #old_pd.to_csv(split_file,sep=",",encoding="utf-8", index=False,columns=column_list) def run(df_only_name1, semaphore): semaphore.acquire() #加锁 dowork(df_only_name1) semaphore.release() #释放 if __name__ == '__main__': minute_pathdir = "G:\datas of status\python codes\everyday_data\n202008" minute_pathdir_add = "G:\datas of status\python codes\everyday_data\n202008add" minute_pathdir_split = "G:\datas of status\python codes\everyday_data\split-n202008\lx" column_list = ["a_name","date","amount_t","f_da","f_te_da","f_xiao","f_zhong","re_min_L2","time_l2","vol_t","z_da","z_te_da","z_xiao","z_zhong","925.1","925.2","925.3","930.1","930.2","930.3","931.1","931.2","931.3","932.1","932.2","932.3","933.1","933.2","933.3","934.1","934.2","934.3","935.1","935.2","935.3","936.1","936.2","936.3","937.1","937.2","937.3","938.1","938.2","938.3","939.1","939.2","939.3","940.1","940.2","940.3","941.1","941.2","941.3","942.1","942.2","942.3","943.1","943.2","943.3","944.1","944.2","944.3","945.1","945.2","945.3","946.1","946.2","946.3","947.1","947.2","947.3","948.1","948.2","948.3","949.1","949.2","949.3","950.1","950.2","950.3","951.1","951.2","951.3","952.1","952.2","952.3","953.1","953.2","953.3","954.1","954.2","954.3","955.1","955.2","955.3","956.1","956.2","956.3","957.1","957.2","957.3","958.1","958.2","958.3","959.1","959.2","959.3","1000.1","1000.2","1000.3","1001.1","1001.2","1001.3","1002.1","1002.2","1002.3","1003.1","1003.2","1003.3","1004.1","1004.2","1004.3","1005.1","1005.2","1005.3","1006.1","1006.2","1006.3","1007.1","1007.2","1007.3","1008.1","1008.2","1008.3","1009.1","1009.2","1009.3","1010.1","1010.2","1010.3","1011.1","1011.2","1011.3","1012.1","1012.2","1012.3","1013.1","1013.2","1013.3","1014.1","1014.2","1014.3","1015.1","1015.2","1015.3","1016.1","1016.2","1016.3","1017.1","1017.2","1017.3","1018.1","1018.2","1018.3","1019.1","1019.2","1019.3","1020.1","1020.2","1020.3","1021.1","1021.2","1021.3","1022.1","1022.2","1022.3","1023.1","1023.2","1023.3","1024.1","1024.2","1024.3","1025.1","1025.2","1025.3","1026.1","1026.2","1026.3","1027.1","1027.2","1027.3","1028.1","1028.2","1028.3","1029.1","1029.2","1029.3","1030.1","1030.2","1030.3","1031.1","1031.2","1031.3","1032.1","1032.2","1032.3","1033.1","1033.2","1033.3","1034.1","1034.2","1034.3","1035.1","1035.2","1035.3","1036.1","1036.2","1036.3","1037.1","1037.2","1037.3","1038.1","1038.2","1038.3","1039.1","1039.2","1039.3","1040.1","1040.2","1040.3","1041.1","1041.2","1041.3","1042.1","1042.2","1042.3","1043.1","1043.2","1043.3","1044.1","1044.2","1044.3","1045.1","1045.2","1045.3","1046.1","1046.2","1046.3","1047.1","1047.2","1047.3","1048.1","1048.2","1048.3","1049.1","1049.2","1049.3","1050.1","1050.2","1050.3","1051.1","1051.2","1051.3","1052.1","1052.2","1052.3","1053.1","1053.2","1053.3","1054.1","1054.2","1054.3","1055.1","1055.2","1055.3","1056.1","1056.2","1056.3","1057.1","1057.2","1057.3","1058.1","1058.2","1058.3","1059.1","1059.2","1059.3","1100.1","1100.2","1100.3","1101.1","1101.2","1101.3","1102.1","1102.2","1102.3","1103.1","1103.2","1103.3","1104.1","1104.2","1104.3","1105.1","1105.2","1105.3","1106.1","1106.2","1106.3","1107.1","1107.2","1107.3","1108.1","1108.2","1108.3","1109.1","1109.2","1109.3","1110.1","1110.2","1110.3","1111.1","1111.2","1111.3","1112.1","1112.2","1112.3","1113.1","1113.2","1113.3","1114.1","1114.2","1114.3","1115.1","1115.2","1115.3","1116.1","1116.2","1116.3","1117.1","1117.2","1117.3","1118.1","1118.2","1118.3","1119.1","1119.2","1119.3","1120.1","1120.2","1120.3","1121.1","1121.2","1121.3","1122.1","1122.2","1122.3","1123.1","1123.2","1123.3","1124.1","1124.2","1124.3","1125.1","1125.2","1125.3","1126.1","1126.2","1126.3","1127.1","1127.2","1127.3","1128.1","1128.2","1128.3","1129.1","1129.2","1129.3","1300.1","1300.2","1300.3","1301.1","1301.2","1301.3","1302.1","1302.2","1302.3","1303.1","1303.2","1303.3","1304.1","1304.2","1304.3","1305.1","1305.2","1305.3","1306.1","1306.2","1306.3","1307.1","1307.2","1307.3","1308.1","1308.2","1308.3","1309.1","1309.2","1309.3","1310.1","1310.2","1310.3","1311.1","1311.2","1311.3","1312.1","1312.2","1312.3","1313.1","1313.2","1313.3","1314.1","1314.2","1314.3","1315.1","1315.2","1315.3","1316.1","1316.2","1316.3","1317.1","1317.2","1317.3","1318.1","1318.2","1318.3","1319.1","1319.2","1319.3","1320.1","1320.2","1320.3","1321.1","1321.2","1321.3","1322.1","1322.2","1322.3","1323.1","1323.2","1323.3","1324.1","1324.2","1324.3","1325.1","1325.2","1325.3","1326.1","1326.2","1326.3","1327.1","1327.2","1327.3","1328.1","1328.2","1328.3","1329.1","1329.2","1329.3","1330.1","1330.2","1330.3","1331.1","1331.2","1331.3","1332.1","1332.2","1332.3","1333.1","1333.2","1333.3","1334.1","1334.2","1334.3","1335.1","1335.2","1335.3","1336.1","1336.2","1336.3","1337.1","1337.2","1337.3","1338.1","1338.2","1338.3","1339.1","1339.2","1339.3","1340.1","1340.2","1340.3","1341.1","1341.2","1341.3","1342.1","1342.2","1342.3","1343.1","1343.2","1343.3","1344.1","1344.2","1344.3","1345.1","1345.2","1345.3","1346.1","1346.2","1346.3","1347.1","1347.2","1347.3","1348.1","1348.2","1348.3","1349.1","1349.2","1349.3","1350.1","1350.2","1350.3","1351.1","1351.2","1351.3","1352.1","1352.2","1352.3","1353.1","1353.2","1353.3","1354.1","1354.2","1354.3","1355.1","1355.2","1355.3","1356.1","1356.2","1356.3","1357.1","1357.2","1357.3","1358.1","1358.2","1358.3","1359.1","1359.2","1359.3","1400.1","1400.2","1400.3","1401.1","1401.2","1401.3","1402.1","1402.2","1402.3","1403.1","1403.2","1403.3","1404.1","1404.2","1404.3","1405.1","1405.2","1405.3","1406.1","1406.2","1406.3","1407.1","1407.2","1407.3","1408.1","1408.2","1408.3","1409.1","1409.2","1409.3","1410.1","1410.2","1410.3","1411.1","1411.2","1411.3","1412.1","1412.2","1412.3","1413.1","1413.2","1413.3","1414.1","1414.2","1414.3","1415.1","1415.2","1415.3","1416.1","1416.2","1416.3","1417.1","1417.2","1417.3","1418.1","1418.2","1418.3","1419.1","1419.2","1419.3","1420.1","1420.2","1420.3","1421.1","1421.2","1421.3","1422.1","1422.2","1422.3","1423.1","1423.2","1423.3","1424.1","1424.2","1424.3","1425.1","1425.2","1425.3","1426.1","1426.2","1426.3","1427.1","1427.2","1427.3","1428.1","1428.2","1428.3","1429.1","1429.2","1429.3","1430.1","1430.2","1430.3","1431.1","1431.2","1431.3","1432.1","1432.2","1432.3","1433.1","1433.2","1433.3","1434.1","1434.2","1434.3","1435.1","1435.2","1435.3","1436.1","1436.2","1436.3","1437.1","1437.2","1437.3","1438.1","1438.2","1438.3","1439.1","1439.2","1439.3","1440.1","1440.2","1440.3","1441.1","1441.2","1441.3","1442.1","1442.2","1442.3","1443.1","1443.2","1443.3","1444.1","1444.2","1444.3","1445.1","1445.2","1445.3","1446.1","1446.2","1446.3","1447.1","1447.2","1447.3","1448.1","1448.2","1448.3","1449.1","1449.2","1449.3","1450.1","1450.2","1450.3","1451.1","1451.2","1451.3","1452.1","1452.2","1452.3","1453.1","1453.2","1453.3","1454.1","1454.2","1454.3","1455.1","1455.2","1455.3","1456.1","1456.2","1456.3","1500.1","1500.2","1500.3"] newfiles = os.listdir(minute_pathdir_add) pd_min = pd.DataFrame(columns=column_list) for file in newfiles: file = minute_pathdir_add + "\" +file if os.path.isfile(file): print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())) print(file) pd_min = pd_min.append(pd.read_csv(file)) print(len(pd_min)) df_only_name = pd_min["a_name"] df_only_name = df_only_name.drop_duplicates(keep='last', inplace=False) df_only_name = df_only_name.reset_index(drop=True) all_nums = len(df_only_name) #all_nums = 20 every_batch = 50 epochs = int(all_nums / every_batch) print(epochs) num_of_thread = 30 # num = 1 semaphore = threading.BoundedSemaphore(num_of_thread) # 最多允许5个线程同时运行 print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())) for i in range(epochs+1): begin = i*every_batch end = begin +every_batch if all_nums<=end: end = all_nums df_only_name1 = df_only_name[begin:end] t = threading.Thread(target=run, args=(df_only_name1, semaphore)) t.start() time.sleep(2) #print(i) print(i,begin,end) print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())) while threading.active_count() != 1: print(threading.active_count()) time.sleep(20) pass # print threading.active_count() else: print('-----all threads done-----') print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))