# Project: Local
# Create time: 2019-12-30 19:13
import pandas as pd
def read_excel(excel_path):
excel_data = pd.read_excel(excel_path, encoding='gbk', header=None, names=['data_str'])
print(excel_data)
excel_data['data_str'] = excel_data['data_str'].apply(lambda x: modify_data_str(x))
excel_data['data_length'] = excel_data['data_str'].apply(lambda x: get_data_str_length(x))
return excel_data
def modify_data_str(data_str):
data_str = data_str.replace('"', '').replace('”', '').replace('“', '').replace(',', ',').replace('[', '').replace(
']', '').replace(':', ':')
return data_str
def split_data_str(data_str):
data_list = []
column_name, row_values = data_str.split(':')
column_name, row_values = column_name.strip(), row_values.strip()
row_value_list = [i.strip() for i in row_values.split(',')]
data_list.append(column_name)
data_list.extend(row_value_list)
return data_list
def get_data_str_length(data_str):
data_length = data_str.count(',') + 1
return data_length
def classify_sql_data(excel_data):
data_length_list = excel_data['data_length'].unique()
for data_length in data_length_list:
sql_data = excel_data[excel_data['data_length'] == data_length]
output_sql_data = pd.DataFrame(index=range(data_length))
for data_str in sql_data['data_str'].values:
column_name, *values = split_data_str(data_str)
output_sql_data[column_name] = values
yield output_sql_data
excel_path = r'D:Filespycharm_projsLocalxgxsql_testdata.xlsx'
excel_data = read_excel(excel_path)
for i in classify_sql_data(excel_data):
print(i)
# sql_str = r'"pppp":["苏州",“杭州”,“无锡”]'
# sql_str = sql_str.replace('”', '"').replace('“', '"')
# import re
#
# sql_str = re.findall('"(.+?)"', sql_str)
# sql_str = [i.strip() for i in sql_str]
# print(sql_str)
# if __name__ == '__main__':
# def read_excel(excel_path):
# """
# 解析excel
# """
# pass
# return
#
#
# def classify_excel(excel_data, length):
# pass
# return excel_data
#
#
# def parse_excel(excel_data):
# pass
# return excel_data