import os
import re
import logging
import datetime

from bs4 import BeautifulSoup
from openpyxl import Workbook
from openpyxl.utils import get_column_letter

logging.basicConfig(
    level=logging.INFO,  # minimum severity that gets recorded
    filename='food.log',
    # 'w' rewrites the log on every run; 'a' appends (also the default
    # when the argument is omitted).
    filemode='a',
    # layout of each log record
    format='%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s',
)

# Compiled once; used to pull the first run of digits out of a text snippet.
_DIGITS = re.compile(r'\d+')


def Insert2Excel(allinfo):
    """Write the collected rows to ``restaurantsinfo.xlsx``.

    A new workbook is created with a single sheet named ``restaurants``,
    a header row ``name / time / score`` and one row per entry of
    *allinfo*.  Every column present after the header is written gets a
    width of 15.

    Args:
        allinfo: iterable of rows; each row is a sequence that
            ``Worksheet.append`` accepts (the script passes
            ``[name, time, score]`` lists).

    Returns:
        ``'Insert Excel succcessfully!'`` when the file was saved,
        ``'Insert Excel failed!'`` when any step raised.  The failure is
        also written to the log together with its traceback.
    """
    tableTitle = ['name', 'time', 'score']
    work_name = 'restaurantsinfo.xlsx'
    try:
        wb = Workbook()
        ws = wb.active
        ws.title = 'restaurants'
        ws.append(tableTitle)
        for i in range(1, ws.max_column + 1):
            ws.column_dimensions[get_column_letter(i)].width = 15
        for info in allinfo:
            ws.append(info)
        wb.save(work_name)
    except Exception:
        # Keep the best-effort contract (status string instead of a raise)
        # but do not lose the reason, and let KeyboardInterrupt/SystemExit
        # propagate instead of being swallowed by a bare ``except``.
        logging.exception('Could not write %s', work_name)
        return 'Insert Excel failed!'
    return 'Insert Excel succcessfully!'


def _first_number(text):
    """Return the first run of decimal digits in *text*, or ``''`` if none."""
    found = _DIGITS.search(text)
    return found.group() if found else ''


def _parse_restaurant(restaurant):
    """Turn one restaurant card into a ``[name, time, score]`` row.

    Returns ``None`` when the card has no name element, so the caller can
    skip it.
    """
    name_tag = restaurant.find('h6', attrs={'class': 'name___2epcT'})
    if name_tag is None:
        return None
    name = name_tag.get_text()

    numbers = restaurant.find_all('div', attrs={'class': 'numbersChild___2qKMV'})
    if len(numbers) == 2:
        # Two entries: the first is used as the score, the second holds the
        # time text (presumably rating and delivery minutes -- confirm
        # against the page markup).
        score = numbers[0].get_text()
        time_text = numbers[1].get_text()
    else:
        # Otherwise there is no score; the first entry (if any) is the time.
        score = '0'
        time_text = numbers[0].get_text() if numbers else ''

    time = _first_number(time_text)
    if not time:
        logging.warning('No numeric time found for restaurant %r', name)
    return [name, time, score]


def main():
    """Parse the saved page in ``food.txt`` and export it to Excel."""
    start = datetime.datetime.now().replace(microsecond=0)
    print('Start: ', start)

    allinfo = []
    # Live page (not fetched here): https://food.grab.com/sg/en/restaurants
    # The script works from a locally saved copy of the HTML instead.
    with open('food.txt', 'rb') as f:
        html = f.read()

    soup = BeautifulSoup(html, 'html.parser')
    tag = soup.find('div', attrs={'class': 'ant-row-flex RestaurantListRow___1SbZY'})
    if tag is None:
        logging.warning('Restaurant list container not found in food.txt')
        print(0)
    else:
        print(len(tag))
        # Only direct child *tags* are restaurant cards; iterating the
        # container itself would also yield text nodes, which have no
        # tag-style ``find``.
        for restaurant in tag.find_all(True, recursive=False):
            resinfo = _parse_restaurant(restaurant)
            if resinfo is None:
                logging.warning('Skipped a list entry without a name element')
                continue
            allinfo.append(resinfo)

    print(Insert2Excel(allinfo))
    end = datetime.datetime.now().replace(microsecond=0)
    print('End:', end)
    print('Running time: %s Seconds' % (end - start))


if __name__ == '__main__':
    main()