• 123


    # -*- coding: utf-8 -*-
    # @Author  : xuchunlin
    # @Time    : 2020/7/20 10:41
    # @License : Copyright(C),Drcnet

    # from common.contest import *
    from selenium import webdriver
    import time
    from selenium.webdriver import ChromeOptions
    from selenium.webdriver.common.keys import Keys
    # from common.contest import logger
    from bs4 import BeautifulSoup

    # Open the listing page in Firefox and parse the rendered DOM.
    # Alternate page kept for reference:
    # http://www.customs.gov.cn/customs/302249/302274/302277/302276/310398/index.html
    driver = webdriver.Firefox()
    try:
        driver.get('http://www.customs.gov.cn/customs/302249/302274/302277/index.html')

        # Fixed pause before the DOM is read.
        # NOTE(review): presumably gives the page's scripts time to finish
        # rendering; confirm, and consider an explicit wait instead.
        time.sleep(20)
        result = driver.page_source

        soup = BeautifulSoup(result, 'html.parser')
        main_boxes = soup.select('div#mainBox')
        if not main_boxes:
            # Same exception type as indexing an empty list, but with a
            # message that says what was actually missing.
            raise IndexError('div#mainBox not found in the rendered page')
        result_mainBox = main_boxes[0]
    except Exception:
        # Do not leave a browser session running when loading or parsing fails.
        driver.quit()
        raise

    import re

    def replace_br(newline):
        """Strip layout whitespace, ``<br>`` tags and HTML comments from markup.

        The argument is converted with ``str()`` first, so any object whose
        string form is markup can be passed directly.

        :param newline: markup (or an object whose ``str()`` is markup) to clean
        :return: the cleaned markup as one string
        """
        text = str(newline)

        # Delete every whitespace character except the plain ASCII space: line
        # breaks, tabs, no-break and full-width spaces are removed, while the
        # spaces that separate tag attributes survive so the markup stays intact.
        # NOTE(review): the original chained six replace() calls whose literals
        # all render as a plain space. Deleting plain spaces would mangle
        # attribute text such as `align="center" height="25"`, so the literals
        # are presumed to have been distinct whitespace characters - confirm.
        text = re.sub(r'[^\S ]+', '', text)

        # 'amp;' turns '&amp;' into '&'; both spellings of the break tag go away.
        for token in ('amp;', '<br/>', '<br>'):
            text = text.replace(token, '')

        # A comment with no '>' inside it is removed whole ...
        text = re.sub(r'<!--[^>]*-->', '', text)
        # ... any remaining comment only loses its delimiters; its content stays.
        return text.replace('<!--', '').replace('-->', '')

    try:
        result_mainBox_replace = replace_br(result_mainBox)
    finally:
        # quit() ends the whole WebDriver session; close() would only close the
        # current window. Running it in `finally` also covers a failure above.
        driver.quit()

    # Each match is the text captured between the opening `<tr ...` prefix and
    # the closing `</tr></tbody>`; without re.S, "." does not cross line breaks.
    result_list = re.findall(
        r'<tbody><tr align="center" height="25"(.*?)</tr></tbody>',
        result_mainBox_replace,
    )
    print(len(result_list))

    # NOTE(review): the nesting of this loop was lost in the source; both
    # prints are assumed to belong to the `if` - confirm.
    # NOTE(review): the original chained three replace() calls whose literals
    # all render as a plain space; one call is equivalent as shown - confirm
    # they were not distinct whitespace characters.
    for item in result_list:
        if 'TEXT-INDENT: 5px' in item:
            print(1111111111)
            print(item.replace(" ", ''))


    # NOTE(review): ~37 minute pause after the browser is already gone; looks
    # like a debugging leftover - confirm and remove.
    time.sleep(2222)

  • 相关阅读:
    PHP数组(数组正则表达式、数组、预定义数组)
    面向对象。OOP三大特征:封装,继承,多态。 这个讲的是【封存】
    uvalive 3938 "Ray, Pass me the dishes!" 线段树 区间合并
    LA4329 Ping pong 树状数组
    HDU 1257 最少拦截系统
    HDU 1260 Tickets
    codeforce 621D
    codeforce 621C Wet Shark and Flowers
    codeforce 621B Wet Shark and Bishops
    codeforce 621A Wet Shark and Odd and Even
  • 原文地址:https://www.cnblogs.com/xuchunlin/p/13344655.html
Copyright © 2020-2023  润新知