• python例子-PyQuery抓取信息.


    #!/usr/bin/python
    #coding:utf-8
    
    from pyquery import PyQuery
    import re
    
    # 抓取:http://www.stylebop.com/cn/product_details.php?id=606526&special=sale
    # 获得   产品名 品牌 价格 size  图片(大图)
    def main():
        pqhtml = PyQuery(url = 'http://www.stylebop.com/cn/product_details.php?id=606526&special=sale')
        #产品图片:
        img_li = pqhtml('li').filter('.image_click_rotator')
        pattern_img = re.compile(".*?'(.*?jpg)'.*?'.*?'.*?'.*?'.*?'(.*?jpg)'.*?")
        img_list = []
        for li in img_li:
            #div = li.getchildren()[0]
            #a = div.getchildren()[0]
            href = li.getchildren()[0].getchildren()[0].get('href')
            items = re.findall(pattern_img,href)
            img_large = list(items[0])[1]
            if img_large[0:4] != 'http' :
                img_large = 'http://www.stylebop.com%s' %img_large
            img_list.append(img_large)
        print '产品图片:' , img_list
    
        #产品品牌:
        brand = pqhtml('div').filter('.productInfo')('a:first').text()
        print '品牌:%s' %brand
    
        #价格
        price_div = pqhtml('div').filter('#product_price')  #根据ID获取价格的div
        price_first_span = price_div('span:first') #获取第一个span
        old_price = ''
        new_price = ''
        if price_first_span.hasClass('old_price'):
            old_price = price_first_span.text
            new_price = price_div('span:eq(1)').text() + ' / ' + price_div('span:eq(3)').text()
        else:
            new_price = price_div.text() + ' / ' + price_div('span:first').text
        print '价格:' , new_price
        #print '价格:%s' % new_price #这样打印会报编码错误:'ascii' codec can't encode character u'u20ac' in position 21: ordinal not in range(128)
    
        #size
        size_option = pqhtml('select').filter('.newInput2')('option')
        size_list = []
        for size in size_option:        #为HTMLElement对象
            size_list.append(size.text)
        print 'size:', size_list
    
        #产品名:
        pname = pqhtml('div').filter('.productInfo')('span:first').text()
        print '产品名:%s' % pname
    
    # Run the scraper only when this file is executed directly as a script.
    if __name__ == '__main__':
        main()
  • 相关阅读:
    Thread类常用方法
    sql 语句NVL()用法
    SQL极限函数limit()详解<分页必备>
    查询用户上次登录时间问题
    ROWNUM-Oracle中的分页代码
    分组统计查询
    Oracle中的多表查询
    Oracle中的单行函数
    JDBC中的事务-Transaction
    MySql中增加一列
  • 原文地址:https://www.cnblogs.com/xccnblogs/p/4894405.html
Copyright © 2020-2023  润新知