• python爬虫系列之爬京东手机数据


    Python 抓取京东手机数据

    作者:vpoet

    mail:vpoet_sir@163.com

     # coding=utf-8
     """Fetch JD.com phone search result pages and print the listed prices.

     For every requested page the script downloads the HTML, parses it with
     lxml and prints one line per price node: the node's second attribute
     value, a tab, and a 1-based running index.  Extraction of names,
     pictures, review counts and ratings is still unfinished (see the TODO
     notes in ``main``).
     """

     import re  # only needed by the unfinished review/rating parsing (see TODOs)

     try:
         import urllib2  # Python 2
     except ImportError:
         # Python 3 exposes the same ``urlopen`` entry point under a new name.
         import urllib.request as urllib2

     from lxml import etree

     # Search URL template.  ``%%`` is a literal percent sign, so the keyword
     # stays URL-encoded ("%E6%89%8B%E6%9C%BA" is the UTF-8 encoding of the
     # Chinese word for "mobile phone"); ``%s`` receives the page number.
     # The stray run of spaces that used to sit inside "click=3-655" has been
     # removed because whitespace is not valid inside a URL.
     # NOTE(review): everything after '#' is a URL fragment, which is normally
     # not sent to the server, so ``page=%s`` may have no effect on the response
     # -- confirm, and move it into the query string if pagination is required.
     MAIN_URL = (
         "http://search.jd.com/Search?keyword=%%E6%%89%%8B%%E6%%9C%%BA"
         "&enc=utf-8&suggest=0"
         "#keyword=%%E6%%89%%8B%%E6%%9C%%BA&enc=utf-8&qrst=1&ps=addr&rt=1"
         "&stop=1&sttr=1&cid3=655&click=3-655&psort=3&page=%s"
     )

     # Number of result pages to fetch.
     PAGE_NUM = 1

     # Price nodes inside the product list.
     PRICE_XPATH = (
         "//*[@id='plist']/ul[@class='list-h clearfix']"
         "/li/div/div[@class='p-price']/strong"
     )


     def fetch_html(url):
         """Return the raw response body of *url*.

         The connection is always closed, even when reading fails.
         """
         response = urllib2.urlopen(url)
         try:
             return response.read()
         finally:
             response.close()


     def print_prices(tree):
         """Print one line per price node found in the parsed page *tree*.

         Each line is ``<second attribute value>\\t<1-based index>\\n``; ``print``
         then appends its own newline, so entries are separated by a blank line.
         """
         for index, node in enumerate(tree.xpath(PRICE_XPATH), 1):
             # values() returns the node's attribute values; the second one is
             # printed.  NOTE(review): presumably that attribute carries the
             # price -- confirm against the live markup.
             print(node.values()[1] + '\t' + str(index) + '\n')


     def main():
         """Fetch every configured result page and print its prices."""
         # NOTE(review): pages are requested starting at 0 -- confirm which
         # index the site uses for its first page.
         for page in range(PAGE_NUM):
             tree = etree.HTML(fetch_html(MAIN_URL % page))

             print_prices(tree)

             # TODO: phone names, via
             #   //div[@id='plist']/ul/li/div[@class='lh-wrap']/div[@class='p-name']/a/text()
             # TODO: picture URLs, via
             #   //div[@class='lh-wrap']/div[@class='p-img']/a/img
             #   (the draft printed values()[3] of each node)
             # TODO: a shorter price query was also tried:
             #   //div[@class='p-price']/strong
             # TODO: review counts, via //div[@class='extra']/a/text() and
             #   re.findall(r'.{2}(\d+).{3}', text)
             #   (the draft pattern read "d+"; the backslash was presumably lost)
             # TODO: positive-review rate, via
             #   //div[@class='extra']/span[@class='reputation']/text() and
             #   presumably re.findall(r'\((\d{2})%.{2}\)', text)
             #   (the draft pattern had lost its backslashes -- verify)

         print("over")


     if __name__ == '__main__':
         main()

    这个还没写完，先保存在这里，有时间再完成。

  • 相关阅读:
    c#索引器介绍|C#索引器写法|c#索引器例子
    原来查询语句还有这功能,累计变量值
    递归导入access数据winform程序源码
    从sqlserver导入access的最简单的方法
    vs2008中文破解|vs2008中文下载|vs2008正版序列号
    游标遍历标的所有字段代码
    asp.net未知的服务器标记错误
    卓越、当当、京东三大广告联盟比较
    Repeater 的嵌套使用与表克隆
    自动上传编辑器中的远程图片与自动替换alt标签
  • 原文地址:https://www.cnblogs.com/vpoet/p/4659586.html
Copyright © 2020-2023  润新知