• 淘宝商品标题和价格的爬取:分别用正则匹配出价格列表和标题列表,再遍历并按位置配对



    import re
    import urllib2

    def get_html():
        """Download the Taobao search page and extract the price strings.

        Returns:
            list of str: the text captured between ``<strong>`` and
            ``</strong>`` for every ``target="_blank">&yen; <strong>...``
            occurrence, in page order. Empty list when nothing matches.
        """
        response = urllib2.urlopen("http://uland.taobao.com/sem/tbsearch")
        try:
            html = response.read()
        finally:
            # Close explicitly so the connection is released even if read()
            # raises.
            response.close()
        # Non-greedy group: a greedy ``.*`` runs to the LAST ``</strong>`` on
        # a line and would merge several prices into one bogus match.
        pattern = re.compile(r'target="_blank">&yen; <strong>(.*?)</strong>')
        return pattern.findall(html)

    def get_html1():
        """Download the Taobao search page and extract the product titles.

        Returns:
            list of tuple: one ``(title_attribute, link_text)`` pair per
            ``class="title"`` anchor matched, in page order. Empty list when
            nothing matches.
        """
        response = urllib2.urlopen("http://uland.taobao.com/sem/tbsearch")
        try:
            html = response.read()
        finally:
            # Close explicitly so the connection is released even if read()
            # raises.
            response.close()
        # Both groups are non-greedy so each match stops at the first closing
        # quote / ``</a>`` instead of spanning several anchors on one line.
        pattern = re.compile(
            r' target="_blank" class="title" title="(.*?)">(.*?)</a>')
        return pattern.findall(html)

    # Scrape prices and titles, then append "price title " pairs to aaa.txt.
    price = get_html()
    title = get_html1()

    # Each title match is a tuple of regex groups; the last group is the
    # visible link text, which is what gets written out.
    names = [groups[-1] for groups in title]

    # Pair the i-th price with the i-th title. zip() stops at the shorter
    # list, exactly like the index-equality loop it replaces, but without the
    # O(n^2) scan. Writing the pairs directly (instead of going through a
    # dict keyed by price) keeps page order and no longer drops products that
    # happen to share the same price.
    with open('aaa.txt', 'a') as fd:
        for cost, name in zip(price, names):
            fd.write(cost + ' ' + name + ' ')


    # -*- coding: utf-8 -*-
    import urllib2
    import re

    def geturl():
        """Download the Taobao search page and extract product titles.

        Returns:
            list of str: the ``title`` attribute of every ``class="title"``
            anchor matched, in page order. Empty list when nothing matches.
        """
        response = urllib2.urlopen("http://uland.taobao.com/sem/tbsearch")
        try:
            html = response.read()
        finally:
            # Close explicitly so the connection is released even if read()
            # raises.
            response.close()
        pattern = re.compile(
            r' target="_blank" class="title" title="(.*?)">.*?</a>')
        return pattern.findall(html)
    def geturl1():
        """Download the Taobao search page and extract ``<strong>`` contents.

        The caller treats these as prices. NOTE(review): the pattern matches
        any `` <strong>...</strong>`` preceded by a space, not only price
        markup -- confirm against the live page.

        Returns:
            list of str: captured text of every match, in page order. Empty
            list when nothing matches.
        """
        response = urllib2.urlopen("http://uland.taobao.com/sem/tbsearch")
        try:
            html = response.read()
        finally:
            # Close explicitly so the connection is released even if read()
            # raises.
            response.close()
        pattern = re.compile(r' <strong>(.*?)</strong>')
        return pattern.findall(html)
    # Write "title price " pairs for every scraped product to baobiao.txt.
    #
    # Both downloads run BEFORE the file is opened: mode 'wb' truncates, so
    # opening first would wipe an existing report whenever a fetch fails.
    titles = geturl()
    prices = geturl1()

    # The with-block closes the file even if a write raises.
    with open('baobiao.txt', 'wb') as fd:
        # zip() pairs entries by position and stops at the shorter list.
        for i, j in zip(titles, prices):
            fd.write(i + ' ' + j + ' ')
  • 相关阅读:
    Acdream 1174 Sum 暴力
    Acdream 1114 Number theory 莫比乌斯反演
    Acdream 1007 快速幂,模乘法
    UVa 10023
    UVa 11027
    UVa 11029
    UVa 10820
    UVa 10791
    UVa 11121
    UVa 106
  • 原文地址:https://www.cnblogs.com/ZHANG576433951/p/6090038.html
Copyright © 2020-2023  润新知