  • How to save Python web-scraping data in CSV format



    Storing via a command (Scrapy's built-in CSV feed export):
    scrapy crawl ju -o ju.csv
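    The command above assumes a Scrapy project containing a spider named ju. A minimal sketch of such a spider is shown below (the class name DoubanBookSpider and the yielded field names are illustrative additions, not from the original post); when run with -o ju.csv, Scrapy's feed exporter writes each yielded dict as one CSV row.

    import scrapy

    class DoubanBookSpider(scrapy.Spider):
        name = "ju"  # the name used in `scrapy crawl ju`
        start_urls = ['https://book.douban.com/top250?start={}'.format(i * 25) for i in range(10)]

        def parse(self, response):
            # every dict yielded here becomes one row in ju.csv
            for book in response.xpath('//*[@id="content"]/div/div[1]/div/table/tr/td[2]'):
                yield {
                    'book_name': book.xpath('./div[1]/a/@title').get(),
                    'rating': book.xpath('./div[2]/span[2]/text()').get(),
                    'book_link': book.xpath('./div[1]/a/@href').get(),
                }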

    Method 1: format each row by hand and write it with f.write:
    with open("F:/book_top250.csv", "w") as f:
        f.write("{},{},{},{},{}\n".format(book_name, rating, rating_num, comment, book_link))


    Method 2: use the csv module:
    with open("F:/book_top250.csv", "w", newline="") as f:  # without newline="", a blank line appears between rows on Windows
        w = csv.writer(f)
        w.writerow([book_name, rating, rating_num, comment, book_link])
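    Besides the newline handling, csv.writer also quotes fields that contain the delimiter, which the manual str.format approach of method 1 does not. A small standalone illustration (my addition, not part of the original post):

    import csv, io

    row = ["围城", "一句,带逗号的短评"]     # the second field itself contains a comma
    print("{},{}".format(*row))           # method 1 style: the line now parses as 3 columns
    buf = io.StringIO()
    csv.writer(buf).writerow(row)         # method 2 style: the offending field gets quoted
    print(buf.getvalue().strip())         # 围城,"一句,带逗号的短评" -> still 2 columns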


    Full code for method 1:
    import requests
    from lxml import etree
    import time

    urls = ['https://book.douban.com/top250?start={}'.format(i * 25) for i in range(10)]
    with open("F:/book_top250.csv", "w") as f:
        for url in urls:
            r = requests.get(url)
            selector = etree.HTML(r.text)

            books = selector.xpath('//*[@id="content"]/div/div[1]/div/table/tr/td[2]')
            for book in books:
                book_name = book.xpath('./div[1]/a/@title')[0]
                rating = book.xpath('./div[2]/span[2]/text()')[0]
                rating_num = book.xpath('./div[2]/span[3]/text()')[0].strip('() ')  # strip leading/trailing "(", ")" and space characters
                try:
                    comment = book.xpath('./p[2]/span/text()')[0]
                except IndexError:  # some books have no one-line comment
                    comment = ""
                book_link = book.xpath('./div[1]/a/@href')[0]
                f.write("{},{},{},{},{}\n".format(book_name, rating, rating_num, comment, book_link))

            time.sleep(1)
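    Note that open() with no encoding argument uses the platform default (often gbk on Chinese Windows). If the default codec cannot represent every scraped character, or if the CSV is meant to be opened in Excel, an explicit encoding helps; this variant is my addition, not part of the original post:

    # utf-8-sig adds a BOM so Excel detects UTF-8; newline="" only matters when using csv.writer
    with open("F:/book_top250.csv", "w", newline="", encoding="utf-8-sig") as f:
        ...  # same writing code as above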


    Full code for method 2:
    import requests
    from lxml import etree
    import time
    import csv

    urls = ['https://book.douban.com/top250?start={}'.format(i * 25) for i in range(10)]
    with open("F:/book_top250.csv", "w", newline='') as f:
        w = csv.writer(f)  # create the writer once; the original re-created it for every row, which works but is unnecessary
        for url in urls:
            r = requests.get(url)
            selector = etree.HTML(r.text)

            books = selector.xpath('//*[@id="content"]/div/div[1]/div/table/tr/td[2]')
            for book in books:
                book_name = book.xpath('./div[1]/a/@title')[0]
                rating = book.xpath('./div[2]/span[2]/text()')[0]
                rating_num = book.xpath('./div[2]/span[3]/text()')[0].strip('() ')  # strip leading/trailing "(", ")" and space characters
                try:
                    comment = book.xpath('./p[2]/span/text()')[0]
                except IndexError:  # some books have no one-line comment
                    comment = ""
                book_link = book.xpath('./div[1]/a/@href')[0]

                w.writerow([book_name, rating, rating_num, comment, book_link])
            time.sleep(1)
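    If a header row is wanted, csv.DictWriter is a convenient variant of method 2. The sketch below is my addition; the field names and the sample row are illustrative placeholders, not data from the original post:

    import csv

    fieldnames = ["book_name", "rating", "rating_num", "comment", "book_link"]
    with open("F:/book_top250.csv", "w", newline="", encoding="utf-8-sig") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()  # writes: book_name,rating,rating_num,comment,book_link
        # inside the scraping loop, one call per book:
        writer.writerow({
            "book_name": "示例书名",
            "rating": "9.0",
            "rating_num": "10000人评价",
            "comment": "示例短评",
            "book_link": "https://book.douban.com/subject/xxxxxxx/",
        })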

  • Original article: https://www.cnblogs.com/duanlinxiao/p/9820685.html