1
2 # -*- coding: utf-8 -*-
3 import scrapy
4 from itTeachers.items import ItteachersItem
5
6
class ItcastSpider(scrapy.Spider):
    """Spider that collects teacher entries from the itcast.cn teacher page.

    Each ``<div class="li_txt">`` node in the response is turned into one
    ``ItteachersItem`` carrying the teacher's name, title and info text.
    """

    name = 'itcast'
    allowed_domains = ['itcast.cn']
    start_urls = ['http://www.itcast.cn/channel/teacher.shtml#']

    def parse(self, response):
        """Extract one ``ItteachersItem`` per teacher block in ``response``.

        Args:
            response: The downloaded page; only its ``xpath`` method is used.

        Returns:
            A list of ``ItteachersItem`` objects. The 'name', 'title' and
            'info' fields hold the first text node of the block's ``h3``,
            ``h4`` and ``p`` children respectively, or an empty string when
            that child has no text node.
        """
        # Item field -> XPath (relative to a teacher block) yielding its text.
        field_queries = (
            ('name', 'h3/text()'),
            ('title', 'h4/text()'),
            ('info', 'p/text()'),
        )

        items = []
        for each in response.xpath('//div[@class="li_txt"]'):
            # Wrap the scraped data in an ItteachersItem object.
            item = ItteachersItem()
            for field, query in field_queries:
                # extract() returns a list of matches. Indexing [0] blindly
                # raised IndexError on a block missing one of the tags and
                # discarded every item of the page, so fall back to ''.
                texts = each.xpath(query).extract()
                item[field] = texts[0] if texts else ''
            items.append(item)

        # Hand the collected items back to the engine in one go.
        return items
~