通过生成器不断爬取网页(非闭包)
#!/bin/python3
# -*- coding:utf-8 -*-
from urllib.request import urlopen
def get():
    """Generator-based crawler: fetch and print every URL sent into it.

    Prime the generator with ``next()`` first; this prints the start-up
    banner and advances to the first ``yield``.  Afterwards each
    ``send(url)`` fetches ``url`` with ``urlopen`` and prints the raw
    response bytes.  The generator yields no meaningful value (``send``
    returns ``None``) and loops until it is closed.

    A fetch that fails with ``OSError`` (the base class of
    ``urllib.error.URLError``) or ``ValueError`` (e.g. an unknown URL type)
    is reported on stdout instead of propagating, so one bad URL does not
    terminate the generator.
    """
    print("start crawing...")
    while True:  # keep accepting URLs until the caller closes the generator
        url = yield  # receive the next URL passed in through send()
        try:
            # The context manager closes the response even if read() fails.
            with urlopen(url) as response:
                print(response.read())  # print the fetched content
        except (OSError, ValueError) as exc:
            print("failed to fetch {!r}: {}".format(url, exc))
def main():
    """Prompt for URLs in a loop and feed each one to the crawler generator.

    Entering ``q`` or reaching end-of-input leaves the loop; blank input is
    ignored.  The generator is closed on the way out.
    """
    g = get()  # create the generator object
    next(g)  # prime it: run up to the first ``yield``
    try:
        while True:
            try:
                url = input("url:")  # read the next URL from the user
            except EOFError:
                break  # stdin exhausted: treat like an explicit quit
            if url == 'q':
                break
            if not url:
                continue  # nothing typed; prompt again
            g.send(url)  # hand the URL to the generator
    finally:
        g.close()
# Script entry point: start the interactive crawler only when run directly.
if __name__ == "__main__":
    main()
闭包实现
#!/bin/python3
# -*- coding:utf-8 -*-
from urllib.request import urlopen
def get(url):
    """Return a generator function, closed over ``url``, that crawls pages.

    Calling the returned function creates a generator.  The first
    ``next()`` fetches ``url`` and yields the response body as bytes; every
    later ``send(new_url)`` fetches ``new_url`` and yields its body.
    Exceptions raised by ``urlopen`` or ``read`` propagate to the caller and
    end the generator.
    """
    def index():
        temp_url = url  # start with the URL captured by the closure
        while True:
            # Read inside ``with`` so the response is closed before the
            # generator suspends at ``yield``.
            with urlopen(temp_url) as response:
                html_data = response.read()
            temp_url = yield html_data  # hand back the page, wait for next URL
    return index
def main():
    """Interactively fetch and print pages until the user enters ``q``.

    Each prompt reads one URL.  ``q`` (at any prompt, including the first)
    or end-of-input stops the loop.  The crawler generator is created on
    the first URL and re-created after a failed fetch, because an exception
    raised inside a generator finishes it.
    """
    crawler = None  # active generator, or None when one must be (re)built
    while True:
        try:
            url = input("url:")
        except EOFError:
            break  # stdin exhausted: treat like an explicit quit
        if url == 'q':
            break
        try:
            if crawler is None:
                crawler = get(url)()
                page = next(crawler)  # first fetch uses the closure's URL
            else:
                page = crawler.send(url)
        except (OSError, ValueError) as exc:
            # Network/URL errors: report, then start fresh on the next URL.
            print("failed to fetch {!r}: {}".format(url, exc))
            crawler = None
            continue
        print(page)
    if crawler is not None:
        crawler.close()
# Script entry point: start the interactive crawler only when run directly.
if __name__ == "__main__":
    main()