Python 提取网页数据

#coding:utf-8
import urllib2
import os
import re
def dow(url):
    """Download ``url`` and return the raw response body.

    The response is always closed, even when reading fails, so the
    underlying connection is not left open. Any error raised by
    ``urllib2.urlopen`` or ``read`` propagates to the caller unchanged.
    """
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()
# Collect the "<N> theaters showing <M> screenings" snippets from the page,
# in page order. They are paired with the films below purely by position.
page_html = dow('http://theater.mtime.com/China_Beijing/')
# Raw string so that \d reaches the regex engine as the digit class; without
# the backslashes the pattern only matched literal "d" characters.
lst = re.findall(r'\d+家影院上映\d+场', page_html)

# NOTE(review): the page is requested a second time here, now with an explicit
# User-Agent header -- presumably the film list needs it; confirm whether the
# first request could reuse this response.
url = 'http://theater.mtime.com/China_Beijing'
req = urllib2.Request(url, headers={'User-Agent': "Magic Browser"})
webpage = urllib2.urlopen(req)
try:
    strw = webpage.read()
finally:
    # Release the connection even if reading fails.
    webpage.close()

# The film list is embedded in the page as "hotplaySvList = [ ... ];".
start_tag = 'hotplaySvList = ['
tg_start = strw.find(start_tag)
if tg_start == -1:
    print('not find start tag')
    # SystemExit (unlike os._exit) flushes stdout so the message above is not
    # lost, and a non-zero status reports the failure to the caller.
    raise SystemExit(1)

# Search for the terminating ';' from the start tag onwards in the full text,
# so the last character of the page is not silently dropped.
tg_end = strw.find(';', tg_start)
if tg_end == -1:
    print('not find end tag')
    raise SystemExit(1)
tmp = strw[tg_start + len(start_tag):tg_end]

# Each list entry looks like {...}; splitting on "},{" separates the entries.
tar_ls = tmp.split("},{")

# Maps film id -> film title.
dict_film = {}
for i, t0 in enumerate(tar_ls):
    ls_t = t0.split(',')
    # The id is the value of the first "key:value" field of the entry.
    film_id = ls_t[0].split(':')[-1].strip()
    # The title is the last double-quoted value of the entry's final field.
    title_parts = ls_t[-1].split('"')
    if len(title_parts) < 2:
        # No quoted title (e.g. an empty list): nothing to record.
        continue
    film = title_parts[-2].strip()

    fields = [film_id, film]
    # There may be fewer screening snippets than films; print what exists
    # instead of failing with an IndexError.
    if i < len(lst):
        fields.append(lst[i])
    print(' '.join(fields))

    dict_film[film_id] = film
print(len(dict_film))

相关阅读:
win10+Linux双系统安装及一些配置问题
第3讲--3.1旋转矩阵
【读诗】宣州谢朓楼饯别校书叔云
【2】python：end=' '
如何与国外导师联系
PointNet
点云深度学习
ES6常用方法
监听滚动条、上下联动
echarts 左右滚动

原文地址：https://www.cnblogs.com/doublekai/p/6933119.html