"""Check which URLs listed in list.txt appear in Baidu's search results.

Reads one URL per line from ``list.txt`` and writes one line per input line
to ``check.txt``: the normalized URL when it was found, otherwise ``None``.
"""
import random
import time
import urllib
import urllib2

from BeautifulSoup import BeautifulSoup


def checkIndex(url):
    """Search Baidu for *url* and report whether it shows up in the result.

    The URL is stripped of surrounding whitespace and of any ``http://``
    text, Baidu is queried for it, and the children of the first
    ``<span class="g">`` element on the results page are searched for the
    normalized URL.

    Args:
        url: The address to look up, e.g. a raw line read from a file.

    Returns:
        The normalized URL if it occurs in the inspected span, otherwise
        ``None`` (also for an empty / whitespace-only *url*).

    Raises:
        urllib2.URLError: If the request to Baidu fails.
    """
    # Lines read from a file still carry their trailing newline; left in
    # place it ends up in the query string and defeats the substring match.
    url = url.strip().replace('http://', '')
    if not url:
        # Nothing to look up; skip the pointless request.
        return None

    # Percent-encode so characters such as '&', '#' or spaces stay inside
    # the wd= parameter instead of truncating or splitting the query.
    baiduUrl = 'http://www.baidu.com/s?wd=' + urllib.quote(url)
    webPage = urllib2.urlopen(baiduUrl)
    try:
        webCont = webPage.read()
    finally:
        # Release the connection even when reading fails.
        webPage.close()

    # Drop <b> tags (presumably keyword highlighting) so the URL text in the
    # page is contiguous and can be matched as a plain substring.
    webCont = webCont.replace('<b>', '').replace('</b>', '')
    soup = BeautifulSoup(webCont)

    # NOTE(review): find() returns only the first matching span; if every
    # search result should be inspected, findAll() is probably what was
    # intended -- confirm before changing.
    span = soup.find('span', {'class': 'g'})
    if span is None:
        return None

    # Look at every child of the span; only give up once none of them
    # contains the URL.
    for child in span:
        if url in unicode(child):
            return url
    return None


# Driver: check every URL in list.txt and record the outcome in check.txt.
# The with-blocks guarantee both files are closed even if a request raises.
with open('list.txt') as urllist:
    with open('check.txt', 'w') as res:
        for eachurl in urllist:
            res.write(str(checkIndex(eachurl)) + '\n')
            # Draw a fresh random pause per request rather than reusing one
            # value for the whole run.
            waittime = random.randint(1, 20)
            time.sleep(waittime)

# Parenthesized form prints the same text under Python 2.
print('over!')