Python常用功能函数汇总
1.按行写字符串到文件中
import json
import os
import sys
import time


def saveContext(filename, *name):
    """Append one '^'-delimited record, terminated by a newline, to a file.

    Args:
        filename: Path of the file to append to. Surrounding whitespace is
            stripped. Missing parent directories are created.
        *name: Fields of the record. Every field is converted with ``str()``
            and the fields are joined with '^'. With no fields an empty
            line is written.
    """
    separator = '^'
    # Converting every field (not only the trailing ones) lets the first
    # field be a non-string value such as an int.
    record = separator.join(str(field) for field in name)
    # NOTE(review): the original chained four str.replace() calls whose
    # search and replacement text were identical, i.e. no-ops as written.
    # Presumably they once mapped full-width punctuation to ASCII; restore
    # them if that normalisation is required.

    filename = filename.strip()

    # A bare file name has an empty directory part; os.makedirs('') raises,
    # so only create the directory when there is one to create.
    directory = os.path.dirname(filename)
    if directory and not os.path.exists(directory):
        os.makedirs(directory)

    # One record per line: the snippet is meant to write line by line, but
    # the original terminator literal was a single space.
    with open(filename, 'a') as fp:
        fp.write(record + '\n')
2.创建初始化浏览器
# coding: utf-8
import json
import os
import sys
import time
import urllib2
from urlparse import urljoin

from lxml import etree
from pyquery import PyQuery as pq
from selenium import webdriver

# Python 2 only: make utf-8 the interpreter-wide default encoding.
reload(sys)
sys.setdefaultencoding("utf-8")


def init_drive():
    """Create and configure a PhantomJS WebDriver instance.

    The driver is started with a fixed desktop Chrome user agent, a fixed
    Referer header, image loading and disk cache enabled, and SSL errors
    ignored. Page-load and script timeouts are both set to 60.

    Returns:
        The configured ``webdriver.PhantomJS`` instance. The caller is
        responsible for calling ``quit()`` on it.
    """
    ua = "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.3 Safari/537.36"

    # Work on a copy: DesiredCapabilities.PHANTOMJS is a class-level dict,
    # so writing to it directly would change the defaults for every other
    # user of the class in this process.
    cap = webdriver.DesiredCapabilities.PHANTOMJS.copy()
    cap["phantomjs.page.settings.resourceTimeout"] = 20000
    cap["phantomjs.page.settings.loadImages"] = True
    cap["phantomjs.page.settings.disk-cache"] = True
    cap["phantomjs.page.settings.userAgent"] = ua
    cap["phantomjs.page.customHeaders.User-Agent"] = ua
    cap["phantomjs.page.customHeaders.Referer"] = "http://tj.ac.10086.cn/login/"

    # The original kept a commented-out variant that additionally passed
    # executable_path='/home/shutong/phantomjs/bin/phantomjs'; use it when
    # the phantomjs binary needs an explicit location.
    driver = webdriver.PhantomJS(
        desired_capabilities=cap,
        service_args=['--ignore-ssl-errors=true'],
    )
    driver.set_page_load_timeout(60)
    driver.set_script_timeout(60)
    return driver
其中，使用该浏览器获取网页HTML的示例如下：
# Create the browser, load the page and read the rendered HTML.
driver = init_drive()
try:
    driver.get(url)
    html = driver.page_source
finally:
    # Always shut the browser down, even when loading the page fails,
    # so that no PhantomJS process is left running.
    driver.quit()
3.根据URL获取网页HTML的函数
# coding: utf-8
import json
import os
import random
import re
import sys
import time
import urllib2

import numpy as np
import requests

# Python 2 only: make utf-8 the interpreter-wide default encoding.
reload(sys)
sys.setdefaultencoding("utf-8")

# User-Agent strings to choose from; built once instead of on every call.
_USER_AGENTS = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv2.0.1) Gecko/20100101 Firefox/4.0.1",
    "Mozilla/5.0 (Windows NT 6.1; rv2.0.1) Gecko/20100101 Firefox/4.0.1",
    "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; en) Presto/2.8.131 Version/11.11",
    "Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11",
)


def getHtml(url):
    """Fetch ``url`` and return the raw response body.

    A User-Agent header is picked at random from ``_USER_AGENTS`` for each
    request, and the request uses a timeout of 60.

    Args:
        url: Address to request.

    Returns:
        The response body exactly as read from the connection. It is not
        decoded; callers may decode it themselves, for example with
        ``unicode(html, 'utf-8')``.

    Raises:
        Whatever ``urllib2.urlopen`` or ``read`` raises; no error is
        caught here.
    """
    request = urllib2.Request(url)
    request.add_header("User-Agent", random.choice(_USER_AGENTS))
    response = urllib2.urlopen(request, data=None, timeout=60)
    try:
        return response.read()
    finally:
        # Release the connection even when reading the body fails.
        response.close()
4.获取时间的不同格式
import datetime
import httplib
import json
import os
import shutil
import sys
import time
import urllib
import urllib2
from urlparse import urljoin

import MySQLdb
from lxml import etree
from pyquery import PyQuery as pq
from selenium import webdriver

# Make httplib speak HTTP/1.0 for every connection.
httplib.HTTPConnection._http_vsn = 10
httplib.HTTPConnection._http_vsn_str = 'HTTP/1.0'

# Python 2 only: make utf-8 the interpreter-wide default encoding.
reload(sys)
sys.setdefaultencoding("utf-8")

# Formats getTime honours as given; anything else falls back to the default.
_SUPPORTED_TIME_FORMATS = ('%Y-%m-%d', '%Y%m%d', '%Y%m%d%H')
_DEFAULT_TIME_FORMAT = '%Y-%m-%d %H:%M:%S'


def getTime(*format):
    """Return the current local time as a formatted string.

    Args:
        *format: Optional. Only the first value is looked at:
            '%Y-%m-%d'  -> e.g. 2017-11-18
            '%Y%m%d'    -> e.g. 20171118
            '%Y%m%d%H'  -> e.g. 2017111817
            Anything else, or no argument at all, selects
            '%Y-%m-%d %H:%M:%S' -> e.g. 2017-11-18 17:26:35

    Returns:
        The formatted local time.
    """
    # An explicit emptiness check replaces the original bare ``except``
    # around ``format[0]``.
    requested = format[0] if format else None
    if requested in _SUPPORTED_TIME_FORMATS:
        pattern = requested
    else:
        pattern = _DEFAULT_TIME_FORMAT
    return time.strftime(pattern, time.localtime(time.time()))
5.连接MySQL并执行SQL语句
# -*- coding: utf-8 -*-
import datetime
import httplib
import json
import os
import shutil
import sys
import time
import urllib
import urllib2
from urlparse import urljoin

import MySQLdb
from lxml import etree
from pyquery import PyQuery as pq
from selenium import webdriver

# Make httplib speak HTTP/1.0 for every connection.
httplib.HTTPConnection._http_vsn = 10
httplib.HTTPConnection._http_vsn_str = 'HTTP/1.0'

# Python 2 only: make utf-8 the interpreter-wide default encoding.
reload(sys)
sys.setdefaultencoding("utf-8")

# Statements run on the connection before the caller's SQL so that the
# exchange uses utf8.
_CHARSET_SETUP_STATEMENTS = (
    'set character_set_client = utf8',
    'set character_set_server = utf8',
    'set character_set_connection = utf8',
    'set character_set_results = utf8',
    'set collation_connection = utf8_general_ci',
    'set collation_server = utf8_general_ci',
)


def conn_mysql(host='192.168.11.43', user='root', passwd='root', db='edw',
               port=3306):
    """Open a MySQL connection.

    Args:
        host: Server address.
        user: Login name.
        passwd: Login password.
        db: Database to select.
        port: Server port (previously hard-coded to 3306).

    Returns:
        The ``MySQLdb`` connection, or the empty string ``''`` when the
        connection attempt fails (a failure message is printed). The empty
        string is kept as the failure value so existing callers that test
        for it keep working.
    """
    conn = ''
    try:
        conn = MySQLdb.connect(
            host=host,
            port=port,
            user=user,
            passwd=passwd,
            db=db,
        )
    except MySQLdb.Error:
        # Only database errors are treated as "connection failed"; other
        # exceptions (e.g. programming mistakes) are no longer hidden.
        print("连接mysql失败")
    return conn


def excute_sql(conn, sql, args=None):
    """Run one SQL statement, commit, close the connection, return the rows.

    Args:
        conn: Open connection, e.g. from ``conn_mysql``. It is always closed
            before this function returns or raises, so it cannot be reused.
        sql: Statement to execute.
        args: Optional parameters handed to ``cursor.execute`` together with
            ``sql``. Prefer this over building ``sql`` by string
            concatenation when values come from outside the program.

    Returns:
        The rows fetched from the cursor after executing ``sql``.
    """
    try:
        cur = conn.cursor()
        try:
            for statement in _CHARSET_SETUP_STATEMENTS:
                cur.execute(statement)
            # execute() returns a row count, which is used as the number
            # of rows to fetch.
            row_count = cur.execute(sql, args)
            result = cur.fetchmany(row_count)
        finally:
            cur.close()
        conn.commit()
    finally:
        # Close the connection on failure too, so it is not leaked.
        conn.close()
    return result