通过python操作GeoLite2-City.mmdb库将nginx日志写入数据库
# 创建存放nginx日志的表accesslog2
CREATE TABLE `accesslog2` ( `id` bigint(20) NOT NULL AUTO_INCREMENT, `logtime` datetime DEFAULT NULL, `ip` varchar(128) DEFAULT NULL, `url` text, `status` int(11) DEFAULT NULL, `lat` float DEFAULT NULL, `lng` float DEFAULT NULL, `city` varchar(50) DEFAULT NULL, PRIMARY KEY (`id`) ) ENGINE=InnoDB AUTO_INCREMENT=896 DEFAULT CHARSET=utf8;
# 安装geoip2模块
# sudo pip2 install geoip2
#encoding=utf-8 import time import geoip2.database from dbutils import MysqlConnection # 找出ip所在城市的日志处理 def log_2db_4map(log_file): # 清空数据库信息 MysqlConnection.execute_sql('truncate table accesslog2') path=log_file shandle = open(path, 'r') log_dict = {} # 读取ip数据库 reader = geoip2.database.Reader('GeoLite2-City.mmdb') while True: line = shandle.readline() if line == '': break _nodes = line.split() # 访问url,来访ip,http状态码,访问时间 _url,_ip,_status,_lgtime = _nodes[6], _nodes[0], _nodes[8],_nodes[3][1:] # 将日志访问的时间"22/Oct/2017:03:28:01"转成 2017-11-23 10:08:18 类似的格式 _ltime = time.strftime('%Y-%m-%d %H:%M:%S',time.strptime(_lgtime,'%d/%b/%Y:%H:%M:%S')) # 获取城市信息 try: response = reader.city(_ip) # 如果国家不是中国跳出本次循环 if 'China' != response.country.name: continue # 获取城市 _city = response.city.names.get('zh-CN','') if _city == '': print 'ip: %s city is empty' % _ip continue # 获取经度和纬度 _lat = response.location.latitude _lng = response.location.longitude # print response except Exception as e: print 'goe has not %s info' % _ip _args = (_ltime,_ip,_url,_status, _lat,_lng,_city) # 插入数据库语句 sql = 'insert into accesslog2(logtime, ip, url,status,lat,lng,city) values(%s, %s, %s,%s,%s,%s,%s)' MysqlConnection.execute_sql(sql, _args) # 关闭文件句柄 shandle.close() # 文件入口 if __name__ == '__main__': # nginx日志文件 log_file = 'www_access.log' rt_list = log_2db_4map(log_file = log_file)
连接数据库和操作数据库的底层模块参考:
python操作mysql数据库增删改查的dbutils实例
http://www.cnblogs.com/reblue520/p/7884365.html