提取日志中未落入标准字段的 mime，按 adx、adtype 统计 mime 的数量以及包含 js 的数量占比
require 'date'
require 'net/http'
require 'uri'
require 'json'

# Collects the MIME types advertised by the first impression of a parsed
# bid-request body and updates the running counters in place.
#
# Only the first element of bodyobj["imp"] is inspected; its "banner" and
# "video" objects are each checked for a "mimes" array.
#
# @param adx [Object] exchange identifier; stringified and used as key prefix
# @param bodyobj [Object] parsed JSON body; anything other than a Hash is ignored
# @param totalmimes [Hash] counter keyed by "<adx>_<mime>", incremented per mime
# @param statics [Hash] counter hash; 'includmime' is incremented once per
#   banner/video object that carries a mimes array, 'includejs' once per mime
#   whose text contains "javascript"
# @return [Object] not meaningful; the method works through its side effects
def getmimes(adx, bodyobj, totalmimes, statics)
  return unless bodyobj.is_a?(Hash)

  imps = bodyobj['imp']
  return unless imps.is_a?(Array)

  # An empty "imp" array or a non-object entry carries no mimes.
  first_imp = imps.first
  return unless first_imp.is_a?(Hash)

  mimes = []
  %w[banner video].each do |slot|
    slot_obj = first_imp[slot]
    next unless slot_obj.is_a?(Hash)

    slot_mimes = slot_obj['mimes']
    next unless slot_mimes.is_a?(Array)

    statics['includmime'] += 1
    mimes.concat(slot_mimes)
  end

  mimes.each do |mime|
    # Stringify first so a non-string entry cannot break the substring test.
    mime_name = mime.to_s
    statics['includejs'] += 1 if mime_name.include?('javascript')
    totalmimes["#{adx}_#{mime_name}"] += 1
  end
end

# Log file to analyse; an optional first command-line argument overrides the default.
filepath = ARGV[0] || "/data/mvdsp/log/request.log.2017-11-30-12"
puts filepath

totalmimes = Hash.new(0)
statics = Hash.new(0)

begin
  # File.foreach closes the file when iteration finishes or raises.
  File.foreach(filepath) do |line|
    statics['total'] += 1

    # Lines shorter than 1000 characters are counted as invalid and skipped.
    if line.length < 1000
      statics['invalidbody'] += 1
      next
    end

    unless line.valid_encoding?
      # Round-trip through UTF-16BE to replace invalid byte sequences with "?".
      s = line.encode("UTF-16be", invalid: :replace, replace: "?").encode('UTF-8')
      # NOTE(review): this rewrites every "dr" (any case) to "med" on repaired
      # lines only; the purpose is not evident from this script - confirm it is wanted.
      line = s.gsub(/dr/i, 'med')
    end

    fields = line.split(" ")
    next if fields.length < 10

    adx = fields[3]
    jsonstr = fields[6]
    # TODO: per-adtype grouping is not implemented. Field 45 is a plain string
    # after split, so a 'reqtype' lookup needs its format to be known first.

    begin
      bodyobj = JSON.parse(jsonstr)
    rescue JSON::ParserError
      # Surface unparsable bodies in the printed statistics instead of dropping them silently.
      statics['invalidjson'] += 1
      next
    end

    getmimes(adx, bodyobj, totalmimes, statics)
  end
rescue SystemCallError => e
  puts "warn:: cannot read #{filepath}: #{e.message}"
end

puts "-----------totalmimes---------------------"
puts totalmimes.inspect
sorted = totalmimes.sort_by { |_key, value| value }
sorted.each { |key, count| puts "#{key}\t#{count}" }
puts "-----------statics--------------"
puts statics.inspect
puts "--------------------------------"
Ruby: sorting a Hash by value
# Three ways to order a Hash's entries by value (ascending).
hsh = { "a" => 1000, "b" => 10, "c" => 200000 }

# sort_by yields an Array of [key, value] pairs; Hash[] turns it back into a Hash.
sorted_via_brackets = Hash[hsh.sort_by { |_key, value| value }]

# or: the same conversion with Array#to_h
sorted_via_to_h = hsh.sort_by { |_key, value| value }.to_h

# or: sort on the last element of each pair. This form returns the Array of
# [key, value] pairs itself; append .to_h if a Hash is needed.
sorted_pairs = hsh.sort_by(&:last)