• 日志分析-mime统计


    提取日志中未落入标准字段的 mime,按 adx、adtype 分组,统计 mime 的数量以及其中包含 js 的 mime 所占的比例。

    require 'date'
    require 'net/http'
    require 'uri'
    require 'json'
    
    # Collects the MIME types listed by the first impression of a parsed
    # request body and updates the running counters in place.
    #
    # Only the first element of bodyobj['imp'] is inspected; its 'banner' and
    # 'video' objects are each checked for a 'mimes' array.
    #
    # @param adx [Object] exchange identifier; prefixed (via to_s) to every MIME key
    # @param bodyobj [Hash] parsed request body; any non-Hash value is ignored
    # @param totalmimes [Hash] counter keyed by "<adx>_<mime>", incremented once per
    #   MIME seen; must default missing keys to 0 (e.g. Hash.new(0))
    # @param statics [Hash] counter hash with the same default-to-0 requirement;
    #   'includmime' is incremented for each banner/video object carrying a mimes
    #   array, 'includejs' for each MIME whose text contains "javascript"
    # @return [Array, nil] the collected MIME list, or nil when nothing was collected
    def getmimes(adx, bodyobj, totalmimes, statics)
      return unless bodyobj.is_a?(Hash)

      # Only an Array-valued "imp" can hold impression objects. Checking for it
      # directly avoids the Fixnum constant (removed in Ruby 3.2) and avoids
      # indexing into numbers or empty arrays.
      imps = bodyobj['imp']
      first_imp = imps.is_a?(Array) ? imps.first : nil
      return unless first_imp.is_a?(Hash)

      mimes = []
      %w[banner video].each do |slot|
        slot_obj = first_imp[slot]
        slot_mimes = slot_obj.is_a?(Hash) ? slot_obj['mimes'] : nil
        next unless slot_mimes.is_a?(Array)

        statics['includmime'] += 1
        mimes += slot_mimes
      end
      return if mimes.empty?

      mimes.each do |mime|
        # to_s keeps a non-String entry (nil, number) from raising on include?
        statics['includejs'] += 1 if mime.to_s.include?('javascript')
        totalmimes["#{adx}_#{mime}"] += 1
      end
    end
    
    
    # Driver: scans one tab-separated request log, feeds the JSON body of every
    # usable line to getmimes, then prints the per-"<adx>_<mime>" totals (raw and
    # sorted by count) followed by the summary counters.
    filepath = "/data/mvdsp/log/request.log.2017-11-30-12"
    puts filepath
    totalmimes = Hash.new(0)
    statics = Hash.new(0)
    begin
      # File.foreach closes the file when iteration finishes or raises;
      # File.open(...).each left the handle open.
      File.foreach(filepath) do |line|
        statics['total'] += 1
        # Lines shorter than 1000 characters are counted as having no usable body.
        if line.length < 1000
          statics['invalidbody'] += 1
          next
        end

        # Replace invalid byte sequences with "?" so split/JSON.parse do not raise.
        # The earlier gsub(/dr/i, 'med') rewrote log content on such lines and
        # has been dropped.
        line = line.scrub('?') unless line.valid_encoding?

        fields = line.split("\t")
        next if fields.length < 10

        adx = fields[3]
        # NOTE(review): grouping by adtype is not implemented. The previous
        # attempt read fields[45]['reqtype'], but a split field is always a
        # String, so that branch never ran and its result was unused.

        begin
          bodyobj = JSON.parse(fields[6])
        rescue JSON::ParserError
          # Report unparseable bodies instead of counting them in an unused local.
          statics['invalidjson'] += 1
          next
        end
        getmimes(adx, bodyobj, totalmimes, statics)
      end
    rescue SystemCallError
      puts "warn:: #{filepath} not  exits!!"
    end



    puts "-----------totalmimes---------------------"
    print totalmimes
    # sort_by yields [key, count] pairs in ascending count order.
    sorted = totalmimes.sort_by { |_key, value| value }
    puts sorted

    puts "-----------statics--------------"
    print statics
    puts "--------------------------------"

     Ruby: sorting a hash by value

    # Sample hash whose entries are to be ordered by value.
    hsh ={"a" => 1000, "b" => 10, "c" => 200000}
    # sort_by yields [key, value] pairs in ascending value order; Hash[] turns
    # the pairs back into a Hash.
    Hash[hsh.sort_by{|k,v| v}] 
    # or: the same conversion using Array#to_h
    hsh.sort_by{|k,v| v}.to_h
    # or: sort on the last element of each pair (the value); this form returns
    # an array of [key, value] pairs rather than a Hash
    hsh.sort_by(&:last)
  • 相关阅读:
    tee命令
    linux优化之SElinux关闭
    (1)使用bash脚本实现批量添加用户
    Django admin管理工具的使用、定制及源码解析
    Mysql常见命令
    树梅派
    19道Python循环遍历,while,for语句测试题,网上看到的题目,自己不看答案全部做了一次,总共3个小时左右
    9*9的矩形,中间有个星号,按不同方向键,星星对应移动
    app在admin中显示成我们想要的中文名
    九九乘法表
  • 原文地址:https://www.cnblogs.com/lavin/p/7940975.html
Copyright © 2020-2023  润新知