• 批量提取cuckoo样本分析结果到数据库


    前言: cuckoo是一个强大的工具,几乎我们需要的样本信息cuckoo都可以提供,这两天写了一个提取cuckoo检测信息批量导入数据库的小程序,自觉程序写得少,有很多不足,放上源码,有错误或可以优化的地方希望大家不吝赐教~

    开发环境:python2.7 + scapy + CuckooCentOS

    也可以移步https://github.com/Viwilla/CukooToMySQL下载源代码

      1 #----------------------------------------------------
      2 #数据库连接在254行,注意端口不要用引号括起来
      3 #linux下跑可能会遇到编码问题,请去掉我的中文注释
      4 #自觉写程序的经验不足
      5 #有错误或可优化的地方希望大家不吝赐教
      6 #_author_ = Vi
      7 #https://github.com/Viwilla
      8 #http://www.cnblogs.com/Viwilla/
      9 #----------------------------------------------------
     10 import json
     11 import codecs
     12 import sqlite3
     13 import os,sys,os.path
     14 import MySQLdb
     15 import scapy.all as scapy
     16 import binascii
     17 import shutil
     18 reload(sys)
     19 sys.setdefaultencoding('utf-8')
     20 
     21 #-------------------------------------------------------------
     22 #如果要解析http协议,可以导入http包实现
     23 #linux导入http失败,因此注释掉这一段,直接提取整个http内容
     24 #后面保留了解析http的代码
     25 #--------------------------------------------------------------
     26 #try:
     27     # This import works from the project directory
     28     #import scapy_http.http
     29 #except ImportError:
     30     # If you installed this package via pip, you just need to execute this
     31     #from scapy.layers import http
     32 
# Number of samples skipped because their MD5 was already known
# (incremented in main()).
re = 0
# NOTE: main() assigns its own local `add`, so this module-level value is
# never updated.
add = 0
# Row ID used for the next INSERT; main() seeds it and ToDB() increments it.
ID = 0
# A `global` statement has no effect at module level; _MD5 itself is first
# assigned inside ReadJSON().
global _MD5
# MD5s treated as already imported (filled by CountMD5() and ReadJSON()).
totalMD5 = []
# De-duplication keys (MD5 + DNS + IP + flow) of rows ReadPcap() has inserted.
strtotal = []
# Set to 1 by ReadPcap() once it has processed a packet; main() resets it to 0.
flag = 0
# ReadJSON() only runs its duplicate-MD5 check while this is 0; main() resets
# it to 0.
ReFlag = 0
     41 
     42 #------------------------------------------------
     43 #ConnectDB(host, user ,paaawd,dbname,port)
     44 #------------------------------------------------
     45 def ConnectDB(h, u ,pa,d,p):
     46     try:
     47         global cur
     48         global conn        
     49         ISOTIMEFORMAT = '%Y-%m-%d %X'
     50         conn = MySQLdb.connect(
     51             host = h, 
     52             user = u,
     53             passwd = pa,
     54             db = d,
     55             port = p)
     56         cur = conn.cursor()
     57         print("use success")
     58     except :
     59         print "use DB failed"
     60 #------------------------------------------------
     61 #寻找最大的ID,下一次插入数据从该ID之后插入
     62 #Find the max ID in DataBase
     63 #------------------------------------------------
     64 def SelectID():
     65     str = "SELECT MAX(ID) FROM samplesinfo3"
     66     global cur
     67     cur.execute(str)
     68     ID = cur.fetchall()
     69     return ID[0][0]
     70 #------------------------------------------------
     71 #统计MD5
     72 #若数据库中已有该MD5检测信息则跳过
     73 #------------------------------------------------
     74 def CountMD5():
     75     query = "SELECT  SampleMD5 FROM samplesinfo3"
     76     cur.execute(query)
     77     md5 = cur.fetchall()
     78     global totalMD5
     79     for data in md5:
     80     if data[0] not in totalMD5:
     81         totalMD5.append(data[0])
     82         
     83 #------------------------------------------------
     84 #解析cuckoo生成的“report.json”文件
     85 #提取我们需要的字段
     86 #------------------------------------------------
     87 def ReadJSON(file):
     88     global ID
     89     with open(file) as data_file:
     90         data = json.load(data_file)
     91     _SHA1 = data['target']['file']['sha1']
     92     global _MD5
     93     _MD5  = data['target']['file']['md5']
     94     if ReFlag == 0:
     95         if _MD5 not in totalMD5:
     96         totalMD5.append(_MD5)
     97         elif _MD5 in totalMD5:
     98         return 0
     99     _Type = data['target']['file']['type']
    100     if not  _Type:
    101         _Type = ''
    102     _Yara= data['target']['file']['yara']
    103     if not _Yara:
    104         _Yara= ''
    105     try:
    106         _360AV = data['virustotal']['scans']['Qihoo-360']['result']
    107         if not _360AV:
    108             _360AV = ''
    109     except:
    110         _360AV = ''
    111     try:
    112         _Avira = data['virustotal']['scans']['Avira']['result']
    113         if not Avira:
    114             Avira = ''
    115     except:
    116         _Avira = ''
    117     try:
    118         _ClamAV = data['virustotal']['scans']['ClamAV']['result']
    119         if not _ClamAV:
    120             _ClamAV = ''
    121     except: 
    122         _ClamAV = ''
    123     try:
    124         _Eset = data['virustotal']['scans']['ESET-NOD32']['result']
    125         if not _Eset:
    126             _Eset = ''
    127     except:
    128         _Eset = ''
    129     try:
    130         _F_Secure = data['virustotal']['scans']['F-Secure']['result']
    131         if not _F_Secure:
    132             _F_Secure = ''
    133     except:
    134         _F_Secure = ''    
    135     try:
    136         _Kaspersky = data['virustotal']['scans']['Kaspersky']['result']
    137         if not _Kaspersky:
    138             _Kaspersky = ''
    139     except:
    140         _Kaspersky = ''
    141     try:
    142         _Symantec = data['virustotal']['scans']['Symantec']['result']
    143         if not _Symantec:
    144             _Symantec = ''
    145     except:
    146         _Symantec = ''
    147 
    148     str1 = "{}".format(" '%s','%s',\"%s\",'%s','%s','%s','%s','%s','%s','%s','%s',"%(_SHA1, _MD5,_Type, _Yara ,_360AV, _Avira,  _ClamAV , _Eset ,_F_Secure, _Kaspersky, _Symantec))
    149     return str1
    150 
    151 #----------------------------------------------
    152 #解析cuckoo生成的Pcap包
    153 #提取我们需要的信息
    154 #----------------------------------------------
    155 def ReadPcap(file,str0,str1):
    156     packets = scapy.rdpcap(file)
    157     for p in packets:
    158     #显示scapy解析内容,调试用
    159         #print '=' * 78
    160         #p.show()
    161     strID = "('%d',"%ID
    162         _IP = ''
    163         _dns = ''
    164         _flow = ''
    165         if p.payload.name == 'ARP':
    166             continue
    167         if p.payload.name == 'IP':
    168         #保存IP
    169             if p.payload.src == '192.168.229.111':
    170                 dst ="dst_%s:%d"%(p.payload.dst, p.payload.payload.dport)
    171                 _IP = dst
    172             elif p.payload.dst =='192.168.229.111':
    173                 src ="src_%s:%d"%(p.payload.src, p.payload.payload.sport)
    174                 _IP = src
    175                 
    176             # 解析TCP协议     
    177             if p.payload.proto == 6:       
    178                 #提取十六进制流量数据
    179                 if  p.payload.payload.payload.name == 'Raw':
    180                     load = str(binascii.b2a_hex(p.load))
    181                     _flow = load
    182                 if  p.payload.payload.payload.name == 'HTTP':
    183                     if p.payload.payload.payload.payload.name == 'HTTP Response':
    184             #若要提取ascii数据可直接用p.payload
    185                         #ascii = p.load
    186                         #if ascii not in asciidata:
    187                             #asciidata.append(ascii) 
    188                         load = str(binascii.b2a_hex(p.load))
    189                         _flow = load
    190                             
    191             #解析UDP
    192             elif p.payload.proto == 17:
    193                 if  p.payload.payload.payload.name== 'Raw':
    194                     load = str(binascii.b2a_hex(p.load))
    195                     _flow = load   
    196         #保存DNS
    197                 if p.payload.payload.payload.name == 'DNS':
    198                     dns = p.payload.payload.payload.qd.qname
    199                     _dns = dns    
    200                 
    201             else:
    202                 print "No rule for protocol %s"%p.payload.proto
    203                 continue
    204                 
    205         else:
    206             print "No rule for %s"%p.payload.name
    207             continue
    208         
    209         #--------插入信息去重---------
    210         strc = _MD5 + _dns + _IP + _flow 
    211         if strc not in strtotal:
    212             strtotal.append(strc)
    213             str2 = "'%s','%s','%s');"%( _dns, _IP,_flow)             
    214             _str1 = str0 + strID + str1 +str2
    215             ToDB(_str1) 
    216         global flag
    217         flag = 1 
    218         continue
    219     #flag=1  : 标志该样本有流量信息
    220     #flag = 0:标志该样本无流量信息
    221     if flag == 0:
    222     strID = "('%d',"%ID
    223     str2 = "'%s','%s','%s');"%('','','')
    224     _str2 = str0 + strID + str1 + str2
    225     #print _str2
    226     ToDB(_str2)
    227     return 
    228 
    229 #------------------------------------------------
    230 #SQL操作
    231 #------------------------------------------------
    232 def ToDB(_str):
    233     #try:
    234         cur.execute(_str)
    235         conn.commit()
    236         global ID
    237         ID = ID +1
    238         addstr = " '%s' added"% _MD5
    239         print addstr
    240     ReFlag = 1  
    241     return
    242 
    243 #------------------------------------------------
    244 #main()
    245 #------------------------------------------------
    246 def main():
    247     rootdir = '/root/cuckoo/storage/analyses/'
    248     #n=样本个数+1
    249     n = len(os.listdir(rootdir))
    250     Js = "reports/report.json"
    251     pcap = 'dump.pcap'
    252     if not os.path.exists("pcap"):
    253     os.mkdir("pcap")
    254     ConnectDB('ip', 'username', 'password', 'database', 'port')#端口去掉引号!!
    255     global ID
    256     try:
    257         ID = SelectID() + 1
    258     except:
    259         ID = 1
    260     startID = ID
    261     str0 = "INSERT INTO samplesinfo(ID,SampleSHA1, SampleMD5, SampleType, Yara, 360AV, Avira, ClamAV, Eset ,F_Secure, Kaspersky, Symantec,DNS_IP, IP_Port,Flow)values"
    262     CountMD5()
    263     for id in range(1,n):
    264         file1 = rootdir + '%d/'%id + Js
    265         file2 = rootdir + '%d/'%id + pcap
    266         result = ReadJSON(file1) 
    267         if result == 0:
    268         print "%s already exists!"%_MD5
    269             global re
    270             re = re +1
    271             continue
    272     else:
    273         str1 = result
    274         ReadPcap(file2,str0,str1)
    275     global flag
    276     if flag == 1:
    277         pcapname = "pcap/%s"%_MD5
    278         if not os.path.exists(pcapname):
    279             shutil.copy(file2,pcapname)
    280         else:
    281         print "pcap '%s' exists"%_MD5
    282     flag = 0
    283     global ReFlag
    284     ReFlag = 0
    285         
    286     add = ID - startID
    287     print "%d items already exists!"%re
    288     print "Successfully add %d items, from %d to %d ."%(add,startID,ID - 1)
    289     cur.close()
    290     conn.close()
    291 
# Run the import only when this file is executed as a script.
if __name__ == '__main__':
    main()

# NOTE(review): this call sits outside the guard above, so it also runs when
# the module is imported.
exit()
  • 相关阅读:
    js-AOP
    jQueryUI之autocomplete
    nginx安装配置
    oracle结构语法
    ajax/表单提交 多个相同name的处理方法
    ES6模块化
    docker运维
    帆软报表
    oracle锁表
    香港到大陆IPLC节点故障
  • 原文地址:https://www.cnblogs.com/Viwilla/p/5013925.html
Copyright © 2020-2023  润新知