• unicorn模拟执行学习


    前言

    unicorn 是一个模拟执行软件,用于模拟执行各种平台的二进制文件,前几天在  twitter 上看到一篇文章,这里做个记录。

    正文

    记录系统调用

    首先是一个简单的示例

    e8ffffffffc05d6a055b29dd83c54e89e96a02030c245b31d266ba12008b39c1e710c1ef1081e9feffffff8b4500c1e010c1e81089c309fb21f8f7d021d86689450083c5024a85d20f85cfffffffec37755d7a0528ed24ed24ed0b887feb509838f95c962b9670fec6ffc6ff9f321f581e00d380
    

    这是一段 x86_32 的  shellcode,可以用 radare2 反汇编它

    rasm2 -a x86 -b 32 -d e8ffffffffc05d6a055b29dd83c54e89e96a02030c245b31d266ba12008b39c1e710c1ef1081e9feffffff8b4500c1e010c1e81089c309fb21f8f7d021d86689450083c5024a85d20f85cfffffffec37755d7a0528ed24ed24ed0b887feb509838f95c962b9670fec6ffc6ff9f321f581e00d380
    

    这里的目标是记录他的系统调用,在 32 中使用 int 80 来执行系统调用,所以我们在 执行 int 80  前 记录它的 寄存器信息,就可以记录系统调用了。

    
    from unicorn import *
    from unicorn.x86_const import *
    
    
    shellcode = "e8ffffffffc05d6a055b29dd83c54e89e96a02030c245b31d266ba12008b39c1e710c1ef1081e9feffffff8b4500c1e010c1e81089c309fb21f8f7d021d86689450083c5024a85d20f85cfffffffec37755d7a0528ed24ed24ed0b887feb509838f95c962b9670fec6ffc6ff9f321f581e00d380".decode("hex")
    
    BASE = 0x400000
    STACK_ADDR = 0x0
    STACK_SIZE = 1024*1024
    
    mu = Uc (UC_ARCH_X86, UC_MODE_32)
    
    mu.mem_map(BASE, 1024*1024)
    mu.mem_map(STACK_ADDR, STACK_SIZE)
    
    
    mu.mem_write(BASE, shellcode)
    mu.reg_write(UC_X86_REG_ESP, STACK_ADDR + STACK_SIZE/2)
    
    def syscall_num_to_name(num):
        syscalls = {1: "sys_exit", 15: "sys_chmod"}
        return syscalls[num]
    
    def hook_code(mu, address, size, user_data):
        #print('>>> Tracing instruction at 0x%x, instruction size = 0x%x' %(address, size))  
        
        machine_code = mu.mem_read(address, size)
        if machine_code == "xcdx80":
            
            r_eax = mu.reg_read(UC_X86_REG_EAX)
            r_ebx = mu.reg_read(UC_X86_REG_EBX)
            r_ecx = mu.reg_read(UC_X86_REG_ECX)
            r_edx = mu.reg_read(UC_X86_REG_EDX)
            syscall_name = syscall_num_to_name(r_eax)
            
            print "--------------"
            print "We intercepted system call: "+syscall_name
            
            if syscall_name == "sys_chmod":
                s = mu.mem_read(r_ebx, 20).split("x00")[0]
                print "arg0 = 0x%x -> %s" % (r_ebx, s)
                print "arg1 = " + oct(r_ecx)
            elif syscall_name == "sys_exit":
                print "arg0 = " + hex(r_ebx)
                exit()
            
            mu.reg_write(UC_X86_REG_EIP, address + size)
            
    mu.hook_add(UC_HOOK_CODE, hook_code)
    
    mu.emu_start(BASE, BASE-1)
    
    

    关键就是使用 mu.hook_add, 使得在 unicorn 执行一条指令之前会先执行 hook_code 并且传入了与程序运行状态相关的参数,便于我们对程序状态进行操纵。在这里就是获取了 寄存器的值,然后根据系统调用号解析参数。

    ARM代码模拟执行

    测试程序位于

    http://t.cn/RQ6viS6
    

    其实就是执行一个递归函数,最后打印返回值

    我们的目标是加速程序的执行,可以加速的原理在于,这里是递归调用,对于的参数,返回值确定,所以我们就可以对已经执行过的参数,直接设置返回值,进而加速程序的运行。

    from unicorn import *
    from unicorn.arm_const import *
    import struct
    
    def read(name):
        with open(name) as f:
            return f.read()
            
    def u32(data):
        return struct.unpack("I", data)[0]
        
    def p32(num):
        return struct.pack("I", num)
    
    
    mu = Uc(UC_ARCH_ARM, UC_MODE_LITTLE_ENDIAN)
    
    
    BASE = 0x10000
    STACK_ADDR = 
    STACK_SIZE = 1024*10240x300000
    
    mu.mem_map(BASE, 1024*1024)
    mu.mem_map(STACK_ADDR, STACK_SIZE)
    
    
    mu.mem_write(BASE, read("./task4"))
    mu.reg_write(UC_ARM_REG_SP, STACK_ADDR + STACK_SIZE/2)
    
    instructions_skip_list = []
    
    CCC_ENTRY = 0x000104D0
    CCC_END = 0x00010580
    
    stack = []                                          # Stack for storing the arguments
    d = {}                                              # Dictionary that holds return values for given function arguments 
    
    def hook_code(mu, address, size, user_data):  
        #print('>>> Tracing instruction at 0x%x, instruction size = 0x%x' %(address, size))
        if address == CCC_ENTRY:                        # Are we at the beginning of ccc function?
            arg0 = mu.reg_read(UC_ARM_REG_R0)           # Read the first argument. it is passed by R0
            
            if arg0 in d:                               # Check whether return value for this function is already saved.
                ret = d[arg0]
                mu.reg_write(UC_ARM_REG_R0, ret)        # Set return value in R0
                mu.reg_write(UC_ARM_REG_PC, 0x105BC)    # Set PC to point at "BX LR" instruction. We want to return from fibonacci function
                
            else:
                stack.append(arg0)                      # If return value is not saved for this argument, add it to stack.
            
        elif address == CCC_END:
            arg0 = stack.pop()                          # We know arguments when exiting the function
            
            ret = mu.reg_read(UC_ARM_REG_R0)            # Read the return value (R0)
            d[arg0] = ret                               # Remember the return value for this argument
            
    mu.hook_add(UC_HOOK_CODE, hook_code)
    mu.emu_start(0x00010584, 0x000105A8)
    return_value = mu.reg_read(UC_ARM_REG_R1)           # We end the emulation at printf("%d
    ", ccc(x)).
    print "The return value is %d" % return_value
    

    关键点,用一个数组存储了 参数:返回值 对, 从而规避一些冗余的运算。

    参考

    http://eternal.red/2018/unicorn-engine-tutorial/

  • 相关阅读:
    用鼠标键盘来控制你的Android手机——同屏显示简单教程
    Ubuntu13.04 Eclipse下编译安装Hadoop插件及使用小例
    eclipse在Ubuntu 13.04下的安装过程及问题小记
    Ubuntu下安装qq方法及疑难问题解决
    POJ1065 Wooden Sticks(贪心+动态规划——单调递减或递增序列)
    简单的字母全排列问题—递归法和STL法
    ichat在线客服jQuery插件(可能是历史上最灵活的)
    轻量级实用JQuery表单验证插件:validateForm5
    一句话在网页右上角加一个精致下拉框:forkme on github
    FastUI快速界面原型制作工具
  • 原文地址:https://www.cnblogs.com/hac425/p/9416898.html
Copyright © 2020-2023  润新知