• 伪随机数的爆破--2


    伪随机数的爆破–2

    伪随机数的爆破–2

    1 简介

    php中的mt_rand是采用梅森旋转算法MT19937,但在php 7.1.0之前的版本中,其实现的不是标准算法,造成容易猜解。可以参考php里的随机数这篇文章。还有官方文档的changelog。并且已经存在很多工具用来爆破种子,例如著名的php_mt_seed。

    这里我参考了php_mt_seed文章中引用的基于CUDA框架的爆破算法,自己稍作修改,可以看到用GPU爆破比CPU的速度要快很多。当然,这取决于你的显卡和CPU。

    原始的cuda代码在这里mt_rand.cu

    2 具体实现

    由于要用clojure的gpu相关的库,要添加依赖到deps.edn:

    {:deps {uncomplicate/neanderthal {:mvn/version "0.22.0"}}}
    

    然后是gpu执行的设备代码, mt_rand_me.cu:

    /* Fixed-width alias declared locally; no standard headers are included by this file. */
    typedef unsigned int uint32_t;
    
    #define MT_N          (624)
    #define N             MT_N                 /* length of state vector */
    #define M             (397)                /* a period parameter */
    #define hiBit(u)      ((u) & 0x80000000U)  /* mask all but highest   bit of u */
    #define loBit(u)      ((u) & 0x00000001U)  /* mask all but lowest    bit of u */
    #define loBits(u)     ((u) & 0x7FFFFFFFU)  /* mask     the highest   bit of u */
    #define mixBits(u, v) (hiBit(u)|loBits(v)) /* move hi bit of u to hi bit of v */
    /*
       One twist step: m XOR (mixed word >> 1) XOR (0x9908b0df when the low bit of u is set).
       `m` is parenthesized so that an argument expression such as `a | b` is evaluated as a
       unit instead of being split by the higher-precedence `^` that follows it.
       NOTE(review): the low bit is taken from u, not v; this looks like the deliberate
       pre-7.1 PHP variant the article targets -- confirm before "correcting" it.
    */
    #define twist(m,u,v)  ((m) ^ (mixBits(u,v)>>1) ^ ((uint32_t)(-(uint32_t)(loBit(u))) & 0x9908b0dfU))
    
    #define PHP_MT_RAND_MAX ((long) (0x7FFFFFFF)) /* (1<<31) - 1 */
    /* Scale raw value __n from [0, __tmax] into the inclusive range [__min, __max]. */
    #define RAND_RANGE(__n, __min, __max, __tmax) ((__min) + (long) ((double) ( (double) (__max) - (__min) + 1.0) * ((__n) / ((__tmax) + 1.0))))
    
    /*
       Compute the mt_rand output produced `step` draws after seeding with `seed`
       (step 0 is the first output).

       Instead of materializing the whole state vector, only the three seeded
       state words that feed the twist are kept: state[step], state[step + 1]
       and state[step + M].  Because of this memory optimization `step` must
       satisfy:
           step < N - M
    */
    __device__
    uint32_t mt_rand( uint32_t seed, int step )
    {
        // Running value of the seeding recurrence; starts as state[0].
        uint32_t state = seed & 0xffffffffU;
        int idx = 1;

        // Advance the recurrence up to state[step].
        while( idx <= step ) {
            state = ( 1812433253U * ( state ^ (state >> 30) ) + idx ) & 0xffffffffU;
            ++idx;
        }
        const uint32_t wordCur = state;

        // One more application, using the index left over from the loop above,
        // gives the following state word.
        state = ( 1812433253U * ( state ^ (state >> 30) ) + idx ) & 0xffffffffU;
        const uint32_t wordNext = state;

        // Keep going until state[step + M].
        for( idx = step + 2; idx <= step + M; ++idx ) {
            state = ( 1812433253U * ( state ^ (state >> 30) ) + idx ) & 0xffffffffU;
        }
        const uint32_t wordFar = state;

        // Reload (twist) the single word that is needed, then temper it.
        uint32_t out = twist( wordFar, wordCur, wordNext );
        out ^= out >> 11;
        out ^= (out << 7) & 0x9d2c5680U;
        out ^= (out << 15) & 0xefc60000U;
        out ^= out >> 18;

        // Drop the lowest bit so the result fits in 31 bits.
        return out >> 1;
    }
    
    // Draw number `step` for `seed`, scaled into the inclusive range [min, max]
    // the same way RAND_RANGE does for a raw value in [0, PHP_MT_RAND_MAX].
    __device__
    uint32_t mt_rand_mm( uint32_t seed, int step, int min, int max )
    {
        const uint32_t raw = mt_rand( seed, step );
        return RAND_RANGE( raw, min, max, PHP_MT_RAND_MAX );
    }
    
    // 上面的代码直接取自mt_rand.cu
    
    /*
       Brute-force kernel: every thread tests one candidate seed, sseed + global
       thread id, against the known output sequence.

       Launch layout: 1-D grid of 1-D blocks.  `sols` needs one element per
       launched thread; each thread first clears its own slot and, on a full
       match, stores its seed there and raises *found.
       Precondition: `length` must respect the `step < N - M` limit of mt_rand.
       NOTE(review): there is no bounds check on the thread id, so the launch
       must not create more threads than `sols` has elements -- confirm how the
       host rounds the grid size.
    */
    extern "C"
    __global__
    void mt_rand_find(uint32_t sseed, // first seed of the range under test
                      uint32_t* values, // known sequence of random numbers
                      uint32_t length, // number of entries in values
                      int min, // lower bound passed to mt_rand_mm
                      int max, // upper bound passed to mt_rand_mm
                      bool *found, // set to true when any thread matches
                      uint32_t* sols // per-thread result slots
                      )
    {
        const uint32_t tid = blockDim.x * blockIdx.x + threadIdx.x;
        const uint32_t candidate = sseed + tid;

        // Zero means "no match" for this thread's slot.
        sols[ tid ] = 0;

        // Count how many leading outputs agree; stop at the first mismatch.
        int pos = 0;
        while( pos < length && values[pos] == mt_rand_mm(candidate, pos, min, max) ) {
            ++pos;
        }

        if( pos == length ) {
            *found = true;             // at least one solution exists in this launch
            sols[ tid ] = candidate;   // the matching seed
        }
    }
    
    // Test helper, intended to be launched with a single thread: writes
    // mt_rand_mm(seed, step, min, max) to *ret.
    extern "C"
    __global__
    void mt_rand_one( uint32_t seed, int step, int min, int max, uint32_t* ret)
    {
        ret[0] = mt_rand_mm( seed, step, min, max );
    }
    

    最后是clojure的host代码:

    (require '[uncomplicate.clojurecuda.core :refer :all])
    (require '[uncomplicate.commons.core :refer :all])
    
    ;; Initialize the device
    (init)
    (device-count)
    ;; => 1
    ;; Number of graphics cards
    
    ;; Pick the GPU and create a context; the argument to `device` is the card id
    (def my-gpu (device 0))
    (def ctx (context my-gpu))
    
    ;; Make it the current context
    (current-context! ctx)
    ;; Source text of the device code, read from the file next to this script
    (def php-rand (slurp "./mt_rand_me.cu"))
    
    ;; Compile and export the functions; the CUDA toolkit must be installed locally
    ;; For the details see https://dragan.rocks/articles/18/Interactive-GPU-Programming-1-Hello-CUDA
    (def rand-program (compile! (program php-rand)))
    (def rand-module (module rand-program))
    (def mt-rand-find(function rand-module "mt_rand_find"))
    (def mt-rand-one(function rand-module "mt_rand_one"))
    
    (def threads 2000000) ;; number of GPU threads
    (def size threads) ;; length of the array holding the GPU results, same as the number of GPU threads
    
    (def bool-size 1) ;; size in bytes of a boolean in C memory layout
    (def uint-size 4) ;; size in bytes of a uint in C memory layout
    ;; 2^31 - 1, used as the default :max of the search functions
    (def max-rand (int (dec (Math/pow 2 31))))
    
    (defn find-rand-one-block
      "Tests one block of `size` consecutive seeds, starting at `n`, on the GPU.

      `values` is the known sequence of mt_rand outputs; `opts` may carry `:min`
      (default 0) and `:max` (default `max-rand`), the range the outputs were
      drawn from.

      Returns nil when no seed in the block reproduces `values`; otherwise prints
      a progress line and returns the non-zero entries of the result array (the
      matching seeds as signed 32-bit ints). Seed 0 cannot be reported because 0
      marks an empty slot.

      NOTE(review): the kernel writes its result slot without a bounds check; if
      `grid-1d` rounds the thread count up to whole thread blocks, threads past
      `size` write beyond `sols` -- confirm."
      [n values & [opts]]
      (let [min (get opts :min 0)
            max (get opts :max max-rand)
            values-len (count values)]
        ;; with-release frees every device buffer even when an upload, the launch
        ;; or a download throws, so a failing block no longer leaks GPU memory.
        (with-release [found (mem-alloc bool-size)
                       values-match (mem-alloc (* uint-size values-len))
                       sols (mem-alloc (* size uint-size))]
          ;; Clear the found flag and upload the known sequence.
          (memcpy-host! (byte-array [0]) found)
          (memcpy-host! (int-array values) values-match)
          (launch! mt-rand-find (grid-1d size)
                   (parameters n
                               values-match
                               values-len
                               min
                               max
                               found
                               sols))
          (let [ret-found (first (memcpy-host! found (byte-array 1)))]
            (when-not (zero? ret-found)
              (println "block:" n "ret found:" ret-found)
              ;; The result array is only downloaded when the flag says it holds a hit.
              (remove zero? (memcpy-host! sols (int-array size))))))))
    
    ;; Number of `size`-seed blocks needed to cover all 2^32 seeds (may be a ratio;
    ;; find-all-seed rounds it up). Dividing 2^32 by `size` rather than 0xffffffff
    ;; by (size + 1) keeps the last block from being dropped for sizes such as
    ;; 65536, where the old formula yielded one block too few.
    (def max-blocks (/ 0x100000000 size))
    (defn find-all-seed
      "Searches the 32-bit seed space, one block of `size` seeds at a time, and
      prints every seed whose output sequence matches `vals`. `opts` is passed
      through to find-rand-one-block. Returns nil."
      [vals & [opts]]
      (let [block-count (int (Math/ceil max-blocks))]
        (dotimes [block-idx block-count]
          (when-let [hits (find-rand-one-block (* size block-idx) vals opts)]
            ;; Hits come back as signed ints; print them as unsigned seeds.
            (run! #(println "found:" (Integer/toUnsignedString %)) hits)))))
    
    ;; Time a full search for seeds whose first output is 617664816.
    ;; The comment lines below are the output recorded from that run.
    (time (find-all-seed [617664816]))
    ;; block: 28000000 ret found: 1
    ;; found: 28833322
    ;; block: 368000000 ret found: 1
    ;; found: 368690622
    ;; block: 2398000000 ret found: 1
    ;; found: 2398389484
    ;; "Elapsed time: 15790.4927 msecs"
    
    (defn rand-one
      "Runs the single-thread test kernel and returns the mt_rand output for
      `seed`. `opts` may carry `:step` (which draw to return, default 0), `:min`
      (default 0) and `:max` (default `max-rand`)."
      [seed & [opts]]
      (let [step (get opts :step 0)
            min (get opts :min 0)
            max (get opts :max max-rand)]
        ;; The kernel writes a single uint, so allocate exactly uint-size bytes;
        ;; with-release frees the buffer even if the launch or the copy throws.
        (with-release [ret (mem-alloc uint-size)]
          (launch! mt-rand-one (grid-1d 1)
                   (parameters seed step min max ret))
          (first (memcpy-host! ret (int-array 1))))))
    
    (comment
      ;; Check that the results agree with what PHP generates
      (rand-one 1234 {:step 0})
      ;; => 1741177057

      (rand-one 1234 {:step 1})
      ;; => 1068724585

      ;; The results agree with PHP. *Note*: on PHP 7.1 and later, pass
      ;; MT_RAND_PHP to mt_srand to get the same results as the mt_rand
      ;; algorithm used by PHP 5.
      ;; The command is kept on one comment line: when it was split, the tail
      ;; `";'` fell outside the comment and opened an unterminated string.
      ;; php -r 'mt_srand(1234,MT_RAND_PHP); echo mt_rand()."---".mt_rand(). "\n";'
      ;; 1741177057---1068724585
      )
    
    ;; Clean up: release the module, the compiled program and the context
    (release rand-module)
    (release rand-program)
    (release ctx)
    

    php_mt_seed的结果:

    $ time php_mt_seed 617664816
    Found 3, trying 3690433088 - 3690987519, speed 112249734 seeds per second
    seed = 28833322
    
    seed = 368690622
    
    seed = 2398389484
    
    Found 3
    Found 3, trying 4261412864 - 4294967295, speed 112251741 seeds per second
    real    0m38.282s
    user    0m0.000s
    sys     0m0.000s
    

    下面测试随机字符串序列,如下php代码:

    <?php
    /**
     * Build a string of $l characters, each picked from $chars at an index
     * returned by mt_rand().
     *
     * NOTE(review): the index is drawn from mt_rand(0, strlen($chars)). If the
     * upper bound is inclusive, as PHP documents for mt_rand, the index can equal
     * strlen($chars), one past the last character. The seed search in this
     * article uses the same 0..62 range, so the call is left unchanged.
     *
     * @param int $l number of characters to generate (default 4)
     * @return string the generated string
     */
    function randStr($l=4){
        $ret="";
        $chars="qwertyuiopasdfghjklzxcvbnm1234567890QWERTYUIOPASDFGHJKLZXCVBNM";
        for($i=0;$i<$l;$i++)    $ret.=$chars[mt_rand(0,strlen($chars))];
        return $ret;
    }
    
    // Seed with a known value, requesting the MT_RAND_PHP mode, then print a 5-character string.
    mt_srand( 6688991, MT_RAND_PHP );
    echo randStr(5);
    ?>
    

    randStr结果是:A4MFO

    现在根据randStr的结果猜解出seed,先用clojure来实现:

    ;; To turn the string back into a sequence of random numbers we first need
    ;; the table that maps random numbers to characters.
    ;; Here we assume that table is already known.
    (def chars "qwertyuiopasdfghjklzxcvbnm1234567890QWERTYUIOPASDFGHJKLZXCVBNM")
    
    (defn rand-strs->rand-seq
      "Maps every character of `strs` to its index in `chars`, returning the
      indexes as a vector. A character that is not in `chars` maps to -1."
      [strs]
      (mapv (fn [ch]
              (.indexOf chars (str ch)))
            strs))
    
    ;; The string produced by randStr
    (def result "A4MFO")
    
    ;; Index of each character of the result in `chars`
    (def rand-seq (rand-strs->rand-seq result))
    rand-seq
    ;; => [46 29 61 49 44]
    
    ;; Upper bound handed to the search as :max, i.e. the length of `chars`
    (def max-r (count chars))
    max-r
    ;; => 62
    
    ;; Time a full search for seeds that reproduce the index sequence.
    ;; The comment lines below are the output recorded from that run.
    (time (find-all-seed rand-seq {:max max-r}))
    ;; block: 6000000 ret found: 1
    ;; found: 6688991
    ;; block: 200000000 ret found: 1
    ;; found: 201323601
    ;; block: 3658000000 ret found: 1
    ;; found: 3658569207
    ;; block: 3892000000 ret found: 1
    ;; found: 3893347456
    ;; "Elapsed time: 20043.783126 msecs"
    

    随着要测试的序列增大,速度会降低,而且max值比较小,会有很多个符合结果序列的种子。

    下面测试php_mt_seed的效果:

    $ time php_mt_seed 46 46 0 62 29 29 0 62 61 61 0 62 49 49 0 62 44 44 0 62
    Found 3, trying 3690Pattern: EXACT-FROM-63 EXACT-FROM-63 EXACT-FROM-63 EXACT-FROM-63 EXACT-FROM-63
    
    seed = 6688991
    
    seed = 201323601
    
    seed = 3658569207
    
    seed = 3893347456
    
    Found 4
    Found 4, trying 4261412864 - 4294967295, speed 107807449 seeds per second
    real    0m39.848s
    user    0m0.000s
    sys     0m0.000s
    

    可以看到就算测试的序列增加,php_mt_seed的速度也比较稳定,可能它是在一次rand计算中比较整个序列的,所以不管序列有多长,对速度没什么影响,而我这里的GPU代码,序列中每个值的比较都要调用mt_rand,而mt_rand再根据step去计算结果,因此mt_rand_find的执行时间会随着序列长度的增加而增加,只做个简单的比较,这里就不再优化算法实现了。

    3 总结

    可以看到GPU的速度非常快,对于一个数字的计算,本机测试大概16秒,而php_mt_seed大概要38秒。主要是因为GPU的线程数量很多,示例中开了2000000线程,我只是无脑开,对于CUDA编程没有研究过,通过这次实验,体验下GPU编程。

    作者: ntestoc

    Created: 2019-03-17 周日 09:00

  • 相关阅读:
    struts2+jpa+spring 泛型版小结
    PasswordEncoder
    父窗口 子窗口
    Powerdesigner的PDM(物理数据模型)生成数据库及逆向工程(将现有的数据库生成PDM)
    js 正则表达式
    <aop:config>
    CJDBC
    struts2取值
    mysql启动错误1067的解决
    杂碎
  • 原文地址:https://www.cnblogs.com/ntestoc/p/10535488.html
Copyright © 2020-2023  润新知