• php多线程thread开发与应用的例子


    要在PHP中使用多线程,首先需要PHP 5.3以上版本(必须是线程安全的ZTS构建),并安装pthreads扩展,这样PHP才能真正地支持多线程。扩展的安装方法请自行搜索。

    PHP扩展下载:https://github.com/krakjoe/pthreads

    PHP手册文档:http://php.net/manual/zh/book.pthreads.php

    在安装好扩展之后,就可以运用多线程了,下面贴个通过搜索结果抓取百度网盘内容的代码:

    <?php
    include 'include/CurlLoad.class.php'; // 引入读取库
    /**
     * Fetch the details of every link found on a search result page, running one
     * baidu_thread_run worker thread per link.
     *
     * @param string $url URL of the search result page; it is passed unchanged to
     *                    BaiduSRLinksGet() with $format = 1.
     * @return array|null On success an array holding:
     *                    - 'res'    list of the non-empty results produced by the worker
     *                               threads (the key is absent when no thread produced data),
     *                    - 'pages'  the 'pages' entry returned by BaiduSRLinksGet(), or null
     *                               when it returned none,
     *                    - 'status' the string "1".
     *                    Null when BaiduSRLinksGet() returned nothing or no 'links' entry.
     */
    function vget($url) {
        $ret = BaiduSRLinksGet($url, 1); // fetch the list of result links
        if ($ret == null || !array_key_exists("links", $ret)) {
            return null;
        }

        $infos = array();
        $thread_array = array();

        // Start every worker first so that the page fetches overlap.
        foreach ($ret['links'] as $link) {
            $thread = new baidu_thread_run($link);
            $thread->start();
            $thread_array[] = $thread;
        }

        // join() blocks until the thread has finished, so no polling loop is needed.
        foreach ($thread_array as $thread) {
            if ($thread->join()) {
                $temp = $thread->data;
                if ($temp != null) {
                    $infos['res'][] = $temp;
                }
            }
        }

        // 'pages' is only present when pagination links were found on the page.
        $infos['pages'] = isset($ret['pages']) ? $ret['pages'] : null;
        $infos['status'] = "1";
        return $infos;
    }
    /**
     * Extract the result links and the pagination links from a search result page.
     *
     * @param string $url    URL of the search result page.
     * @param int    $format 0 (default) returns the links exactly as they appear in the page;
     *                       1 replaces each link by the "Location" header reported by
     *                       CurlLoad::Get_Headers() when there is one.
     * @return array|null Array with 'links' (result links; absent when $format is 1 and the
     *                    page contained none) and, when pagination links were found, 'pages'
     *                    (page index => base64-encoded query fragment). Null when the page
     *                    could not be loaded or the link pattern could not be applied.
     */
    function BaiduSRLinksGet($url, $format = 0) {
        $html = CurlLoad::HtmlGet($url); // load the page
        if ($html == null)
            return null;
        try {
            // Single-quoted so the double quotes inside the pattern need no escaping.
            preg_match_all('/"url":"(?<links>.*)"}/', $html, $rets);
            if (!is_array($rets) || !array_key_exists('links', $rets)) // pattern could not be applied
                return null;
            $ret = array();
            if ($format == 1) {
                foreach ($rets['links'] as $i => $link) {
                    // NOTE(review): assumes Get_Headers() returns a header-name => value array
                    // like PHP's get_headers($url, 1) — confirm against CurlLoad.
                    $headr_temp = CurlLoad::Get_Headers($link, 1);
                    if (is_array($headr_temp) && array_key_exists("Location", $headr_temp))
                        $ret['links'][$i] = $headr_temp['Location'];
                    else
                        // No redirect for this link: keep only this raw link, leaving the
                        // addresses already resolved for earlier links untouched.
                        $ret['links'][$i] = $link;
                }
            } else
                $ret['links'] = $rets['links'];

            // '#' delimiters avoid escaping the '/' of "/s?wd="; the '?' and '.' are escaped
            // so they match literally.
            preg_match_all('#href="?/s\?wd=site%3Apan\.baidu\.com%20(?<url>.+?)&ie=utf-8">#', $html, $out);
            if (!is_array($out) || !isset($out['url']))
                return $ret;
            unset($out['url'][0]);
            $number = count($out['url']);
            // NOTE(review): count() is taken after the unset above, so the last matched link
            // is never visited. Kept as in the original — confirm whether that is intended.
            for ($i = 1; $i < $number; $i++) {
                // The pn value divided by 10 is used as the page index.
                if (preg_match('/&pn=(\d+)/', $out['url'][$i], $temp))
                    $ret['pages'][(int) ($temp[1] / 10)] = base64_encode($out['url'][$i]);
            }
            return $ret;
        } catch (Exception $e) {
            WriteLog($e);
            return null;
        }
    }
    /**
     * Read the file details shown on a Baidu netdisk resource page.
     *
     * @param string $url URL of the resource page.
     * @return array|null Array with the keys 'name', 'size', 'user', 'date' and 'number'
     *                    taken from the first match in the page, plus 'link' set to $url;
     *                    null when the page could not be loaded or contains no match.
     */
    function PanInfoGet($url) {
        $page = CurlLoad::HtmlGet($url); // load the page
        if ($page == null) {
            return null;
        }
        try {
            $pattern = "/文件名:(?<name>.*) 文件大小:(?<size>.*) 分享者:(?<user>.*) 分享时间:(?<date>.*) 下载次数:(?<number>[0-9]+)/";
            if (!preg_match($pattern, $page, $found)) {
                return null;
            }
            // Copy the named groups of the first match into the result.
            $info = array();
            foreach (array('name', 'size', 'user', 'date', 'number') as $field) {
                $info[$field] = $found[$field];
            }
            $info['link'] = $url;
            return $info;
        } catch (Exception $e) {
            WriteLog($e);
            return null;
        }
    }
    /**
     * Append one timestamped warning line to "../error.log".
     *
     * @param mixed $str Message to record. It is concatenated into the line, so an
     *                   Exception is written through its string conversion.
     * @return void
     */
    function WriteLog($str) {
        $line = "Warning:" . date("Y/m/d H:i:s") . ":" . $str . "\r\n";
        // FILE_APPEND creates the file when it is missing. LOCK_EX keeps lines from
        // interleaving when several worker threads log at once. A failed write only
        // raises PHP's own warning, so logging never aborts the caller.
        file_put_contents("../error.log", $line, FILE_APPEND | LOCK_EX);
    }
    /**
     * Worker thread that looks up a single netdisk resource page.
     *
     * @author MuXi
     */
    class baidu_thread_run extends Thread {
        /** URL handed to the constructor. */
        public $url;
        /** Value returned by PanInfoGet() for $url; only assigned once run() has executed. */
        public $data;

        /**
         * @param string $url URL of the resource page this thread should fetch.
         */
        public function __construct($url) {
            $this->url = $url;
        }

        /**
         * Thread body: store the PanInfoGet() result in $data, skipping empty URLs.
         */
        public function run() {
            $target = $this->url;
            if (!$target) {
                return;
            }
            $this->data = PanInfoGet($target);
        }
    }
    ?>
  • 相关阅读:
    Python十大经典算法之选择排序
    在js中修改样式带 !important 的样式
    vscode 使用 ejs 语法有红色错误提示线
    mysql 报 'Host ‘XXXXXX’ is blocked because of many connection errors'
    字符编码
    Canal.adapter报错
    Windows 域控配置时间同步
    Docker清理日志脚本
    Docker快速部署clickhouse
    Windows批处理一键添加hosts文件
  • 原文地址:https://www.cnblogs.com/kenshinobiy/p/4671251.html
Copyright © 2020-2023  润新知