• 采集器


    <meta http-equiv=Content-Type content="text/html;charset=gbk">
    <script src="./js/jquery.js" type="text/javascript"></script>
    <script src="./js/jquery.validate.js" type="text/javascript"></script>
    <script src="./js/jquery.metadata.js" type="text/javascript"></script>
    <script type="text/javascript">
    </script>
    <?php
    // Scrape the pre-sale certificate listing for one project, then fetch the
    // sales-statistics frame of each certificate and dump the first table of
    // rows that can be parsed.
    //
    // '#' is used as the regex delimiter because the patterns are full of
    // closing tags ("</td>", "</span>"); with '/' as delimiter the pattern
    // would end at the first closing tag and PCRE would reject the rest as
    // unknown modifiers.
    // NOTE(review): the page declares charset=gbk and no 'u' modifier is used,
    // so the Chinese literals below presumably have to be stored in the same
    // byte encoding as the fetched page -- confirm the file encoding.

    // The "project" query value is already percent-encoded.
    $url="http://www.jy.com.cn/PreSellCert_List.do?project=%B3%A4%BD%AD%B9%FA%BC%CA";
    $str=file_get_contents($url);
    if($str===false){
        // Without the listing there is nothing to iterate over.
        exit('Failed to fetch '.$url);
    }
    $str=compress_html($str);

    // Capture groups: 1 = pscid from the detail link, 2 = text of the
    // font_12px span, 3 and 5 = the two font_16px counts, 4 = text after
    // "批准日期:". Only group 1 is used below.
    // '.' and '?' of "Detail.do?pscid=" are escaped so they match literally.
    // NOTE(review): the font_12px span is assumed to be wrapped in literal
    // parentheses on the page; they are optional here -- confirm against the
    // live markup.
    $certRegex='#<span class="font_bold font_blue font_14px"><a href="PreSellCert_Detail\.do\?pscid=(.*)">.*\(?<span class="font_12px">(.*)</span>\)?</a></span></td>'
    .'.*<span class="font_16px font_bold">(.*)</span>套</td>.*批准日期:(.*)</td></tr>.*<span class="font_16px font_bold">(.*)</span>套</td>#U';

    // One statistics row: a run of right-aligned cells followed by one centred
    // cell. The first cell is capped at 30 characters.
    $saleRegex='#<tr><td align="right">(.{1,30})</td><td align="right">(.*)</td><td align="right">(.*)</td><td align="right">(.*)</td><td align="right">(.*)</td><td align="right">(.*)</td><td align="right">(.*)</td><td align="right">(.*)</td><td align="right">(.*)</td><td align="right">(.*)</td><td align="right">(.*)</td><td align="right">(.*)</td><td align="center">(.*)</td></tr>#U';

    if(preg_match_all($certRegex, $str, $matches, PREG_SET_ORDER)){
        foreach($matches as $val){
            $saleurl="http://www.jy.com.cn/ifrm_PreSellCert_SaleStat.do?pscid=".$val[1];
            $salestr=file_get_contents($saleurl);
            if($salestr===false){
                // One unreachable statistics page should not stop the run.
                continue;
            }
            $salestr=compress_html($salestr);
            if(preg_match_all($saleRegex, $salestr, $salematches, PREG_SET_ORDER)){
                // Dump the rows of the first certificate that yields any and
                // stop; later certificates are not processed.
                print_r($salematches);
                exit;
            }
        }
    }

    /**
     * Compact a piece of HTML markup into a single line.
     *
     * Line breaks and tabs are removed outright, then a fixed sequence of
     * regex substitutions trims and collapses the remaining whitespace and
     * strips HTML and C-style comments.
     *
     * @param string $string Markup to compact.
     * @return string The compacted markup. If the regex pass fails, the
     *                markup with only line breaks and tabs removed.
     */
    function compress_html($string) {
        // Removed in this order, so a CRLF pair is dropped as a unit before
        // bare LF characters are handled.
        $string = str_replace(array("\r\n", "\n", "\t"), '', $string);

        // Applied in order; each pattern sees the output of the previous one.
        $pattern = array(
            '/> *([^ ]*) *</',  // trim spaces around space-free text between two tags
            '/\s+/',            // collapse every whitespace run to one space
            '/<!--[^!]*-->/',   // drop HTML comments whose body contains no "!"
            '/" /',             // drop a space that follows a double quote
            '/ "/',             // drop a space that precedes a double quote
            '#/\*[^*]*\*/#',    // drop /* ... */ comments whose body contains no "*"
        );
        $replace = array(
            '>$1<',
            ' ',
            '',
            '"',
            '"',
            '',
        );

        $compressed = preg_replace($pattern, $replace, $string);

        // preg_replace() returns null on a PCRE failure; hand back the
        // partially cleaned markup instead of passing null on to callers.
        return $compressed === null ? $string : $compressed;
    }
    ?>

  • 相关阅读:
    JSON 使用
    JSON 语法
    JSON 简介
    什么是 JSON ?
    PHP 数组排序
    PHP 数组
    swan.after
    Page.after
    App.after
    AOP说明
  • 原文地址:https://www.cnblogs.com/hechunhua/p/3673702.html
Copyright © 2020-2023  润新知