• 写一个百度url收录检测的web_php小工具


    出于网站SEO的需要,要统计网站的页面收录率,于是写了个小工具。目前虽然还不完善,但很想跟大家分享一下。

    使用方法:可以先用sitemapX软件生成网站链接的列表。这个列表比较靠谱,因为网站内部能链接到的页面基本都是比较重要的页面。

    把生成的列表粘贴进小工具,查询即可。注意:如果一次请求过多,会被百度屏蔽,这个问题目前还没解决,大家有啥好的方案可以分享来哈!

    测试地址:http://zhidong10.com/site_rs_baidu/baidu.php

    以后会增加:收录、未收录筛选,手动设置请求参数,数据导出,未收录重新检测等。2013-03-23

    废话不多说,上代码。

    发送请求、数据展现:

    <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
    <html xmlns="http://www.w3.org/1999/xhtml">
        <head>
            <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
            <title>百度收录查询</title>
            <link href="css/reset.css" rel="stylesheet" type="text/css" />
            <link href="css/ui.css" rel="stylesheet" type="text/css" />
            <script type="text/javascript" src="js/jquery-1.7.1.min.js"></script>
        </head>
    
        <body>
            <h1 class="tit">百度收录查询</h1>
            <div class="enterbox">
                <textarea id="TextBox1" ></textarea>
            </div>
            <div class="sub_btn"><a href="javascript: testGG()" >提交</a></div>
            <div class="resutl_tit">查询结果:</div>
            <div id="maindiv">
                      <div id="div3" style="width: 750px;"><!--<a href="javascript:void(0);" title="点击重新查询"><img src="image/rst.gif"></a><span id="rehome">←重新查询</span>--> 
                    <!--<div onclick="_export()" id="div_export"></div>-->
                    <!-- Summary counters; the inline script rewrites #num_total2, #num_sl2, #num_nosl2 and #num_pct. -->
                    <div id="div4"><span id="num_total">总:</span><span id="num_total2">0</span><span id="num_sl">收录:</span><span id="num_sl2">0</span><span id="num_nosl">未收录:</span><span id="num_nosl2">0</span><span id="percent">收录率:</span><span id="num_pct"></span></div>
                </div>
                <!-- Result table: the header is static; the inline script appends one row per checked URL. -->
                <table class="tb1">
                    <colgroup>
                        <col id="col1" />
                        <col id="col2" />
                        <col id="col3" />
                        <col id="col4" />
                        <col id="col5" />
                    </colgroup>
                    <thead id="thead">
                        <tr>
                            <th scope="col"><span>No.</span></th>
                            <th scope="col"><span>标题</span></th>
                            <th scope="col"><span>网址</span></th>
                            <th scope="col" class="th2"><div id="div5"><img src="image/sort.gif" id="s1" alt="" /></div>
                                <span id="sp">收录</span></th>
                            <th style="border-bottom:1px solid #a5a5a5;border-right:1px solid #a5a5a5" scope="col" class="kz"><span>快照</span></th>
                        </tr>
                    </thead>
                </table>
            </div>
            <script type="text/javascript">
                // Shared state of the current query batch; reset by testGG() and consumed by run().
                var count = {
                    total:0, // number of URLs in the batch, also the denominator of the indexed percentage
                    time : '', // handle returned by setInterval() in testGG(); passed to clearInterval() in run()
                    url_list:"", // starts as an empty string; testGG() replaces it with the list of input lines
                    star:0 // index into url_list of the next URL that run() will process
                };
                /**
                 * Ask rs.php whether `url` is indexed and render the answer into table row "#tr<num>".
                 *
                 * Fills the row's title, index-status icon and snapshot-date cells, then refreshes
                 * the indexed / not-indexed counters and the percentage.
                 *
                 * @param {string} url Absolute URL to check.
                 * @param {number} num Zero-based row index; the target row has id "tr" + num.
                 */
                function get_rs(url, num){
                    var row = $("#tr"+num);
                    var title = row.find(".title");
                    var rs = row.find(".yo div");
                    var kz = row.find(".kz div");

                    // Used when no usable answer arrived: stop both spinners without counting
                    // the row as either indexed or not indexed.
                    function showFailure(){
                        title.text("数据异常");
                        rs.attr("class","");
                        kz.attr("class","");
                        kz.text("-");
                    }

                    $.ajax({
                        // encodeURIComponent keeps "&", "?" and "#" inside the checked URL from
                        // being parsed as part of rs.php's own query string.
                        url: "rs.php?site="+encodeURIComponent(url)+"&time="+ new Date().getTime(),
                        dataType:"json",
                        success: function(data){
                            if(!data) {
                                showFailure();
                                if(window.console && console.log){
                                    console.log("data:"+data);
                                }
                                return;
                            }
                            // The title comes from a third-party page, so it is inserted as text,
                            // never as HTML.
                            if(data.error == 1){
                                title.text(data.title);
                                rs.attr("class","iconwarn");
                                kz.attr("class","");
                                kz.text("-");
                            }else if(data.error == 0){
                                title.text(data.title);
                                rs.attr("class","iconok");
                                kz.attr("class","");
                                kz.text(data.date);
                            }
                            var indexed = $(".yo .iconok").length;
                            $("#num_sl2").html(indexed);
                            $("#num_nosl2").html($(".yo .iconwarn").length);
                            // Guard against a zero total so the percentage never shows "NaN%".
                            var per = count.total ? Math.floor((indexed/count.total)*100) : 0;
                            $("#num_pct").html(per+"%");
                        },
                        error:function(){
                            // Network failure or a non-JSON reply: do not leave the row loading forever.
                            showFailure();
                        }
                    });
                }
    
                /**
                 * Submit handler: reset the result table and counters, read the URL list from
                 * #TextBox1 (one URL per line) and start processing one URL per second.
                 */
                function testGG(){
                    // Stop a batch that is still running; otherwise two timers would advance
                    // the same cursor after a re-submit.
                    clearInterval(count.time);

                    // Clear the previous results.
                    $(".tb1 tbody tr").remove();
                    count = {
                        total:0,
                        time : '',
                        url_list:"",
                        star:0
                    };
                    $("#num_total2").html("0");
                    $("#num_sl2").html("0");
                    $("#num_nosl2").html("0"); 
                    $("#num_pct").html("");
                
                    var txt = document.getElementById("TextBox1");
                    // Accept both "\n" and "\r\n" line endings and drop blank lines so they are
                    // neither counted in the total nor sent to the server.
                    var lines = txt.value.split(/\r?\n/);
                    var urls = [];
                    for(var i = 0; i < lines.length; i++){
                        if($.trim(lines[i]) !== ""){
                            urls.push(lines[i]);
                        }
                    }
                    count.url_list = urls;
                    count.total = urls.length;
                    $("#num_total2").html(count.total);
                    if(count.total === 0){
                        return; // nothing to query, so no timer is needed
                    }
                    count.time =  setInterval(function(){
                        run();
                    }, 1000); // one request per 1000 ms, to avoid being blocked by Baidu
                }
                /**
                 * Timer tick: append a "loading" row for the next URL in count.url_list and start
                 * its lookup via get_rs(); stop the timer once every URL has been dispatched.
                 */
                function run(){
                    var len = count.url_list.length;
                    if(count.star >= len){
                        clearInterval(count.time);
                        return;
                    }
                    var order =  (count.star%2==0?"odd1":"odd");
                    var url = $.trim(count.url_list[count.star]);
                    // Prefix a scheme only when none is present; https:// URLs are kept as they are.
                    if(!/^https?:\/\//i.test(url)){
                        url = "http://"+url;
                    }
                    var row = $('<tr class="'+order+'" id="tr'+count.star+'"> <td class="num">'+(count.star+1)+'</td><td class="title">-</td><td class="url2"></td><td class="yo"><div class="iconloading"></div></td><td class="kz" style="border:1px;"><div class="iconloading">  </div></td></tr>');
                    // The URL is user input: set it through attr()/text() so quotes or angle
                    // brackets in it cannot break out of the markup.
                    var link = $('<a target="_blank"></a>').attr("href", url).text(url);
                    row.find(".url2").append(link);
                    $(".tb1").append(row);
                    get_rs(url,count.star);
                    count.star++;
                }
            </script>
        </body>
    </html>

    请求处理

    <?php
    
    // URL to check, read from the "site" query parameter,
    // e.g. 'http://xsh.changyou.com/dhsh/events/events.shtml'.
    // Falls back to '' when the parameter is missing or is not a plain string
    // (a query such as ?site[]=x would make $_GET['site'] an array).
    $site = (isset($_GET['site']) && is_string($_GET['site'])) ? trim($_GET['site']) : '';
    
    /**
     * Build the error reply used when a preg_match_all() result has no usable first match.
     *
     * @param mixed  $r Match array as filled in by preg_match_all().
     * @param string $t Page title to include in the reply.
     * @return string|null JSON text {"error":1,"title":$t} when $r, $r[0] or $r[0][0] is falsy;
     *                     null when a first match is present.
     */
    function fs($r, $t) {
        $hasFirstMatch = $r && $r[0] && $r[0][0];
        if ($hasFirstMatch) {
            return null;
        }
        return json_encode(array('error' => 1, 'title' => $t));
    }
    
    /**
     * Return the text found between the first $start marker and the next $end marker.
     *
     * @param string $start Opening marker.
     * @param string $end   Closing marker, searched for after $start.
     * @param string $str   Text to search.
     * @return string The enclosed text; '' when $start does not occur; everything after
     *                $start when $end does not occur.
     */
    function str_substr($start, $end, $str) {
        $temp = explode($start, (string) $str, 2);
        // Without the opening marker explode() yields a single element; reading $temp[1]
        // would then raise an "undefined offset" notice and pass null on to explode().
        if (count($temp) < 2) {
            return '';
        }
        $content = explode($end, $temp[1], 2);
        return $content[0];
    }
    
    /**
     * Fetch a URL with cURL and hand back the response body.
     *
     * @param string $durl URL to request.
     * @return string|bool Whatever curl_exec() returns with CURLOPT_RETURNTRANSFER enabled:
     *                     the body on success, false on failure.
     */
    function curl_file_get_contents($durl) {
        $handle = curl_init();
        curl_setopt_array($handle, array(
            CURLOPT_RETURNTRANSFER => 1,  // return the body instead of printing it
            CURLOPT_TIMEOUT        => 20, // seconds
            CURLOPT_URL            => $durl,
        ));
        $body = curl_exec($handle);
        curl_close($handle);
        return $body;
    }
    
    // Main request flow: fetch the page to read its title, search Baidu for the URL,
    // then scrape the first result for a snapshot date. Always prints one JSON object:
    //   {"error":1,"title":...}           page unreachable or no dated result found
    //   {"error":0,"date":...,"title":...} a dated result was found

    // The client requests this endpoint as JSON, so label the reply accordingly.
    if (!headers_sent()) {
        header('Content-Type: application/json; charset=utf-8');
    }

    // $site comes from the query string. Accept only http(s) URLs so file_get_contents()
    // cannot be pointed at local files or other stream wrappers (file://, php://, ...).
    // NOTE(review): this does not stop requests to internal hosts; restrict further if the
    // tool is exposed publicly.
    if (!is_string($site) || !preg_match('#^https?://#i', $site)) {
        echo json_encode(array('error' => 1, 'title' => "路径不存在"));
        return;
    }

    // Fetch the page itself first: if it is unreachable there is nothing to look up, and
    // skipping the search saves one request against Baidu.
    // "@" is deliberate here: an unreachable URL is an expected outcome, reported as JSON.
    $d = @file_get_contents($site);
    if (!$d) {
        $s = "路径不存在";
        echo json_encode(array('error' => 1, 'title' => $s));
        return;
    }
    $s = str_substr("<title>", "</title>", $d);

    // URL-encode the search term so "&", "?" or "#" inside $site stay part of the wd value.
    $buffer = @file_get_contents("http://www.baidu.com/s?wd=" . urlencode($site));
    if ($buffer === false) {
        $buffer = ''; // a failed search falls through to the "no match" reply below
    }

    // Step 1: first <table>...</table> in the result page.
    // NOTE(review): "." does not match newlines here, so only a table written on a single
    // line can match - confirm against Baidu's current markup.
    preg_match_all("/<table[^>]+>.+?<\/table>/", $buffer, $r);
    $tmp = fs($r, $s);
    if ($tmp) {
        echo $tmp;
        return;
    }

    // Step 2: first <span ...>...</span> inside that table, e.g.
    // <span class="g">  <b>xsh.changyou.com</b>/<b>dhsh</b>/<b>events</b>/events... 2013-1-27  </span>
    preg_match_all("/<span[^>]+>.+?<\/span>/", $r[0][0], $r);
    $tmp = fs($r, $s);
    if ($tmp) {
        echo $tmp;
        return;
    }

    // Step 3: a date of the form YYYY-M-D inside that span.
    preg_match_all("/(\d{4}-\d+-\d+)/", $r[0][0], $r);
    $tmp = fs($r, $s);
    if ($tmp) {
        echo $tmp;
        return;
    }

    $info = array('error' => 0, 'date' => $r[0][0], 'title' => $s);
    echo json_encode($info);
    ?>

    svn: http://php-rss-ajax.googlecode.com/svn/trunk/site_rs_baidu

    源码下载:猛击此处

    反馈可以留言拍砖,或者发邮箱里:zhidong10@foxmail.com

  • 相关阅读:
    B1028人口普查
    B1004成绩排名
    B1041考试座位号
    A1009 Product of Polynomials多项式相乘
    A1002 A+B for Polynomials 多项式相加
    B1010一元多项式求导
    A1065 A+Band C(64 bit)
    A1046 Shortest Distance 最短路径
    排序
    windows 平台使用wireshark命令行抓包
  • 原文地址:https://www.cnblogs.com/zhidong123/p/2964045.html
Copyright © 2020-2023  润新知