• 我自己开发的工具,打印出百度贴吧某用户发表过的所有帖子


    <!doctype html>
    <html lang="zh-Hans">
    <head>
    	<!-- Page shell: the script further down fills #container with the generated post list. -->
    	<meta charset="utf-8">
    	<title>百度贴吧发帖列表</title>
    	<style>
    	a {
    		color: green;
    		font-family: arial;
    		font-weight: bold;
    	}
    	</style>
    </head>
    <body>
    	<div id="container"></div>
    </body>
    <script src="jquery1.7.1.js">
    /* Jerry 2017-02-06 14:58PM update
     should use C:\MyApp\Chrome\Application\chrome.exe --user-data-dir="C:/yaas" --disable-web-security
     and then FIRST LOG ON BAIDU successfully!!!!
    
    */
    
    </script>
    <script>
    /* Jerry 2017-02-05 5:54PM
    这个警告的意思是说:请求的资源可能会被(扩展/或其他什么机制)屏蔽掉。
    
    之所以会出现这个警告,是因为去获取该资源的请求其实并(还)没有真的发生,所以 Header 里显示的是伪信息,直到服务器真的有响应返回,这里的 Header 信息才会被更新为真实的。不过这一切也可能不会发生,因为该请求可能会被屏蔽。比如说 AdBlock 什么的,当然了不全是浏览器扩展,具体情况具体分析了。
    
    对了,别忘了用 chrome://net-internals 来帮助你查找被屏蔽的请求以及可能的原因。
    */
    
    // Origin prepended to the relative thread links found in the scraped pages.
    var PREFIX = "http://tieba.baidu.com";
    // First page of the "my posts" listing; later pages append "?&pn=<page>".
    var START = PREFIX + "/i/i/my_tie";
    // Alternative start URL kept from earlier experiments:
    //var START = "http://www.baidu.com";
    // Bar name -> array of post detail objects collected so far.
    var POST = {};
    // Number of posts collected; refreshed by shouldEnd().
    var TOTAL = 0;
    // Array of { name, size } records, one per bar, filled and ordered by sort().
    var SORTED = [];
    
    /**
     * Counts every post stored in a "bar name -> post list" map.
     *
     * @param {Object} collection Map whose own property values are arrays.
     * @returns {number} Sum of the lengths of all own-property arrays.
     */
    function getTotalCount(collection){
    	var count = 0;
    	// Declare the loop key locally; without `var` it leaked a global `bar`
    	// (and would throw a ReferenceError in strict mode).
    	for( var bar in collection){
    		// Borrow hasOwnProperty from Object.prototype so a key that happens
    		// to be called "hasOwnProperty" cannot shadow the check.
    		if( !Object.prototype.hasOwnProperty.call(collection, bar))
    			continue;
    		count += collection[bar].length;
    	}
    	return count;
    }
    
    /**
     * Refreshes TOTAL from POST and reports whether the last page added nothing.
     *
     * @param {number} previousCount Post count measured before the last page was processed.
     * @returns {boolean} true when the count did not change (loose equality), i.e. crawling can stop.
     */
    function shouldEnd(previousCount) {
    	var currentCount = getTotalCount(POST);
    	TOTAL = currentCount;
    	console.log("pre: " + previousCount + " total: " + TOTAL);
    	if( previousCount == currentCount )
    		return true;
    	return false;
    }
    
    /**
     * Entry point: downloads the listing page by page, collects every post,
     * then sorts the bars and renders the result.
     *
     * The first page is fetched from START; subsequent pages use "?&pn=<page>"
     * starting at 2. Crawling stops as soon as a page adds no new post.
     */
    function main() {
    	handleLiChildren(getPostByAJAX(START));
    	for( var page = 2; ; page++ ){
    		var countBefore = getTotalCount(POST);
    		handleLiChildren(getPostByAJAX(START + "?&pn=" + page));
    		// A page that contributes nothing means we ran past the last one.
    		if( shouldEnd(countBefore) )
    			break;
    	}
    	sort();
    	generate();
    }
    
    /**
     * Parses one downloaded listing page and stores every post entry it contains.
     *
     * @param {string} resultString HTML text of a listing page.
     */
    function handleLiChildren(resultString){
    	var listItems = $(resultString).find("li");
    	listItems.each(function(index, li) {
    		// Skip any <li> that carries a class, and the two tab labels;
    		// only the remaining items are handed to parseDetail().
    		if( li.className || li.innerText == "我回复的" || li.innerText == "我的精品" )
    			return;
    		insertPost(parseDetail(li));
    	});
    }
    
    /*
    <ul>
    	<li>
    		<cite>2016</cite>
    		 <a href="/f?kw=%E5%A4%A7%E9%82%91" >尿素氮</a>
    	</li>
    	<li>
    		<cite>2015</cite>
    		 <a href="/f?kw=%E5%A4%A7%E9%82%91" >尿素氮2</a>
    	</li>
    </ul>
    */
    
    /**
     * Builds the <li> markup for a single post.
     *
     * @param {Object} post Post detail with `date`, `url` and `postTitle`.
     *        `date` and `postTitle` are inserted as markup (parseDetail() reads
     *        them via innerHTML); `url` is escaped for use inside the href attribute.
     * @returns {string} '<li><cite>DATE</cite><a href="URL">TITLE</a></li>'
     */
    function getpostSource(post) {
    	// Escape & and " so the URL cannot terminate the href attribute early.
    	var safeUrl = String(post.url).replace(/&/g, "&amp;").replace(/"/g, "&quot;");
    	var source = "<li><cite>";
    	// Close the <cite> properly; the original emitted "/<cite>", which left
    	// the element open and printed a stray slash.
    	source += post.date + "</cite>";
    	source += '<a href="' + safeUrl + '">' + post.postTitle + "</a></li>";
    	return source;
    }
    
    /**
     * Builds the markup for one bar: a heading with the post count followed by
     * a <ul> holding one entry per post.
     *
     * @param {string} barName Name of the bar shown in the heading.
     * @param {Array} posts Post detail objects belonging to that bar.
     * @returns {string} Concatenated heading and list markup.
     */
    function getBarPostsSource(barName, posts) {
    	var pieces = ['<h1>' + barName + ': ' + posts.length + '个</h1>', "<ul>"];
    	for( var index = 0; index < posts.length; index++){
    		pieces.push(getpostSource(posts[index]));
    	}
    	pieces.push("</ul>");
    	return pieces.join("");
    }
    
    /**
     * Comparator that orders records by `size`, largest first.
     *
     * @param {Object} a Record with a numeric `size`.
     * @param {Object} b Record with a numeric `size`.
     * @returns {number} Positive when b is larger than a, negative when smaller, 0 when equal.
     */
    function sortNumber(a,b){
    	var difference = b.size - a.size;
    	return difference;
    }
    
    /**
     * Appends one { name, size } record per bar in POST to SORTED and orders
     * SORTED by post count, largest first.
     */
    function sort() {
    	// Declare the loop key locally; without `var` it leaked a global
    	// `barName` (and would throw a ReferenceError in strict mode).
    	for( var barName in POST) {
    		// Borrowed from Object.prototype so a bar named "hasOwnProperty"
    		// cannot shadow the check.
    		if( !Object.prototype.hasOwnProperty.call(POST, barName))
    			continue;
    		var post = {
    			name: barName,
    			size: POST[barName].length
    		};
    		SORTED.push(post);
    	}
    	SORTED.sort(sortNumber);
    }
    
    /**
     * Renders the report into the #container element: a total line followed by
     * one section per bar, in the order stored in SORTED.
     */
    function generate(){
    	var target = document.getElementById("container");
    	var sections = ["总共帖子: " + TOTAL + "个"];
    	for( var index = 0; index < SORTED.length; index++){
    		var barName = SORTED[index].name;
    		sections.push(getBarPostsSource(barName, POST[barName]));
    	}
    	target.innerHTML = sections.join("");
    }
    
    // Start crawling once the DOM is ready, so that #container exists when generate() runs.
    $(document).ready(function(){
    	main();
    });
    
    
    /**
     * Downloads a page synchronously and returns its body text.
     *
     * The request is made with `withCredentials: true` so the browser's cookies
     * are sent along; for a cross-origin request the server must answer with
     * 'Access-Control-Allow-Credentials: true'.
     *
     * @param {string} requestURL URL of the page to download.
     * @returns {string} The `responseText` of the finished request.
     */
    function getPostByAJAX(requestURL){
    	var request = $.ajax({
    		url: requestURL,
    		// Extra fields copied onto the underlying XMLHttpRequest.
    		xhrFields: {
    			withCredentials: true
    		},
    		// Blocking call: callers use the returned text immediately.
    		async: false
    	});
    	// The leftover `debugger;` statement was removed: it paused every single
    	// page download whenever DevTools was open.
    	return request.responseText;
    }
    
    /*
    function getPostByAJAX(requestURL){
     var settings = {
            type: "GET",
            crossOrigin: true,
            url:requestURL,
            error: function(XHR,textStatus,errorThrown) {
                alert ("XHR="+XHR+"\ntextStatus="+textStatus+"\nerrorThrown=" + errorThrown);
            },
            success: function(data,textStatus) {
                debugger;
            },
            headers: {
                "Access-Control-Allow-Origin":"http://tieba.baidu.com",
                "Access-Control-Allow-Headers":"X-Requested-With"
            }
        };
        $.ajax(settings);
    }
    */
    /*
    function getPostByAJAX(requestURL){
       var html = $.ajax({
      	url: requestURL,
      	dataType:"jsonp",
      	xhrFields: {
        // The 'xhrFields' property sets additional fields on the XMLHttpRequest.
        // This can be used to set the 'withCredentials' property.
        // Set the value to 'true' if you'd like to pass cookies to the server.
        // If this is enabled, your server must respond with the header
        // 'Access-Control-Allow-Credentials: true'.
        withCredentials: true
      },
      	async: false}).responseText; 
       return html;
    }
    */
    /**
     * Files a post under its bar name in POST, creating the list on first use.
     *
     * @param {Object} postDetail Post detail object; its `barName` selects the list.
     */
    function insertPost(postDetail){
    	var key = postDetail.barName;
    	var bucket = POST[key];
    	if( !bucket ){
    		bucket = POST[key] = [];
    	}
    	bucket.push(postDetail);
    }
    
    /**
     * Extracts the fields of one post from its <li> node.
     *
     * Reads the first <cite> for the date, the first link in the first <td>
     * for the bar name, and the first link in the second <td> for the title
     * and the (PREFIX-qualified) thread URL.
     *
     * @param {Element} liNode The <li> element of a post entry.
     * @returns {Object} { date, barName, postTitle, url }
     */
    function parseDetail(liNode) {
    	var date = $("cite", liNode)[0].innerHTML;
    	var cells = $("td", liNode);
    	var barName = $("a", cells[0])[0].innerHTML;
    	var titleLink = $("a", cells[1]);
    	var postTitle = titleLink[0].innerHTML;
    	return {
    		date: date,
    		barName: barName,
    		postTitle: postTitle,
    		url: PREFIX + titleLink.attr("href")
    	};
    }
    /**
     * Returns a canned listing page for offline experiments: one real post
     * entry followed by two empty <li> elements.
     *
     * @returns {string} HTML text of the sample page.
     */
    function getTestData(){
    	var fragments = [
    		'<!DOCTYPE html><html><body><div class="wrap1"><div class="wrap2"><div ',
    		' id="main_wrapper" class="main_wrapper"><div id="main_back_img"><div ',
    		' id="main_back_bottom"><div id="container" class="ibody clearfix"><div><div ',
    		' id="content"><div class="simple_block_container"><ul><li><cite>2-16</cite>',
    		'<div class="wrap_container"><table><tr><td class="nowrap">在<a style="" ',
    		' href="/f?kw=%E5%A4%A7%E9%82%91" target="_blank">ANDROID吧</a> 发贴</td><td class="wrap">',
    		'<a href="/p/4356641476?pid=84106363194&amp;cid=0#841063631" class="thread_title"  target="_blank">硬盘</a></td>',
    		'</tr></table></div><div class="clear"></div></li>',
    		'<li></li><li></li></ul></div></div></div></div></div></div></div></div></body></html>'
    	];
    	return fragments.join("");
    }
    </script>
    </html>
    

    要获取更多Jerry的原创文章,请关注公众号"汪子熙":

  • 相关阅读:
    为自己的开篇
    软考程序员笔记
    centos php7 安装mysqli扩展心得
    php判断访问协议是否是https
    go语言新建多维map集合
    获取contenteditable区域光标所在位置信息
    ckeditor中 config.js等通过ckeditor.js引入文件手动修改方法
    Vue使用——v-for循环里面使用v-if判断显示数据
    数据库关联字段设置
    Spring Jpa 自动建表——时间字段设置
  • 原文地址:https://www.cnblogs.com/sap-jerry/p/12232805.html
Copyright © 2020-2023  润新知