• 分享一段下载QQ用户日志到本地的代码 [转日志不用进空间啦,输入QQ号就可以下载对方任意一篇日志了]


    很悲剧的说,又是被冻醒的,苦逼的程序员生活.冻手冻脚的敲代码,真心伤不起.

    继上次图解分析的腾讯空间日志真实路径后,闲着没事就写了段下载腾讯空间日志的代码.这年头转日志不用进空间啦,输入QQ号就可以下载对方任意一篇日志了.

    当然你开心就全部下载喽.

    实现方式很简单,简单得有些搞笑,大侠们勿喷啊,但是功能还是搞定了.至于优化或者更好的方法,后面再想想.贴出来与大家分享一下.

    package org.crawler.picture.dennisit.action;
    
    import java.io.BufferedReader;
    import java.io.BufferedWriter;
    import java.io.File;
    import java.io.FileWriter;
    import java.io.InputStream;
    import java.io.InputStreamReader;
    import java.net.URL;
    import java.net.URLConnection;
    import java.util.ArrayList;
    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;
    
    
    /**
     * Downloads and backs up Qzone blog posts of a QQ user.
     *
     * <p>Blog IDs are scraped line by line from the {@code get_abs} listing endpoint; a single
     * post is saved to disk from the {@code blog_output_data} endpoint. All network failures are
     * reported on standard error and otherwise treated as "no data" (best effort).
     *
     * @version 1.1
     * @author 苏若年 <a href="mailto:DennisIT@163.com">send mail</a>
     * @since 1.0 (created 2013-1-2 23:56:55)
     */

    public class BlogDownloadAction extends DownloadAction{

        /** User-Agent header sent with every request. */
        private static final String USER_AGENT = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)";

        /** Number of blog entries requested per listing page. */
        private static final int PAGE_SIZE = 15;

        /**
         * Builds the listing URL that returns the blog abstracts of one page.
         *
         * <p>Apart from the three arguments, every query parameter is a fixed value
         * (including {@code statYear=2013}, {@code rand} and {@code g_tk}).
         *
         * @param qq  QQ number of the blog owner
         * @param pos offset of the first entry to return
         * @param num number of entries to return (callers in this class use 15)
         * @return the listing URL
         */
        public String createURLForPage(String qq,int pos,int num){
            String baseStrBegin = "http://b11.qzone.qq.com/cgi-bin/blognew/get_abs?hostUin="+qq;
            String baseStrcont1 = "&blogType=0&cateName=&cateHex=&statYear=2013&reqInfo=7&pos=" + pos;
            String baseStrcont2 = "&num=" + num +"&sortType=0&absType=0&source=0&rand=0.8141584321856499&g_tk=5381&verbose=1&ref=qzone";
            return baseStrBegin + baseStrcont1 + baseStrcont2;
        }


        /**
         * Collects the blog IDs listed on one page.
         *
         * <p>On any failure the stack trace is printed and the IDs gathered so far are returned.
         *
         * @param qq   QQ number of the blog owner
         * @param page offset of the first entry (passed as {@code pos} to
         *             {@link #createURLForPage(String, int, int)}); callers pass multiples of 15
         * @param num  number of entries to request
         * @return the blog IDs found, possibly empty, never {@code null}
         */
        public List<String> getBlogIDListForEachPage(String qq,int page,int num){
            List<String> blogIds = new ArrayList<String>();
            BufferedReader reader = null;
            try {
                reader = openBlogReader(createURLForPage(qq, page, num), null);

                // With this start value, (lineCounter - 18) % 13 == 0 first holds on the 9th line
                // read and then on every 13th line after it; only those lines are inspected.
                int lineCounter = 9;
                String line;
                while ((line = reader.readLine()) != null) {
                    lineCounter++;
                    if (line.contains("cateInfo")) {
                        // The listing stops scanning once the "cateInfo" marker is reached.
                        break;
                    }
                    if ((lineCounter - 18) % 13 != 0 || !line.contains("blogId")) {
                        continue;
                    }
                    // Lines that also contain "{" have their first 8 characters dropped before
                    // the value is extracted.
                    String field = line.contains("{") ? line.substring(8) : line;
                    String blogId = getBlogINFO(field);
                    if (blogId != null) {
                        blogIds.add(blogId);
                    }
                }
            } catch (Exception e) {
                // Best effort: report the problem and return whatever was collected.
                e.printStackTrace();
            } finally {
                closeBlogResource(reader);
            }
            return blogIds;
        }

        /**
         * Extracts the second token of a line split on {@code ':'} or {@code ','}.
         *
         * @param str a line such as {@code "blogId":123,}
         * @return the second token, or {@code null} when {@code str} is {@code null} or has
         *         fewer than two tokens
         */
        public  String getBlogINFO(String str){
            if (str == null) {
                return null;
            }
            String[] tokens = str.split(":|,");
            return tokens.length > 1 ? tokens[1] : null;
        }

        /**
         * Builds the visitor-facing URL of every blog listed on one page.
         *
         * @param qq   QQ number of the blog owner
         * @param page offset of the first entry
         * @param num  number of entries to request
         * @return one URL per blog ID found, possibly empty
         */
        public List<String> getBlogURLListForEachPage(String qq, int page,int num){
            List<String> blogIds = getBlogIDListForEachPage(qq, page, num);
            List<String> blogUrls = new ArrayList<String>(blogIds.size());
            String prefix = "http://user.qzone.qq.com/"+qq+"/blog/";
            for (String blogId : blogIds) {
                blogUrls.add(prefix + blogId);
            }
            return blogUrls;
        }

        /**
         * Builds the URL of the first listing page; its response is used to read the total
         * number of blogs.
         *
         * @param qq QQ number of the blog owner
         * @return the first-page listing URL
         */
        public String createFirstPageURL(String qq){
            return createURLForPage(qq, 0, PAGE_SIZE);
        }


        /**
         * Reads the total number of blogs from the first listing page.
         *
         * @param qq QQ number of the blog owner
         * @return the value parsed from the first line containing {@code totalNum}, or 0 when
         *         no such line is found or the request/parsing fails
         */
        public int getBlogCount(String qq){
            int blogCount = 0;
            BufferedReader reader = null;
            try {
                reader = openBlogReader(createFirstPageURL(qq), null);
                String line;
                while ((line = reader.readLine()) != null) {
                    if (line.contains("totalNum")) {
                        blogCount = Integer.parseInt(BlogINFOUtil.getBlogINFO(line));
                        break;
                    }
                }
            } catch (Exception e) {
                // Best effort: report the problem and fall back to 0.
                e.printStackTrace();
            } finally {
                closeBlogResource(reader);
            }
            return blogCount;
        }

        /**
         * Collects the IDs of all blogs of a user, page by page.
         *
         * @param qq QQ number of the blog owner
         * @return all blog IDs found, possibly empty
         */
        public List<String>  allQQBlogID(String qq){
            List<String> allBlogID = new ArrayList<String>();
            int count = getBlogCount(qq);
            int pageCount = blogPageCount(count);

            for (int i = 0; i < pageCount; i++) {
                int offset = i * PAGE_SIZE;
                // The last page only asks for the remaining entries.
                int num = Math.min(PAGE_SIZE, count - offset);
                allBlogID.addAll(getBlogIDListForEachPage(qq, offset, num));
            }
            return allBlogID;
        }

        /**
         * Collects the visitor-facing URLs of all blogs of a user and prints progress to
         * standard output.
         *
         * @param qq QQ number of the blog owner
         * @return all blog URLs found, possibly empty
         */
        public List<String>  allQQBlogURL(String qq){
            List<String> allURL = new ArrayList<String>();
            int count = getBlogCount(qq);
            System.out.println("日志总数为:" + count);
            int pageCount = blogPageCount(count);
            System.out.println("用户日志页数:" + pageCount);
            int show = 0;
            for (int i = 0; i < pageCount; i++) {
                System.out.println(qq+"用户的第"+(i+1)+"页的日志信息");
                System.out.println("----------------------------------------");
                int offset = i * PAGE_SIZE;
                // The last page only asks for the remaining entries.
                int num = Math.min(PAGE_SIZE, count - offset);
                List<String> pageUrls = getBlogURLListForEachPage(qq, offset, num);
                for (String str : pageUrls) {
                    System.out.println(qq +"用户的第"+(++show)+"篇日志访问URL为:\t"+str);
                }
                allURL.addAll(pageUrls);
                System.out.println("----------------------------------------");
            }
            return allURL;
        }

        /**
         * Builds the URL that returns the actual content of one blog post.
         *
         * <p>Shape of the generated URL (all parameters other than {@code uin}, {@code blogid}
         * and {@code pos} are fixed values):
         * <pre>
         * http://b11.qzone.qq.com/cgi-bin/blognew/blog_output_data?uin=QQ&amp;blogid=ID
         *   &amp;styledm=...&amp;imgdm=...&amp;bdm=...&amp;mode=2&amp;numperpage=15
         *   &amp;blogseed=...&amp;property=GoRE&amp;timestamp=...&amp;dprefix=&amp;g_tk=5381
         *   &amp;ref=qzone&amp;v6=1&amp;entertime=...&amp;via=QZ.HashRefresh&amp;pos=ID
         * </pre>
         *
         * @param qq    QQ number of the blog owner
         * @param logId blog ID, used for both {@code blogid} and {@code pos}
         * @return the content URL
         */
        public String createHaveContentBlogURL(String qq,String logId){
            String baseContURL = "http://b11.qzone.qq.com/cgi-bin/blognew/blog_output_data?uin=" + qq +"&blogid="+logId ;
            String baseCont1 = "&styledm=ctc.qzonestyle.gtimg.cn&imgdm=ctc.qzs.qq.com&bdm=b.qzone.qq.com&mode=2&numperpage=15";
            String baseCont2 = "&blogseed=0.491407030262053&property=GoRE&timestamp=1357192365&dprefix=&g_tk=5381";
            String baseCont3 = "&ref=qzone&v6=1&entertime=1357192364386&via=QZ.HashRefresh";
            String baseCont4 = "&pos=" + logId;
            return baseContURL + baseCont1 + baseCont2 + baseCont3 + baseCont4;
        }

        /**
         * Downloads one blog post and writes it to a local file, echoing every line to
         * standard output.
         *
         * <p>The response is decoded as gb2312; the file is written with the platform default
         * charset of {@link FileWriter}. Failures are reported via a stack trace and both
         * streams are closed in every case.
         *
         * @param backPath directory the file is stored in; when {@code null} or empty the file
         *                 name is used as-is
         * @param fileName name of the file to create
         * @param urlStr   content URL of the blog post
         */
        public static void backQQBlog(String backPath, String fileName, String urlStr){
            BufferedReader reader = null;
            BufferedWriter writer = null;
            try {
                reader = openBlogReader(urlStr, "gb2312");

                // File(parent, child) inserts the separator when backPath lacks a trailing one.
                File target = (backPath == null || backPath.length() == 0)
                        ? new File(fileName)
                        : new File(backPath, fileName);
                writer = new BufferedWriter(new FileWriter(target));

                String line;
                while ((line = reader.readLine()) != null) {
                    System.out.println(line);
                    writer.write(line + "\r\n");
                }
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                // Close the writer first so buffered content is flushed even if the read failed.
                closeBlogResource(writer);
                closeBlogResource(reader);
            }
        }

        /**
         * Demo entry point: lists all blogs of a user and backs up the second one.
         *
         * @param args optional: {@code args[0]} QQ number (default 799089378),
         *             {@code args[1]} target directory (default {@code F:/})
         */
        public static void main(String[] args) {
            String qq = args.length > 0 ? args[0] : "799089378";
            String filePath = args.length > 1 ? args[1] : "F:/";
            // Zero-based index of the post to back up; 1 is the second post, matching the
            // message printed below. The same index is used for the ID and the URL list.
            int blogIndex = 1;

            BlogDownloadAction  down = new BlogDownloadAction();
            List<String> qqIdList = down.allQQBlogID(qq);
            List<String> qqBlogURLList = down.allQQBlogURL(qq);
            System.out.println("所有日志总数:" + qqBlogURLList.size());

            if (blogIndex >= qqIdList.size() || blogIndex >= qqBlogURLList.size()) {
                System.err.println("日志数量不足,无法备份第二篇日志");
                return;
            }

            String blogId = qqIdList.get(blogIndex);
            String filename = qq + "_" + blogId + ".html";
            System.out.println("第二篇日志的Id为:"+ blogId + ",\t日志访问URL为:" + qqBlogURLList.get(blogIndex));
            String url = down.createHaveContentBlogURL(qq, blogId);
            System.out.println(url);
            backQQBlog(filePath, filename, url);
        }

        /** Returns the number of listing pages needed for {@code count} blogs (0 when none). */
        private static int blogPageCount(int count){
            if (count <= 0) {
                return 0;
            }
            return (count + PAGE_SIZE - 1) / PAGE_SIZE;
        }

        /**
         * Opens {@code urlStr} with the User-Agent header applied and wraps the response in a
         * reader. The stream is taken from the configured connection so the header is
         * actually sent.
         *
         * @param charsetName charset used to decode the response, or {@code null} for the
         *                    platform default
         */
        private static BufferedReader openBlogReader(String urlStr, String charsetName) throws java.io.IOException {
            URLConnection connection = new URL(urlStr).openConnection();
            connection.addRequestProperty("User-Agent", USER_AGENT);
            InputStream in = connection.getInputStream();
            try {
                InputStreamReader decoder = (charsetName == null)
                        ? new InputStreamReader(in)
                        : new InputStreamReader(in, charsetName);
                return new BufferedReader(decoder);
            } catch (java.io.IOException e) {
                // Do not leak the connection when the charset is not supported.
                closeBlogResource(in);
                throw e;
            }
        }

        /** Closes {@code resource} if it is not {@code null}, reporting (not throwing) failures. */
        private static void closeBlogResource(java.io.Closeable resource){
            if (resource == null) {
                return;
            }
            try {
                resource.close();
            } catch (java.io.IOException e) {
                e.printStackTrace();
            }
        }

    }


    转载请注明出处[http://www.cnblogs.com/dennisit/archive/2013/01/05/2845095.html]

      在线交谈

  • 相关阅读:
    Spring AOP概念理解
    五分钟快速掌握RPC原理及实现
    Linux常用命令汇总
    一致性哈希算法原理
    RPC原理及实现
    IO设计模式:Reactor和Proactor对比
    到底什么时候该使用MQ?
    eclipse查看一个方法被谁引用(调用)的快捷键四种方式
    maven build pulgin
    VSCode 常用setiings.json设置
  • 原文地址:https://www.cnblogs.com/dennisit/p/2845095.html
Copyright © 2020-2023  润新知