很悲剧的说,又是被冻醒的,苦逼的程序员生活.冻手冻脚的敲代码,真心伤不起.
继上次图解分析的腾讯空间日志真实路径后,闲着没事就写了段下载腾讯空间日志的代码.这年头转日志不用进空间啦,输入QQ号就可以下载对方任意一篇日志了.
当然你开心就全部下载喽.
实现方式很简单,简单得有些搞笑,大侠们勿喷啊,但是功能还是搞定了.至于优化或者更好的方法,后面再想想.贴出来与大家分享一下.
package org.crawler.picture.dennisit.action;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
 * Downloads and backs up Qzone blog entries for a given QQ number.
 *
 * @version : 1.1
 *
 * @author : 苏若年 <a href="mailto:DennisIT@163.com">send mail</a>
 *
 * @since : 1.0, created 2013-1-2 23:56:55
 */
public class BlogDownloadAction extends DownloadAction {

    /** Number of entries requested per listing page. */
    private static final int PAGE_SIZE = 15;

    /** User-Agent header sent with every request made by this class. */
    private static final String USER_AGENT = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)";

    /** Separator line printed between pages by {@link #allQQBlogURL(String)}. */
    private static final String SEPARATOR = "----------------------------------------";

    /**
     * Builds the URL of one blog-listing request.
     *
     * @param qq  QQ number of the blog owner, sent as {@code hostUin}
     * @param pos value of the {@code pos} query parameter; callers in this class
     *            pass multiples of 15, i.e. the offset of the first entry
     * @param num value of the {@code num} query parameter (entries to return)
     * @return the listing URL
     */
    public String createURLForPage(String qq, int pos, int num) {
        StringBuilder sb = new StringBuilder();
        sb.append("http://b11.qzone.qq.com/cgi-bin/blognew/get_abs?hostUin=").append(qq);
        sb.append("&blogType=0&cateName=&cateHex=&statYear=2013&reqInfo=7&pos=").append(pos);
        sb.append("&num=").append(num);
        sb.append("&sortType=0&absType=0&source=0&rand=0.8141584321856499&g_tk=5381&verbose=1&ref=qzone");
        return sb.toString();
    }

    /**
     * Fetches one listing page and extracts the blog ids found in it.
     *
     * <p>Failures are reported on stderr and the ids collected so far are returned.
     *
     * @param qq   QQ number of the blog owner
     * @param page value forwarded as {@code pos} to {@link #createURLForPage}
     * @param num  number of entries requested
     * @return the ids found on the page, possibly empty, never {@code null}
     */
    public List<String> getBlogIDListForEachPage(String qq, int page, int num) {
        List<String> lst = new ArrayList<String>();
        String diaryURL = createURLForPage(qq, page, num);
        InputStream in = null;
        BufferedReader reader = null;
        try {
            in = openWithUserAgent(diaryURL);
            reader = new BufferedReader(new InputStreamReader(in));
            // The counter starts at 9 so that (countNum - 18) % 13 == 0 selects
            // response lines 9, 22, 35, ... (1-based). This depends on the exact
            // line layout of the response.
            int countNum = 9;
            String rLine;
            while ((rLine = reader.readLine()) != null) {
                countNum++;
                if (rLine.contains("cateInfo")) {
                    break;
                }
                if ((countNum - 18) % 13 != 0 || !rLine.contains("blogId")) {
                    continue;
                }
                // Lines that also contain "{" have their first 8 characters dropped
                // before the value is extracted.
                String temp = rLine.contains("{") ? rLine.substring(8) : rLine;
                String blogId = getBlogINFO(temp);
                if (blogId != null) {
                    lst.add(blogId);
                }
            }
        } catch (Exception e) {
            System.err.println("获取日志ID列表失败: " + diaryURL + " (" + e + ")");
        } finally {
            closeSilently(reader);
            closeSilently(in);
        }
        return lst;
    }

    /**
     * Extracts the second token of a line split on {@code ':'} or {@code ','}.
     *
     * @param str a line such as {@code "key":value,}
     * @return the token following the first delimiter, or {@code null} when the
     *         line contains no second token
     */
    public String getBlogINFO(String str) {
        String[] strArray = str.split(":|,");
        // Index 1 is read, so at least two tokens are required.
        if (strArray.length > 1) {
            return strArray[1];
        }
        return null;
    }

    /**
     * Builds the public URLs of every blog entry found on one listing page.
     *
     * @param qq   QQ number of the blog owner
     * @param page value forwarded as {@code pos} to the listing request
     * @param num  number of entries requested
     * @return one URL per id returned by {@link #getBlogIDListForEachPage}
     */
    public List<String> getBlogURLListForEachPage(String qq, int page, int num) {
        String blogUrl = "http://user.qzone.qq.com/" + qq + "/blog/";
        List<String> urlSet = new ArrayList<String>();
        for (String id : getBlogIDListForEachPage(qq, page, num)) {
            urlSet.add(blogUrl + id);
        }
        return urlSet;
    }

    /**
     * Builds the URL of the first listing page; its response is used to read the
     * total number of blog entries.
     *
     * @param qq QQ number of the blog owner
     * @return the listing URL for offset 0
     */
    public String createFirstPageURL(String qq) {
        return createURLForPage(qq, 0, PAGE_SIZE);
    }

    /**
     * Reads the total number of blog entries from the first listing page.
     *
     * <p>Failures (network errors, unparsable number) are reported on stderr.
     *
     * @param qq QQ number of the blog owner
     * @return the value parsed from the first line containing {@code totalNum},
     *         or 0 when no such line is found or the request fails
     */
    public int getBlogCount(String qq) {
        int blogCount = 0;
        String diaryURL = createFirstPageURL(qq);
        InputStream in = null;
        BufferedReader reader = null;
        try {
            in = openWithUserAgent(diaryURL);
            reader = new BufferedReader(new InputStreamReader(in));
            String rLine;
            while ((rLine = reader.readLine()) != null) {
                if (rLine.contains("totalNum")) {
                    // NOTE(review): BlogINFOUtil is defined outside this file; it
                    // presumably mirrors getBlogINFO above - confirm.
                    blogCount = Integer.parseInt(BlogINFOUtil.getBlogINFO(rLine));
                    break;
                }
            }
        } catch (Exception e) {
            System.err.println("获取日志总数失败: " + diaryURL + " (" + e + ")");
        } finally {
            closeSilently(reader);
            closeSilently(in);
        }
        return blogCount;
    }

    /**
     * Collects the ids of all blog entries of a user, page by page.
     *
     * @param qq QQ number of the blog owner
     * @return all ids found, in listing order
     */
    public List<String> allQQBlogID(String qq) {
        List<String> allBlogID = new ArrayList<String>();
        int count = getBlogCount(qq);
        int pageCount = pageCountFor(count);
        for (int i = 0; i < pageCount; i++) {
            int offset = i * PAGE_SIZE;
            // The last page only asks for the remaining entries.
            int num = (i == pageCount - 1) ? count - offset : PAGE_SIZE;
            allBlogID.addAll(getBlogIDListForEachPage(qq, offset, num));
        }
        return allBlogID;
    }

    /**
     * Collects the public URLs of all blog entries of a user and prints a
     * per-page report to stdout.
     *
     * @param qq QQ number of the blog owner
     * @return all entry URLs, in listing order
     */
    public List<String> allQQBlogURL(String qq) {
        List<String> allURL = new ArrayList<String>();
        int count = getBlogCount(qq);
        System.out.println("日志总数为:" + count);
        int pageCount = pageCountFor(count);
        System.out.println("用户日志页数:" + pageCount);
        int show = 0;
        for (int i = 0; i < pageCount; i++) {
            System.out.println(qq + "用户的第" + (i + 1) + "页的日志信息");
            System.out.println(SEPARATOR);
            int offset = i * PAGE_SIZE;
            // The last page only asks for the remaining entries.
            int num = (i == pageCount - 1) ? count - offset : PAGE_SIZE;
            List<String> lsts = getBlogURLListForEachPage(qq, offset, num);
            for (String str : lsts) {
                System.out.println(qq + "用户的第" + (++show) + "篇日志访问URL为:\t" + str);
            }
            allURL.addAll(lsts);
            System.out.println(SEPARATOR);
        }
        return allURL;
    }

    /**
     * Builds the URL that returns the actual content of one blog entry.
     *
     * <p>Shape of the generated URL:
     * <pre>
     * http://b11.qzone.qq.com/cgi-bin/blognew/blog_output_data?uin=
     * 1325103287
     * &amp;blogid=
     * 1305125403
     * &amp;styledm=ctc.qzonestyle.gtimg.cn&amp;imgdm=ctc.qzs.qq.com&amp;bdm=b.qzone.qq.com&amp;mode=2
     * &amp;numperpage=15
     * &amp;blogseed=0.491407030262053&amp;property=GoRE&amp;timestamp=1357192365&amp;dprefix=&amp;g_tk=5381
     * &amp;ref=qzone&amp;v6=1&amp;entertime=1357192364386&amp;via=QZ.HashRefresh
     * &amp;pos=1305125403
     * </pre>
     *
     * @param qq    QQ number of the blog owner, sent as {@code uin}
     * @param logId id of the blog entry, sent as both {@code blogid} and {@code pos}
     * @return the content URL
     */
    public String createHaveContentBlogURL(String qq, String logId) {
        String baseContURL = "http://b11.qzone.qq.com/cgi-bin/blognew/blog_output_data?uin=" + qq + "&blogid=" + logId;
        String baseCont1 = "&styledm=ctc.qzonestyle.gtimg.cn&imgdm=ctc.qzs.qq.com&bdm=b.qzone.qq.com&mode=2&numperpage=15";
        // "&timestamp" had been corrupted to "×tamp" ("&times" decoded as an HTML
        // entity), which merged two query parameters into one.
        String baseCont2 = "&blogseed=0.491407030262053&property=GoRE&timestamp=1357192365&dprefix=&g_tk=5381";
        String baseCont3 = "&ref=qzone&v6=1&entertime=1357192364386&via=QZ.HashRefresh";
        String baseCont4 = "&pos=" + logId;
        return baseContURL + baseCont1 + baseCont2 + baseCont3 + baseCont4;
    }

    /**
     * Downloads a blog entry and writes it to a local file, echoing every line to
     * stdout. Errors are printed and not propagated.
     *
     * @param backPath directory prefix of the backup file (concatenated as-is with
     *                 {@code fileName}, so it must end with a separator)
     * @param fileName name of the backup file
     * @param urlStr   content URL of the blog entry
     */
    public static void backQQBlog(String backPath, String fileName, String urlStr) {
        InputStream in = null;
        BufferedReader reader = null;
        BufferedWriter writer = null;
        try {
            in = openWithUserAgent(urlStr);
            reader = new BufferedReader(new InputStreamReader(in, "gb2312"));
            // NOTE(review): FileWriter encodes with the platform default charset,
            // while the input is decoded as gb2312 - confirm this is intended.
            writer = new BufferedWriter(new FileWriter(new File(backPath + fileName)));
            String rLine;
            while ((rLine = reader.readLine()) != null) {
                System.out.println(rLine);
                writer.write(rLine + "\r\n");
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Close on every path so neither the file handle nor the connection leaks.
            closeSilently(writer);
            closeSilently(reader);
            closeSilently(in);
        }
    }

    /**
     * Demo entry point: lists all entries of a sample account and backs up the
     * second one.
     */
    public static void main(String[] args) {
        String qq = "799089378";
        BlogDownloadAction down = new BlogDownloadAction();
        List<String> qqIdList = down.allQQBlogID(qq);
        List<String> qqBlogURLList = down.allQQBlogURL(qq);
        System.out.println("所有日志总数:" + qqBlogURLList.size());

        // Back up the second entry; the same index is used for id and URL so the
        // printed pair always refers to one entry.
        int index = 1;
        if (qqIdList.size() <= index || qqBlogURLList.size() <= index) {
            System.err.println("日志数量不足,无法备份第二篇日志");
            return;
        }
        String blogId = qqIdList.get(index);
        String filePath = "F:/";
        String filename = qq + "_" + blogId + ".html";
        System.out.println("第二篇日志的Id为:" + blogId + ",\t日志访问URL为:" + qqBlogURLList.get(index));
        String url = down.createHaveContentBlogURL(qq, blogId);
        System.out.println(url);
        backQQBlog(filePath, filename, url);
    }

    /** Number of listing pages needed for {@code count} entries. */
    private static int pageCountFor(int count) {
        return (count % PAGE_SIZE == 0) ? count / PAGE_SIZE : (count / PAGE_SIZE + 1);
    }

    /**
     * Opens {@code address} and returns its body stream, sending the User-Agent
     * header on the connection that is actually read.
     */
    private static InputStream openWithUserAgent(String address) throws java.io.IOException {
        URLConnection connection = new URL(address).openConnection();
        connection.addRequestProperty("User-Agent", USER_AGENT);
        // URL.openStream() would open a second connection without the header.
        return connection.getInputStream();
    }

    /** Closes {@code resource} if non-null; a failing close is reported, not thrown. */
    private static void closeSilently(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (java.io.IOException e) {
            System.err.println("关闭资源失败: " + e);
        }
    }
}
转载请注明出处[http://www.cnblogs.com/dennisit/archive/2013/01/05/2845095.html]