前一段时间blog_backup不好使了,然后发邮件给作者也都没给回复,一直没信,然后决定不等了,在网上搜到一个使用js备份csdn的软件,虽然不能用了,但感觉想法不错,于是动手改,后来改的越来越多就干脆就重新创建文件.
于是这个csdn_blog_backup修改版终于做完了,算法上没什么,主要就是正则表达式花了老长时间,没有尝试别的博客,反正最终我的博客可以备份了,不保证别人的能用,O(∩_∩)O~,可能会由于主题不同某些人不能用,可以尝试修改正则式(可能还得费点时间).
下面是两个文件
run.bat
mode con lines=2000
cscript.exe csdn_blog_backup2.js
pause
csdn_blog_backup2.js
// 本程序思路是首先将要访问的页面信息(页面地址和文件夹)保存到各种对象数组中,然后不断将数组进行处理,生成新的数组,直到所有数组处理完毕.
//********************History Start*********************
//(2010/02/28 ) 由于CSDN有修改(自己猜的,囧),函数Article中正则表达式修改为: <div\s+class=\"user_article\">(?:(?!<\/div>).|\n)*?<h1\s+class=\"title_txt\">\s*(?:<img\s+src=\"\/images\/\w+\.gif\"\s+border=\"\d+\"\s+width=\"\d+\"\s+height=\"\d+\"\s+alt=\".+\"\/>\ \;)?\s*((?:(?!<\/div>).|\n)*?)\s*<cite\s+class=\"fav_csdnstylebykimi\">(?:(?!<\/cite>).|\n)*?<\/cite>\s*<\/h1>\s*<div\s+class=\"blogstory\">(?:\s*?<script\s+type=\"text\/javascript\">(?:(?!<\/script>).|\n)*?<\/script>\s*)*((?:.|\n)*?)\s*<p\s+class=\"right articalinfo\">(?:.|\n)*?@\s*((?:.|\n)*?)\ \;\&\#124\; \s*<a\s+id=\"a_comment\"\s+href=\"#FeedBack\"\s+title=\"评论\">(?:.| \n)*?<\/p>(?:.|\n)*?<div\s+class=\"mutualitys\">(?:.|\n)*?<\/div>\s*<\/div>
//(2010/03/28) 对图片部分取得进行优化:由于图片出错率比较高,但重要性又较文章低很多,所以考虑所用常量与文章获取区别开,以减少因图片获取造成的大量延时.
//(2010/06/29) 增加了断点续传功能,因为我发现总是容易被服务器拒绝,现在我的博客备份都需要两次才可以了.
//********************History End*********************
//
//********************start*********************
// 用户常量自定义区
//********************start*********************
// Root directory: Continue.xml is looked up here, and a fresh backup creates its timestamped folder below it.
var cstrRootDir = ".";
// When a main() pass hits a network error the server may be rejecting us, so the next pass is delayed.
// The delay grows by a fixed factor up to a cap, to probe whether the server has recovered.
// Initial delay (ms); main() multiplies it after each delayed retry.
var iDelay = 1000;
// Multiplier applied to the delay after each delayed retry
var ciDelayStep = 3;
// Upper bound for the delay (ms)
var ciMaxDelay = 6000000;
// Pause after each successful CSDN page fetch (ms). A suitable value helps avoid being blacklisted at the cost of a slower backup (with resume support this may be left at zero).
var ciUrlDelay = 0;
// When this many pages in a row fail, the server is assumed to be rejecting requests:
// the current queue loop is abandoned and main() goes into its delay handling.
var ciMaxConnectErrCount = 5;
// Retry limit for a single page before it is given up for this pass
// (_GetPageText loops while iRetry <= this value, i.e. up to this value + 1 attempts).
var ciMaxPageConnectErrCount = 3;
// How many consecutive failing main() passes are retried before the program gives up;
// by then the server is presumably rejecting every request.
var ciMaxMainRunCount = 1;
// XMLHTTP request timeout (ms)
var ciSendTime = 10000;
// Polling interval while waiting for the XMLHTTP request to complete (ms)
var ciSendTestInterval = 10;
//********************end*********************
// 用户常量自定义区
//********************end*********************
// User names read from the console, waiting to be turned into User objects by main()
var aryUserName = new Array;
// Shared script-wide objects
// WScript.Shell, used for popup dialogs
var m_wsh = new ActiveXObject("WScript.Shell");
// File system access (folders, text files)
var fso = new ActiveXObject("Scripting.FileSystemObject");
// Single XMLHTTP object shared by all page and image requests
var m_http = new ActiveXObject("Microsoft.xmlhttp");
// XML document used to build and save Continue.xml
var objParserWriter = new ActiveXObject("Microsoft.XMLDOM");
var objDate = new Date();
// Timestamp used as the name of the backup folder.
// getFullYear() replaces the deprecated getYear(), which only yields a four-digit year on JScript
// and returns (year - 1900) on standard engines.
var strDateTime = objDate.getFullYear().toString() + "_" + (objDate.getMonth()+1).toString() + "_" + objDate.getDate().toString() + "_" + objDate.getHours() + "_" + objDate.getMinutes() + "_" + objDate.getSeconds() + "_" + objDate.getMilliseconds();
// CSDN blog base address
var cstrBaseUrl = "http://blog.csdn.net";
// XML attribute name: folder
var cstrDir = "Dir";
// XML attribute name: address
var cstrUrl = "Url";
// File holding the unfinished work of an interrupted backup
var cstrContinueXmlFile = "Continue.xml";
// Batch file that restarts an interrupted backup
var cstrContinueBatFile = "Continue.bat";
// Log file name
var cstrLogFile = "log.txt";
// Path separator
var cstrFileSpliter = "\\";
// XML element name: pending articles
var cstrArticles = "Articles";
// XML element name: pending months
var cstrMonths = "Months";
// XML element name: pending users
var cstrUsers = "Users";
// XML root element name
var cstrContinue = "Continue";
// Placeholder wrapper around an XMLHTTP object.
// Neither parameter is read, and Open() is an empty stub.
function XmlHttp(iTestInterval, iTimeOut)
{
    // Created on construction; nothing in this constructor uses it yet.
    var objRequest = new ActiveXObject("Microsoft.xmlhttp");

    // Stub: intentionally does nothing.
    this.Open = function()
    {
    };
}
// One blog user waiting to be processed.
//   strUserDir - existing folder that receives this user's backup
//   strUserUrl - address of the user's blog front page
function User(strUserDir, strUserUrl)
{
    var m_UserDir = fso.GetFolder(strUserDir);
    var m_UserUrl = strUserUrl;

    // Downloads the front page, creates one folder per archive month and returns
    // an array of Month objects. Returns undefined when the page could not be
    // fetched (main() then keeps the user queued); returns an empty array when
    // the archive list cannot be found.
    this.Prase = function()
    {
        var strPage = _GetPageText(m_UserUrl);
        if (strPage == null)
        {
            return;
        }
        var aryFound = new Array();
        // Archive side bar: everything inside its <ul>
        var reMonths = /<dt>存档<\/dt>\s*<dd>\s*<div\s+class=\"publiclist_sidebar\">\s*<ul>((?:.|\n)*?)<\/ul>/igm;
        var aryBlocks = strPage.match(reMonths);
        if (null == aryBlocks)
        {
            _ConsoleWrite(strPage);
            _Debug("没找到月份");
            return aryFound;
        }
        // The matched block(s) are scanned as one string, as before.
        var strArchive = String(aryBlocks);
        // One <li> per month: group 1 = relative address, group 2 = label
        var reMonth = /<li><a\s+href=\"(.+?)\">(.+?)<\/a><\/li>/gim;
        reMonth.lastIndex = 0;
        var objMatch;
        // Captures are read from the exec() result into locals; the original leaked
        // strMonth / strMonthUrl as implicit globals.
        while ((objMatch = reMonth.exec(strArchive)) != null)
        {
            var strMonthUrl = objMatch[1];
            var strMonth = objMatch[2];
            // Folder for this month
            var strMonthDir = m_UserDir + cstrFileSpliter + _FormatFileName(strMonth);
            if (!_CreateFolder(strMonthDir))
            {
                _Debug("创建文件夹[" + strMonthDir + "]失败");
                continue;
            }
            aryFound.push(new Month(strMonthDir, cstrBaseUrl + strMonthUrl));
        }
        _ConsoleWrite(aryFound.length);
        return aryFound;
    };

    // Serializes this user as a <User Dir=".." Url=".."/> element for Continue.xml.
    this.toXml = function()
    {
        var xmlUser = objParserWriter.createElement("User");
        xmlUser.setAttribute(cstrDir, m_UserDir);
        xmlUser.setAttribute(cstrUrl, m_UserUrl);
        return xmlUser;
    };
}
// One archive month waiting to be processed.
//   strMonthDir - existing folder for this month
//   strMonthUrl - address of the month's archive page
function Month(strMonthDir, strMonthUrl)
{
    var m_MonthDir = fso.GetFolder(strMonthDir);
    var m_MonthUrl = strMonthUrl;

    // Downloads the archive page, creates a folder (plus an "img" sub folder) per
    // day and returns an array of Article objects. Returns undefined when the
    // page could not be fetched (main() then keeps the month queued).
    this.Prase = function()
    {
        var strPage = _GetPageText(m_MonthUrl);
        if (strPage == null)
        {
            return;
        }
        var aryFound = new Array();
        // NOTE(review): the "\ \;" in both patterns looks like a mangled "\&nbsp\;" - confirm against a real page.
        // One block per day: group 1 = date label, group 2 = that day's article summaries
        var reDay = /<div\s+class=\"default_contents\"><h6\s+class=\"pubtime\">(\d+?年\d+?月\d+?日)<\/h6>((?:<div\s+class=\"user_article\"><h1\s+class=\"title_txt\">(?:<img\s+src=\"\/images\/authorship\.gif\"\s+border=\"0\"\s+width=\"15\"\s+height=\"16\"\s+alt=\"原创\"\/>\ \;)?<a\s+href=\"(?:(?!<\/a>).)*?\">(?:(?!<\/a>).)*?<\/a><\/h1><p>(?:(?!<\/p>).|\n)*?<\/p>\s*<p\s+class=\"right articalinfo\">(?:(?!<\/p>).|\n)*?<\/p><\/div>)+?)<\/div>/gim;
        // One article summary: group 1 = relative address, group 2 = title
        var reArticleInfo = /<div\s+class=\"user_article\"><h1\s+class=\"title_txt\">(?:<img\s+src=\"\/images\/authorship\.gif\"\s+border=\"0\"\s+width=\"15\"\s+height=\"16\"\s+alt=\"原创\"\/>\ \;)?<a\s+href=\"((?:(?!<\/a>).)*?)\">((?:(?!<\/a>).)*?)<\/a><\/h1><p>(?:(?!<\/p>).|\n)*?<\/p>\s*<p\s+class=\"right\s+articalinfo\">(?:(?!<\/p>).|\n)*?<\/p><\/div>/gim;
        reDay.lastIndex = 0;
        var objDay;
        while ((objDay = reDay.exec(strPage)) != null)
        {
            var strDay = objDay[1];
            var strDayContent = objDay[2];
            _ConsoleWrite("天:" + strDay);
            // Folder for this day
            var strDayDir = m_MonthDir + cstrFileSpliter + _FormatFileName(strDay);
            if (!_CreateFolder(strDayDir))
            {
                _Debug("创建文件夹[" + strDayDir + "]失败\n");
                continue;
            }
            var strImageDir = strDayDir + cstrFileSpliter + "img";
            if (!_CreateFolder(strImageDir))
            {
                _Debug("创建文件夹[" + strImageDir + "]失败\n");
                // The original did "return aryImage", which handed the global image
                // queue to main() as if it were articles. Skip this day instead,
                // like the day-folder failure above.
                continue;
            }
            reArticleInfo.lastIndex = 0;
            var objInfo;
            while ((objInfo = reArticleInfo.exec(strDayContent)) != null)
            {
                var strArticleUrl = cstrBaseUrl + objInfo[1];
                var strArticleTitle = objInfo[2];
                _ConsoleWrite("篇:" + strArticleTitle);
                aryFound.push(new Article(strDayDir + cstrFileSpliter, strArticleUrl));
            }
        }
        return aryFound;
    };

    // Serializes this month as a <Month Dir=".." Url=".."/> element for Continue.xml.
    this.toXml = function()
    {
        var xmlMonth = objParserWriter.createElement("Month");
        xmlMonth.setAttribute(cstrDir, m_MonthDir);
        xmlMonth.setAttribute(cstrUrl, m_MonthUrl);
        return xmlMonth;
    };
}
// One article waiting to be saved.
//   strDayDir     - existing folder of the article's day (its "img" sub folder receives pictures)
//   strArticleUrl - address of the article page
function Article(strDayDir, strArticleUrl)
{
    var m_strDayDir = fso.GetFolder(strDayDir);
    var m_ArticleUrl = strArticleUrl;

    // Downloads the article, writes it as <title>.htm into the day folder with
    // picture links rewritten to .\img\..., and returns an array of Image objects
    // for the pictures it references. Returns undefined when the page could not
    // be fetched (main() then keeps the article queued); returns an empty array
    // when the page does not match the expected layout.
    this.Prase = function()
    {
        var strPage = _GetPageText(m_ArticleUrl);
        if (strPage == null)
        {
            return;
        }
        var aryFound = new Array();
        // Article layout: group 1 = title, group 2 = body, group 3 = creation time.
        // NOTE(review): the "\ \;" sequences look like a mangled "\&nbsp\;" - confirm against a real page.
        var re = /<div\s+class=\"user_article\">(?:(?!<\/div>).|\n)*?<h1\s+class=\"title_txt\">\s*(?:<img\s+src=\"\/images\/\w+\.gif\"\s+border=\"\d+\"\s+width=\"\d+\"\s+height=\"\d+\"\s+alt=\".+\"\/>\ \;)?\s*((?:(?!<\/div>).|\n)*?)\s*<cite\s+class=\"fav_csdnstylebykimi\">(?:(?!<\/cite>).|\n)*?<\/cite>\s*<\/h1>\s*<div\s+class=\"blogstory\">(?:\s*?<script\s+type=\"text\/javascript\">(?:(?!<\/script>).|\n)*?<\/script>\s*)*((?:.|\n)*?)\s*<p\s+class=\"right articalinfo\">(?:.|\n)*?@\s*((?:.|\n)*?)\ \;\&\#124\;\s*<a\s+id=\"a_comment\"\s+href=\"#FeedBack\"\s+title=\"评论\">(?:.|\n)*?<\/p>(?:.|\n)*?<div\s+class=\"mutualitys\">(?:.|\n)*?<\/div>\s*<\/div>/gim;
        var result = strPage.match(re);
        if (null == result)
        {
            _ConsoleWrite(strPage);
            _Debug("没匹配到文章");
            return aryFound;
        }
        // Captures of the match are read right away, before any other regex runs.
        var strTitlePart = RegExp.$1;
        var strContentPart = RegExp.$2;
        var strDateTimePart = RegExp.$3;
        var strTitle = _FormatFileName(strTitlePart);
        if (strTitle == "")
        {
            _ConsoleWrite(strPage);
            _Debug("文章名错误");
            return aryFound;
        }
        var strImageDir = m_strDayDir + cstrFileSpliter + "img";
        // Absolute http:// pictures: group 1 = tag prefix, group 2 = address
        var reImage = /(\<img\s*src\s*\=\s*\")(http\:\/\/[^\"]*?([^\/\"]*?))(?=\"[^\>].*?\/\>)/gim;
        reImage.lastIndex = 0;
        var objImg;
        while ((objImg = reImage.exec(strContentPart)) != null)
        {
            aryFound.push(new Image(strImageDir + cstrFileSpliter, objImg[2]));
        }
        // Point picture links at the local copies (same name the Image object uses).
        var strContent = strContentPart.replace(reImage, function($0, $1, $2, $3){return $1 + ".\\img\\" + _FormatFileName($2);});
        // Articles with the same title on the same day get "[0]", "[1]", ... appended.
        var strBasePath = m_strDayDir + cstrFileSpliter + _FormatFileName(strTitle);
        var strFilePath = strBasePath + ".htm";
        var i = 0;
        while (fso.FileExists(strFilePath) && i < 10000)
        {
            _ConsoleWrite("同名文章" + i.toString() + "篇出现");
            strFilePath = strBasePath + "[" + i.toString() + "]" + ".htm";
            // The original never advanced the counter, so a second collision looped forever.
            i++;
        }
        var text_file = fso.CreateTextFile(strFilePath, true);
        try
        {
            text_file.Write("<head><title>" + strTitle + "</title><h3>" + strTitle + "</h3><h5>创建时间:" + strDateTimePart + "</h5><h6>URL:" + "<a href ='" + m_ArticleUrl + "'>" + m_ArticleUrl + "</a>" + "</h6></head><Body>" + _ReplaceErrSign(strContent) + "</Body>");
        }
        finally
        {
            // Release the file handle even when Write fails.
            text_file.Close();
            text_file = null;
        }
        return aryFound;
    };

    // Serializes this article as an <Article Dir=".." Url=".."/> element for Continue.xml.
    this.toXml = function()
    {
        var xmlArticle = objParserWriter.createElement("Article");
        xmlArticle.setAttribute(cstrDir, m_strDayDir);
        xmlArticle.setAttribute(cstrUrl, m_ArticleUrl);
        return xmlArticle;
    };
}
// One picture waiting to be downloaded.
//   strImageDir - existing folder that receives the picture
//   strImageUrl - address of the picture; its formatted form is used as the file name
function Image(strImageDir, strImageUrl)
{
    var m_strImageName = _FormatFileName(strImageUrl);
    var m_strImageDir = fso.GetFolder(strImageDir);
    var m_strImageUrl = strImageUrl;

    // Downloads the picture unless the target file already exists.
    // Always returns true: a picture that cannot be fetched or saved is logged
    // and dropped, so it never blocks or retries the backup.
    this.Prase = function()
    {
        var strTarget = m_strImageDir + cstrFileSpliter + m_strImageName;
        if (fso.FileExists(strTarget))
        {
            _ConsoleWrite("文件已经存在");
            return true;
        }
        var strReponseBody = _GetReponseBody(m_strImageUrl);
        if (strReponseBody == null)
        {
            _ConsoleWrite("该文件获取失败");
            return true;
        }
        var objStream = new ActiveXObject("Adodb.Stream");
        try
        {
            objStream.Type = 1; // binary
            objStream.Open();
            objStream.Write(strReponseBody);
            objStream.SaveToFile(strTarget);
        }
        catch (e)
        {
            // A failed save (e.g. an unusable file name) used to abort the whole script.
            _ConsoleWrite(e.name + ": " + e.message);
        }
        finally
        {
            // State 1 = open; close only what was actually opened.
            if (objStream.State == 1)
            {
                objStream.Close();
            }
        }
        return true;
    };
}
// Processes one work queue from its end to its start.
//   aryQueue     - array of objects with a Prase() method; finished entries are removed in place
//   fnOnResult   - optional callback receiving each truthy Prase() result
//   bCountErrors - when true, stop after ciMaxConnectErrCount consecutive failures
// Returns true when at least one entry failed (Prase() returned a falsy value).
function _ProcessQueue(aryQueue, fnOnResult, bCountErrors)
{
    var bFailed = false;
    var iConnectErrCount = 0;
    for (var i = aryQueue.length - 1; i >= 0; i--)
    {
        var result = aryQueue[i].Prase();
        if (result)
        {
            aryQueue.splice(i, 1);
            if (fnOnResult)
            {
                fnOnResult(result);
            }
            iConnectErrCount = 0;
        }
        else
        {
            bFailed = true;
            if (bCountErrors)
            {
                iConnectErrCount++;
                // Too many failures in a row: the server is presumably rejecting us.
                if (iConnectErrCount >= ciMaxConnectErrCount)
                {
                    break;
                }
            }
        }
    }
    return bFailed;
}

// Runs one pass over the first non-empty queue (users, then months, then
// articles, then images) and calls itself again while work remains.
// After a failing pass it waits iDelay ms (growing by ciDelayStep up to
// ciMaxDelay) and gives up once more than ciMaxMainRunCount passes in a row
// have failed. Image failures never trigger a retry.
function main()
{
    // Turn freshly entered user names into User objects.
    for (var i = aryUserName.length - 1; i >= 0; i--)
    {
        var strUserDir = strBaseDir + cstrFileSpliter + aryUserName[i];
        if (!_CreateFolder(strUserDir))
        {
            _Debug("创建文件夹[" + strUserDir + "]失败");
        }
        else
        {
            aryUser.push(new User(strUserDir + cstrFileSpliter, cstrBaseUrl + "/" + aryUserName[i]));
        }
        aryUserName.splice(i, 1);
    }

    // Whether this pass saw a connection failure
    var bConnectErr = false;
    if (aryUser.length != 0)
    {
        bConnectErr = _ProcessQueue(aryUser, function(aryResult) { aryMonth = aryMonth.concat(aryResult); }, true);
    }
    else if (aryMonth.length != 0)
    {
        bConnectErr = _ProcessQueue(aryMonth, function(aryResult) { aryArticle = aryArticle.concat(aryResult); }, true);
    }
    else if (aryArticle.length != 0)
    {
        bConnectErr = _ProcessQueue(aryArticle, function(aryResult) { aryImage = aryImage.concat(aryResult); }, true);
    }
    else if (aryImage.length != 0)
    {
        // Changed 2010/03/28: image failures do not count towards the consecutive-error limit.
        bConnectErr = _ProcessQueue(aryImage, null, false);
    }

    if (aryUser.length != 0 || aryMonth.length != 0 || aryArticle.length != 0)
    {
        if (bConnectErr)
        {
            iMainRunCount++;
            if (iMainRunCount <= ciMaxMainRunCount)
            {
                _ConsoleWrite("延时" + iDelay/1000 + "秒");
                WScript.Sleep(iDelay);
                iDelay = iDelay * ciDelayStep;
                // Cap the delay
                iDelay = iDelay > ciMaxDelay ? ciMaxDelay : iDelay;
                main();
            }
            else
            {
                _Debug("网络状况不能继续进行备份");
            }
        }
        else
        {
            iMainRunCount = 0;
            main();
        }
    }
    // Added 2010/03/28: only images are left; a failing image pass is not retried.
    else if (aryImage.length != 0)
    {
        if (!bConnectErr)
        {
            main();
        }
    }
}
// Work queues drained by main(): each stage feeds the next one
// (users -> months -> articles -> images).
var aryUser = new Array();
var aryMonth = new Array();
var aryArticle = new Array();
var aryImage = new Array();
// Folder that receives this backup run
var strBaseDir;
// Open log file (written by _WriteLog)
var objLogFile;
// Full path of Continue.xml
var strContinueXmlFilePath;
// Full path of Continue.bat
var strContinueBatFilePath;
// Start-up: either resume an interrupted backup from Continue.xml in the root
// directory, or create a new timestamped backup folder and ask for user names.
if (fso.FileExists(cstrRootDir + cstrFileSpliter + cstrContinueXmlFile))
{
    // Resume support (added 2010/06/27), useful for blogs with many articles.
    strBaseDir = cstrRootDir;
    var objLogFile = fso.OpenTextFile(strBaseDir + cstrFileSpliter + cstrLogFile, 8 , true);
    strContinueBatFilePath = strBaseDir + cstrFileSpliter + cstrContinueBatFile;
    strContinueXmlFilePath = strBaseDir + cstrFileSpliter + cstrContinueXmlFile;
    var objParserReader = new ActiveXObject("Microsoft.XMLDOM");
    objParserReader.async = false;
    objParserReader.load(strContinueXmlFilePath);
    var xmlContinue = objParserReader.documentElement;
    for (var i = 0; i < xmlContinue.childNodes.length; i++)
    {
        var xmlObject = xmlContinue.childNodes[i];
        // Each group is rebuilt with its own type. The original created Article
        // objects for users and months too, which silently dropped them.
        if (xmlObject.baseName == cstrUsers)
        {
            for (var j = 0; j < xmlObject.childNodes.length; j++)
            {
                var xmlUser = xmlObject.childNodes[j];
                aryUser.push(new User(xmlUser.getAttribute(cstrDir), xmlUser.getAttribute(cstrUrl)));
            }
        }
        if (xmlObject.baseName == cstrMonths)
        {
            for (var j = 0; j < xmlObject.childNodes.length; j++)
            {
                var xmlMonth = xmlObject.childNodes[j];
                aryMonth.push(new Month(xmlMonth.getAttribute(cstrDir), xmlMonth.getAttribute(cstrUrl)));
            }
        }
        if (xmlObject.baseName == cstrArticles)
        {
            for (var j = 0; j < xmlObject.childNodes.length; j++)
            {
                var xmlArticle = xmlObject.childNodes[j];
                aryArticle.push(new Article(xmlArticle.getAttribute(cstrDir), xmlArticle.getAttribute(cstrUrl)));
            }
        }
    }
}
else
{
    strBaseDir = cstrRootDir + cstrFileSpliter + strDateTime;
    _CreateFolder(strBaseDir);
    var objLogFile = fso.OpenTextFile(strBaseDir + cstrFileSpliter + cstrLogFile, 8 , true);
    strContinueBatFilePath = strBaseDir + cstrFileSpliter + cstrContinueBatFile;
    strContinueXmlFilePath = strBaseDir + cstrFileSpliter + cstrContinueXmlFile;
    // Ask for the user names
    WScript.StdOut.WriteLine("用户名(User1[,User2]):\n");
    // Split on commas with optional surrounding whitespace. The original passed the
    // string "\s*,\s*", which is the literal text "s*,s*" and never split anything.
    var aryInput = WScript.StdIn.ReadLine().split(/\s*,\s*/);
    for(var i = 0; i < aryInput.length; i++)
    {
        // Drop leading/trailing whitespace so " bob " does not become part of a path or address.
        var strInput = aryInput[i].replace(/^\s+|\s+$/g, "");
        if(strInput != "")
        {
            aryUserName.push(strInput);
        }
    }
}
// Counts consecutive failing main() passes (reset by main() after a clean pass)
var iMainRunCount = 0;
main();

// Resume support (added 2010/06/27): when users, months or articles are still
// queued, save them to Continue.xml and write Continue.bat; otherwise remove
// any leftover resume files.
var bWorkLeft = aryUser.length != 0 || aryMonth.length != 0 || aryArticle.length != 0;
if (!bWorkLeft)
{
    if (fso.FileExists(strContinueBatFilePath))
    {
        fso.DeleteFile(strContinueBatFilePath);
    }
    if (fso.FileExists(strContinueXmlFilePath))
    {
        fso.DeleteFile(strContinueXmlFilePath);
    }
}
else
{
    var xmlContinue = objParserWriter.createElement(cstrContinue);
    // Element name + queue, in the order they are written
    var aryGroups = [[cstrUsers, aryUser], [cstrMonths, aryMonth], [cstrArticles, aryArticle]];
    for (var iGroup = 0; iGroup < aryGroups.length; iGroup++)
    {
        var aryPending = aryGroups[iGroup][1];
        if (aryPending.length == 0)
        {
            // Empty queues get no element at all.
            continue;
        }
        var xmlGroup = objParserWriter.createElement(aryGroups[iGroup][0]);
        for (var iItem = 0; iItem < aryPending.length; iItem++)
        {
            xmlGroup.appendChild(aryPending[iItem].toXml());
        }
        xmlContinue.appendChild(xmlGroup);
    }
    objParserWriter.appendChild(xmlContinue);
    objParserWriter.save(strContinueXmlFilePath);

    var objBatFile = fso.OpenTextFile(strBaseDir + cstrFileSpliter + cstrContinueBatFile, 2 , true);
    var aryBatLines = ["mode con lines=2000", "cscript.exe /d ..\\csdn_blog_backup2.js", "pause"];
    for (var iLine = 0; iLine < aryBatLines.length; iLine++)
    {
        objBatFile.WriteLine(aryBatLines[iLine]);
    }
    objBatFile.Close();
}
_ConsoleWrite("自动备份结束");
//////////////
// 基本函数 //
//////////////
// Makes sure a folder exists.
//   strFolderPath - folder to create
// Returns true when the folder exists afterwards, false when creating it failed
// (the error is logged). An already existing folder counts as success: it used
// to be reported as a failure, which made a resumed backup skip every month and
// day folder created by the interrupted run.
function _CreateFolder(strFolderPath)
{
    var bRet = true;
    try
    {
        if (!fso.FolderExists(strFolderPath))
        {
            fso.CreateFolder(strFolderPath);
        }
    }
    catch (e)
    {
        _ConsoleWrite(e.name + ":" + e.message);
        bRet = false;
    }
    return bRet;
}
// Fetches a page as text through the shared m_http object.
//   strUrl - address to fetch
// Makes up to ciMaxPageConnectErrCount + 1 attempts; an attempt fails when
// open/send throws or the request does not complete within ciSendTime ms.
// Returns the response text, or null when every attempt failed, the response
// is empty, or the page is a 403 / CSDN error page. After a successful fetch
// it pauses for ciUrlDelay ms.
function _GetPageText(strUrl)
{
    // Set only when THIS call's request completed; m_http is shared, so its
    // readyState alone may still describe an earlier request.
    var bCompleted = false;
    var iRetry = 0;
    _ConsoleWrite("url:" + strUrl);
    while (!bCompleted && iRetry <= ciMaxPageConnectErrCount)
    {
        iRetry++;
        if (iRetry != 1)
        {
            _ConsoleWrite("重试:" + (iRetry - 1) + "次" );
        }
        try
        {
            m_http.open("GET", strUrl, true);
            m_http.send("");
            // Poll until the asynchronous request completes or the time budget is used up.
            var iSendTime = ciSendTime;
            while (m_http.readyState != 4)
            {
                WScript.Sleep(ciSendTestInterval);
                iSendTime = iSendTime - ciSendTestInterval;
                if (iSendTime < 0)
                {
                    var objError = new Error(0, "连接超时");
                    objError.name = "自定义异常";
                    throw objError;
                }
            }
            bCompleted = true;
        }
        catch(e)
        {
            _ConsoleWrite(e.name + ": " + e.message);
        }
    }
    if (!bCompleted)
    {
        return null;
    }
    var strText = m_http.responseText;
    if (strText == "")
    {
        return null;
    }
    // indexOf compares literally. String.search would compile the marker as a
    // regular expression, where the "|" in the CSDN error title means "or".
    if (strText.indexOf("<head><title>403 Forbidden</title></head>") != -1)
    {
        _ConsoleWrite("Forbidden Page");
        return null;
    }
    if (strText.indexOf("<title>CSDN | 错误</title>") != -1)
    {
        _ConsoleWrite("用户不存在或禁止访问");
        return null;
    }
    WScript.Sleep(ciUrlDelay);
    return strText;
}
// Fetches a picture as raw response body through the shared m_http object.
//   strUrl - address of the picture
// Uses its own, tighter limits than page fetches (added 2010/02/28): at most
// ciMaxPageConnectErrBodyCount + 1 attempts of ciBodySendTime ms each.
// Returns m_http.responseBody, or null when no attempt produced one.
function _GetReponseBody(strUrl)
{
    // Retry limit for pictures only
    var ciMaxPageConnectErrBodyCount = 1;
    // Pause after each picture (ms); pictures mostly live on other servers, so none is needed
    var ciPicDelay = 0;
    // Per-attempt timeout for pictures (ms)
    var ciBodySendTime = 3000;

    var objBody = null;
    _ConsoleWrite("url:" + strUrl);
    for (var iAttempt = 1; objBody == null && iAttempt <= ciMaxPageConnectErrBodyCount + 1; iAttempt++)
    {
        if (iAttempt > 1)
        {
            _ConsoleWrite("重试:" + (iAttempt - 1) + "次" );
        }
        try
        {
            m_http.open("GET", strUrl, true);
            m_http.send("");
            // Poll until the request completes or the time budget runs out.
            var iRemaining = ciBodySendTime;
            while (m_http.readyState != 4)
            {
                WScript.Sleep(ciSendTestInterval);
                iRemaining -= ciSendTestInterval;
                if (iRemaining < 0)
                {
                    var objTimeout = new Error(0, "连接超时");
                    objTimeout.name = "自定义异常";
                    throw objTimeout;
                }
            }
            objBody = m_http.responseBody;
            WScript.Sleep(ciPicDelay);
        }
        catch (err)
        {
            _ConsoleWrite(err.name + ": " + err.message);
        }
    }
    return objBody;
}
// Turns page text (a title, a date label, an address) into a usable file name:
// decode, run through _FormatContent (which decodes again), then replace the
// characters _ReplaceFileName treats as special.
function _FormatFileName(str)
{
    var strDecoded = _HtmlDecode(str);
    var strCleaned = _FormatContent(strDecoded);
    return _ReplaceFileName(strCleaned);
}
// Decodes page text with _HtmlDecode and then replaces the characters
// _ReplaceErrSign considers unwritable.
function _FormatContent(str)
{
    var strDecoded = _HtmlDecode(str);
    return _ReplaceErrSign(strDecoded);
}
//////////////
// 基础函数 //
//////////////
// Appends one line to the log file opened at start-up (objLogFile).
//   str - text to log
function _WriteLog(str)
{
    var objLog = objLogFile;
    objLog.WriteLine(str);
}
// Reports a problem: the message is logged first, then shown in a popup
// through the WScript.Shell object.
//   str - message text
function _Debug(str)
{
    var strMessage = str;
    _WriteLog(strMessage);
    m_wsh.popup(strMessage);
}
// Progress output: the message is logged first, then written to standard output.
//   str - message text
function _ConsoleWrite(str)
{
    var strMessage = str;
    _WriteLog(strMessage);
    WScript.StdOut.WriteLine(strMessage);
}
// Replaces every U+2318 and U+FEFF in the text with "_"; the original comment
// describes them as Unicode characters the system cannot write out.
//   str - text to clean
function _ReplaceErrSign(str)
{
    return str.replace(/[\u2318\uFEFF]/g, "_");
}
// Decodes page text: percent-escapes first, then decimal character references
// of the form &#NNN;.
//   str - text to decode
// Returns the decoded string. Text that is not valid percent-encoding (e.g. a
// title containing a bare "%") is kept as it is instead of raising URIError.
function _HtmlDecode(str)
{
    var strDecoded;
    try
    {
        strDecoded = decodeURIComponent(str);
    }
    catch (e)
    {
        strDecoded = String(str);
    }
    // The replacement must be computed per match. The original built it once,
    // before replace() ran, from whatever RegExp.$1 held from an earlier match,
    // so every reference was replaced by the same wrong character.
    return strDecoded.replace(/\&\#(\d+)\;/gm, function(strEntity, strCode)
    {
        return String.fromCharCode(parseInt(strCode, 10));
    });
}
// Replaces each of / \ < > * : ? " | and the tab character with "_" so the text
// can be used as a file name.
//   str - candidate file name
function _ReplaceFileName(str)
{
    return str.replace(/[\/\\<>\*\:\?\"\t\|]/g, "_");
}
多多交流,呵呵,不过工作原因应该不会有什么时间进行升级了
最近修改时间:2010/06/28
由于CSDN有修改(自己猜的,囧),函数Article中正则表达式修改为: <div\s+class=\"user_article\">(?:(?!<\/div>).|\n)*?<h1\s+class=\"title_txt\">\s*(?:<img\s+src=\"\/images\/\w+\.gif\"\s+border=\"\d+\"\s+width=\"\d+\"\s+height=\"\d+\"\s+alt=\".+\"\/>\ \;)?\s*((?:(?!<\/div>).|\n)*?)\s*<cite\s+class=\"fav_csdnstylebykimi\">(?:(?!<\/cite>).|\n)*?<\/cite>\s*<\/h1>\s*<div\s+class=\"blogstory\">(?:\s*?<script\s+type=\"text\/javascript\">(?:(?!<\/script>).|\n)*?<\/script>\s*)*((?:.|\n)*?)\s*<p\s+class=\"right articalinfo\">(?:.|\n)*?@\s*((?:.|\n)*?)\ \;\&\#124\; \s*<a\s+id=\"a_comment\"\s+href=\"#FeedBack\"\s+title=\"评论\">(?:.| \n)*?<\/p>(?:.|\n)*?<div\s+class=\"mutualitys\">(?:.|\n)*?<\/div>\s*<\/div>
最新修改版:-)(增加了一个后台任务了)
// 本程序思路是首先将要访问的页面信息(页面地址和文件夹)保存到各种对象数组中,然后不断将数组进行处理,生成新的数组,直到所有数组处理完毕.
//********************History Start*********************
//(2010/02/28 ) 由于CSDN有修改(自己猜的,囧),函数Article中正则表达式修改为: <div\s+class=\"user_article\">(?:(?!<\/div>).|\n)*?<h1\s+class=\"title_txt\">\s*(?:<img\s+src=\"\/images\/\w+\.gif\"\s+border=\"\d+\"\s+width=\"\d+\"\s+height=\"\d+\"\s+alt=\".+\"\/>\ \;)?\s*((?:(?!<\/div>).|\n)*?)\s*<cite\s+class=\"fav_csdnstylebykimi\">(?:(?!<\/cite>).|\n)*?<\/cite>\s*<\/h1>\s*<div\s+class=\"blogstory\">(?:\s*?<script\s+type=\"text\/javascript\">(?:(?!<\/script>).|\n)*?<\/script>\s*)*((?:.|\n)*?)\s*<p\s+class=\"right articalinfo\">(?:.|\n)*?@\s*((?:.|\n)*?)\ \;\&\#124\; \s*<a\s+id=\"a_comment\"\s+href=\"#FeedBack\"\s+title=\"评论\">(?:.| \n)*?<\/p>(?:.|\n)*?<div\s+class=\"mutualitys\">(?:.|\n)*?<\/div>\s*<\/div>
//(2010/03/28) 对图片部分取得进行优化:由于图片出错率比较高,但重要性又较文章低很多,所以考虑所用常量与文章获取区别开,以减少因图片获取造成的大量延时.
//(2010/06/29) 增加了断点续传功能,因为我发现总是容易被服务器拒绝,现在我的博客备份都需要两次才可以了.
//(2010/09/13) 功能改进,在文章一次备份失败(一般认为是被加入黑名单了吧)之后将在后台安排一小时一次的任务直到完全备份结束或者关机.
//********************History End*********************
//
//********************start*********************
// 用户常量自定义区
//********************start*********************
// Root directory: the script's full path with the script name and the separator before it cut off.
var cstrRootDir = WScript.ScriptFullName.substring(0, WScript.ScriptFullName.length - WScript.ScriptName.length - 1);
// When a main() pass hits a network error the server may be rejecting us, so the next pass is delayed.
// The delay grows by a fixed factor up to a cap, to probe whether the server has recovered.
// Initial delay (ms); main() multiplies it after each delayed retry.
var iDelay = 1000;
// Multiplier applied to the delay after each delayed retry
var ciDelayStep = 3;
// Upper bound for the delay (ms)
var ciMaxDelay = 6000000;
// Pause after each successful CSDN page fetch (ms). A suitable value helps avoid being blacklisted at the cost of a slower backup (with resume support this may be left at zero).
var ciUrlDelay = 0;
// When this many pages in a row fail, the server is assumed to be rejecting requests:
// the current queue loop is abandoned and main() goes into its delay handling.
var ciMaxConnectErrCount = 5;
// Retry limit for a single page before it is given up for this pass
// (_GetPageText loops while iRetry <= this value, i.e. up to this value + 1 attempts).
var ciMaxPageConnectErrCount = 3;
// How many consecutive failing main() passes are retried before the program gives up;
// by then the server is presumably rejecting every request.
var ciMaxMainRunCount = 1;
// XMLHTTP request timeout (ms)
var ciSendTime = 10000;
// Polling interval while waiting for the XMLHTTP request to complete (ms)
var ciSendTestInterval = 10;
//********************end*********************
// 用户常量自定义区
//********************end*********************
// Adds Trim() to all strings: returns the string without leading and trailing whitespace.
String.prototype.Trim = function()
{
    var strNoLead = this.replace(/^\s*/, "");
    return strNoLead.replace(/\s*$/, "");
};
// User names read from the console, waiting to be turned into User objects by main()
var aryUserName = new Array;
// Shared script-wide objects
// WScript.Shell object
var WshShell = new ActiveXObject("WScript.Shell");
// File system access (folders, text files)
var fso = new ActiveXObject("Scripting.FileSystemObject");
// Single XMLHTTP object shared by all page requests
var m_http = new ActiveXObject("Microsoft.xmlhttp");
// XML document used to build the resume file
var objParserWriter = new ActiveXObject("Microsoft.XMLDOM");
var objDate = new Date();
// Timestamp used as the name of the backup folder.
// getFullYear() replaces the deprecated getYear(), which only yields a four-digit year on JScript
// and returns (year - 1900) on standard engines.
var strDateTime = objDate.getFullYear().toString() + "_" + (objDate.getMonth()+1).toString() + "_" + objDate.getDate().toString() + "_" + objDate.getHours() + "_" + objDate.getMinutes() + "_" + objDate.getSeconds() + "_" + objDate.getMilliseconds();
// CSDN blog base address
var cstrBaseUrl = "http://blog.csdn.net";
// XML attribute name: folder
var cstrAttributeDir = "Dir";
// XML attribute name: address
var cstrAttributeUrl = "Url";
// File holding the unfinished work of an interrupted backup
var cstrContinueXmlFile = "Continue.xml";
// Batch file that restarts an interrupted backup
var cstrContinueBatFile = "Continue.bat";
// Script file for the background continue task
var cstrContinueJSFile = "Continue.js";
// Log file name
var cstrLogFile = "log.txt";
// Path separator
var cstrFileSpliter = "\\";
// XML element name: base folder
var cstrBaseNameBaseDir = "BaseDir";
// XML element name: continue script
var cstrBaseNameContinueJS = "ContinueJS";
// XML element name: log
var cstrBaseNameLog = "Log";
// XML element name: continue batch file
var cstrBaseNameContinueBat = "ContinueBat";
// XML element name: pending articles
var cstrBaseNameArticles = "Articles";
// XML element name: pending months
var cstrBaseNameMonths = "Months";
// XML element name: pending users
var cstrBaseNameUsers = "Users";
// XML root element name
var cstrBaseNameContinue = "Continue";
// Placeholder wrapper around an XMLHTTP object.
// Neither parameter is read, and Open() is an empty stub.
function XmlHttp(iTestInterval, iTimeOut)
{
    // Created on construction; nothing in this constructor uses it yet.
    var objRequest = new ActiveXObject("Microsoft.xmlhttp");

    // Stub: intentionally does nothing.
    this.Open = function()
    {
    };
}
// One blog user waiting to be processed.
//   strUserDir - existing folder that receives this user's backup
//   strUserUrl - address of the user's blog front page
function User(strUserDir, strUserUrl)
{
    var m_UserDir = fso.GetFolder(strUserDir);
    var m_UserUrl = strUserUrl;

    // Downloads the front page, creates one folder per archive month and returns
    // an array of Month objects. Returns undefined when the page could not be
    // fetched (main() then keeps the user queued); returns an empty array when
    // the archive list cannot be found.
    this.Prase = function()
    {
        var strPage = _GetPageText(m_UserUrl);
        if (strPage == null)
        {
            return;
        }
        var aryFound = new Array();
        // Archive side bar: everything inside its <ul>
        var reMonths = /<dt>存档<\/dt>\s*<dd>\s*<div\s+class=\"publiclist_sidebar\">\s*<ul>((?:.|\n)*?)<\/ul>/igm;
        var aryBlocks = strPage.match(reMonths);
        if (null == aryBlocks)
        {
            _ConsoleWrite(strPage);
            _Debug("没找到月份");
            return aryFound;
        }
        // The matched block(s) are scanned as one string, as before.
        var strArchive = String(aryBlocks);
        // One <li> per month: group 1 = relative address, group 2 = label
        var reMonth = /<li><a\s+href=\"(.+?)\">(.+?)<\/a><\/li>/gim;
        reMonth.lastIndex = 0;
        var objMatch;
        // Captures are read from the exec() result into locals; the original leaked
        // strMonth / strMonthUrl as implicit globals.
        while ((objMatch = reMonth.exec(strArchive)) != null)
        {
            var strMonthUrl = objMatch[1];
            var strMonth = objMatch[2];
            // Folder for this month
            var strMonthDir = m_UserDir + cstrFileSpliter + _FormatFileName(strMonth);
            if (!_CreateFolder(strMonthDir))
            {
                _Debug("创建文件夹[" + strMonthDir + "]失败");
                continue;
            }
            aryFound.push(new Month(strMonthDir, cstrBaseUrl + strMonthUrl));
        }
        _ConsoleWrite(aryFound.length);
        return aryFound;
    };

    // Serializes this user as a <User Dir=".." Url=".."/> element for the resume file.
    this.toXml = function()
    {
        var xmlUser = objParserWriter.createElement("User");
        xmlUser.setAttribute(cstrAttributeDir, m_UserDir);
        xmlUser.setAttribute(cstrAttributeUrl, m_UserUrl);
        return xmlUser;
    };
}
// One archive month waiting to be processed.
//   strMonthDir - existing folder for this month
//   strMonthUrl - address of the month's archive page
function Month(strMonthDir, strMonthUrl)
{
    var m_MonthDir = fso.GetFolder(strMonthDir);
    var m_MonthUrl = strMonthUrl;

    // Downloads the archive page, creates a folder (plus an "img" sub folder) per
    // day and returns an array of Article objects. Returns undefined when the
    // page could not be fetched (main() then keeps the month queued).
    this.Prase = function()
    {
        var strPage = _GetPageText(m_MonthUrl);
        if (strPage == null)
        {
            return;
        }
        var aryFound = new Array();
        // NOTE(review): the "\ \;" in both patterns looks like a mangled "\&nbsp\;" - confirm against a real page.
        // One block per day: group 1 = date label, group 2 = that day's article summaries
        var reDay = /<div\s+class=\"default_contents\"><h6\s+class=\"pubtime\">(\d+?年\d+?月\d+?日)<\/h6>((?:<div\s+class=\"user_article\"><h1\s+class=\"title_txt\">(?:<img\s+src=\"\/images\/authorship\.gif\"\s+border=\"0\"\s+width=\"15\"\s+height=\"16\"\s+alt=\"原创\"\/>\ \;)?<a\s+href=\"(?:(?!<\/a>).)*?\">(?:(?!<\/a>).)*?<\/a><\/h1><p>(?:(?!<\/p>).|\n)*?<\/p>\s*<p\s+class=\"right articalinfo\">(?:(?!<\/p>).|\n)*?<\/p><\/div>)+?)<\/div>/gim;
        // One article summary: group 1 = relative address, group 2 = title
        var reArticleInfo = /<div\s+class=\"user_article\"><h1\s+class=\"title_txt\">(?:<img\s+src=\"\/images\/authorship\.gif\"\s+border=\"0\"\s+width=\"15\"\s+height=\"16\"\s+alt=\"原创\"\/>\ \;)?<a\s+href=\"((?:(?!<\/a>).)*?)\">((?:(?!<\/a>).)*?)<\/a><\/h1><p>(?:(?!<\/p>).|\n)*?<\/p>\s*<p\s+class=\"right\s+articalinfo\">(?:(?!<\/p>).|\n)*?<\/p><\/div>/gim;
        reDay.lastIndex = 0;
        var objDay;
        while ((objDay = reDay.exec(strPage)) != null)
        {
            var strDay = objDay[1];
            var strDayContent = objDay[2];
            _ConsoleWrite("天:" + strDay);
            // Folder for this day
            var strDayDir = m_MonthDir + cstrFileSpliter + _FormatFileName(strDay);
            if (!_CreateFolder(strDayDir))
            {
                _Debug("创建文件夹[" + strDayDir + "]失败\n");
                continue;
            }
            var strImageDir = strDayDir + cstrFileSpliter + "img";
            if (!_CreateFolder(strImageDir))
            {
                _Debug("创建文件夹[" + strImageDir + "]失败\n");
                // The original did "return aryImage", a name that is not a local of
                // this function. Skip this day instead, like the day-folder failure above.
                continue;
            }
            reArticleInfo.lastIndex = 0;
            var objInfo;
            while ((objInfo = reArticleInfo.exec(strDayContent)) != null)
            {
                var strArticleUrl = cstrBaseUrl + objInfo[1];
                var strArticleTitle = objInfo[2];
                _ConsoleWrite("篇:" + strArticleTitle);
                aryFound.push(new Article(strDayDir + cstrFileSpliter, strArticleUrl));
            }
        }
        return aryFound;
    };

    // Serializes this month as a <Month Dir=".." Url=".."/> element for the resume file.
    this.toXml = function()
    {
        var xmlMonth = objParserWriter.createElement("Month");
        xmlMonth.setAttribute(cstrAttributeDir, m_MonthDir);
        xmlMonth.setAttribute(cstrAttributeUrl, m_MonthUrl);
        return xmlMonth;
    };
}
// One article waiting to be saved.
//   strDayDir     - existing folder of the article's day (its "img" sub folder receives pictures)
//   strArticleUrl - address of the article page
function Article(strDayDir, strArticleUrl)
{
    var m_strDayDir = fso.GetFolder(strDayDir);
    var m_ArticleUrl = strArticleUrl;

    // Downloads the article, writes it as <title>.htm into the day folder with
    // picture links rewritten to .\img\..., and returns an array of Image objects
    // for the pictures it references. Returns undefined when the page could not
    // be fetched (main() then keeps the article queued); returns an empty array
    // when the page does not match the expected layout.
    this.Prase = function()
    {
        var strPage = _GetPageText(m_ArticleUrl);
        if (strPage == null)
        {
            return;
        }
        var aryFound = new Array();
        // Article layout: group 1 = title, group 2 = body, group 3 = creation time.
        // NOTE(review): the "\ \;" sequences look like a mangled "\&nbsp\;" - confirm against a real page.
        var re = /<div\s+class=\"user_article\">(?:(?!<\/div>).|\n)*?<h1\s+class=\"title_txt\">\s*(?:<img\s+src=\"\/images\/\w+\.gif\"\s+border=\"\d+\"\s+width=\"\d+\"\s+height=\"\d+\"\s+alt=\".+\"\/>\ \;)?\s*((?:(?!<\/div>).|\n)*?)\s*<cite\s+class=\"fav_csdnstylebykimi\">(?:(?!<\/cite>).|\n)*?<\/cite>\s*<\/h1>\s*<div\s+class=\"blogstory\">(?:\s*?<script\s+type=\"text\/javascript\">(?:(?!<\/script>).|\n)*?<\/script>\s*)*((?:.|\n)*?)\s*<p\s+class=\"right articalinfo\">(?:.|\n)*?@\s*((?:.|\n)*?)\ \;\&\#124\;\s*<a\s+id=\"a_comment\"\s+href=\"#FeedBack\"\s+title=\"评论\">(?:.|\n)*?<\/p>(?:.|\n)*?<div\s+class=\"mutualitys\">(?:.|\n)*?<\/div>\s*<\/div>/gim;
        var result = strPage.match(re);
        if (null == result)
        {
            _ConsoleWrite(strPage);
            _Debug("没匹配到文章");
            return aryFound;
        }
        // Captures of the match are read right away, before any other regex runs.
        var strTitlePart = RegExp.$1;
        var strContentPart = RegExp.$2;
        var strDateTimePart = RegExp.$3;
        var strTitle = _FormatFileName(strTitlePart);
        if (strTitle == "")
        {
            _ConsoleWrite(strPage);
            _Debug("文章名错误");
            return aryFound;
        }
        var strImageDir = m_strDayDir + cstrFileSpliter + "img";
        // Absolute http:// pictures: group 1 = tag prefix, group 2 = address
        var reImage = /(\<img\s*src\s*\=\s*\")(http\:\/\/[^\"]*?([^\/\"]*?))(?=\"[^\>].*?\/\>)/gim;
        reImage.lastIndex = 0;
        var objImg;
        while ((objImg = reImage.exec(strContentPart)) != null)
        {
            aryFound.push(new Image(strImageDir + cstrFileSpliter, objImg[2]));
        }
        // Point picture links at the local copies (same name the Image object uses).
        var strContent = strContentPart.replace(reImage, function($0, $1, $2, $3){return $1 + ".\\img\\" + _FormatFileName($2);});
        // Articles with the same title on the same day get "[0]", "[1]", ... appended.
        var strBasePath = m_strDayDir + cstrFileSpliter + _FormatFileName(strTitle);
        var strFilePath = strBasePath + ".htm";
        var i = 0;
        while (fso.FileExists(strFilePath) && i < 10000)
        {
            _ConsoleWrite("同名文章" + i.toString() + "篇出现");
            strFilePath = strBasePath + "[" + i.toString() + "]" + ".htm";
            // The original never advanced the counter, so a second collision looped forever.
            i++;
        }
        var text_file = fso.CreateTextFile(strFilePath, true);
        try
        {
            text_file.Write("<head><title>" + strTitle + "</title><h3>" + strTitle + "</h3><h5>创建时间:" + strDateTimePart + "</h5><h6>URL:" + "<a href ='" + m_ArticleUrl + "'>" + m_ArticleUrl + "</a>" + "</h6></head><Body>" + _ReplaceErrSign(strContent) + "</Body>");
        }
        finally
        {
            // Release the file handle even when Write fails.
            text_file.Close();
            text_file = null;
        }
        return aryFound;
    };

    // Serializes this article as an <Article Dir=".." Url=".."/> element for the resume file.
    this.toXml = function()
    {
        var xmlArticle = objParserWriter.createElement("Article");
        xmlArticle.setAttribute(cstrAttributeDir, m_strDayDir);
        xmlArticle.setAttribute(cstrAttributeUrl, m_ArticleUrl);
        return xmlArticle;
    };
}
// One picture waiting to be downloaded.
//   strImageDir - existing folder that receives the picture
//   strImageUrl - address of the picture; its formatted form is used as the file name
function Image(strImageDir, strImageUrl)
{
    var m_strImageName = _FormatFileName(strImageUrl);
    var m_strImageDir = fso.GetFolder(strImageDir);
    var m_strImageUrl = strImageUrl;

    // Downloads the picture unless the target file already exists.
    // Always returns true: a picture that cannot be fetched or saved is logged
    // and dropped, so it never blocks or retries the backup.
    this.Prase = function()
    {
        var strTarget = m_strImageDir + cstrFileSpliter + m_strImageName;
        if (fso.FileExists(strTarget))
        {
            _ConsoleWrite("文件已经存在");
            return true;
        }
        var strReponseBody = _GetReponseBody(m_strImageUrl);
        if (strReponseBody == null)
        {
            _ConsoleWrite("该文件获取失败");
            return true;
        }
        var objStream = new ActiveXObject("Adodb.Stream");
        try
        {
            objStream.Type = 1; // binary
            objStream.Open();
            objStream.Write(strReponseBody);
            objStream.SaveToFile(strTarget);
        }
        catch (e)
        {
            // A failed save (e.g. an unusable file name) used to abort the whole script.
            _ConsoleWrite(e.name + ": " + e.message);
        }
        finally
        {
            // State 1 = open; close only what was actually opened.
            if (objStream.State == 1)
            {
                objStream.Close();
            }
        }
        return true;
    };
}
// Processes one work queue from its end to its start.
//   aryQueue     - array of objects with a Prase() method; finished entries are removed in place
//   fnOnResult   - optional callback receiving each truthy Prase() result
//   bCountErrors - when true, stop after ciMaxConnectErrCount consecutive failures
// Returns true when at least one entry failed (Prase() returned a falsy value).
function _ProcessQueue(aryQueue, fnOnResult, bCountErrors)
{
    var bFailed = false;
    var iConnectErrCount = 0;
    for (var i = aryQueue.length - 1; i >= 0; i--)
    {
        var result = aryQueue[i].Prase();
        if (result)
        {
            aryQueue.splice(i, 1);
            if (fnOnResult)
            {
                fnOnResult(result);
            }
            iConnectErrCount = 0;
        }
        else
        {
            bFailed = true;
            if (bCountErrors)
            {
                iConnectErrCount++;
                // Too many failures in a row: the server is presumably rejecting us.
                if (iConnectErrCount >= ciMaxConnectErrCount)
                {
                    break;
                }
            }
        }
    }
    return bFailed;
}

// Runs one pass over the first non-empty queue (users, then months, then
// articles, then images) and calls itself again while work remains.
// After a failing pass it waits iDelay ms (growing by ciDelayStep up to
// ciMaxDelay) and gives up once more than ciMaxMainRunCount passes in a row
// have failed. Image failures never trigger a retry.
function main()
{
    // Turn freshly entered user names into User objects.
    for (var i = aryUserName.length - 1; i >= 0; i--)
    {
        var strUserDir = strBaseDir + cstrFileSpliter + aryUserName[i];
        if (!_CreateFolder(strUserDir))
        {
            _Debug("创建文件夹[" + strUserDir + "]失败");
        }
        else
        {
            aryUser.push(new User(strUserDir + cstrFileSpliter, cstrBaseUrl + "/" + aryUserName[i]));
        }
        aryUserName.splice(i, 1);
    }

    // Whether this pass saw a connection failure
    var bConnectErr = false;
    if (aryUser.length != 0)
    {
        bConnectErr = _ProcessQueue(aryUser, function(aryResult) { aryMonth = aryMonth.concat(aryResult); }, true);
    }
    else if (aryMonth.length != 0)
    {
        bConnectErr = _ProcessQueue(aryMonth, function(aryResult) { aryArticle = aryArticle.concat(aryResult); }, true);
    }
    else if (aryArticle.length != 0)
    {
        bConnectErr = _ProcessQueue(aryArticle, function(aryResult) { aryImage = aryImage.concat(aryResult); }, true);
    }
    else if (aryImage.length != 0)
    {
        // Changed 2010/03/28: image failures do not count towards the consecutive-error limit.
        bConnectErr = _ProcessQueue(aryImage, null, false);
    }

    if (aryUser.length != 0 || aryMonth.length != 0 || aryArticle.length != 0)
    {
        if (bConnectErr)
        {
            iMainRunCount++;
            if (iMainRunCount <= ciMaxMainRunCount)
            {
                _ConsoleWrite("延时" + iDelay/1000 + "秒");
                WScript.Sleep(iDelay);
                iDelay = iDelay * ciDelayStep;
                // Cap the delay
                iDelay = iDelay > ciMaxDelay ? ciMaxDelay : iDelay;
                main();
            }
            else
            {
                // Console/log only, no popup.
                _ConsoleWrite("网络状况不能继续进行备份");
            }
        }
        else
        {
            iMainRunCount = 0;
            main();
        }
    }
    // Added 2010/03/28: only images are left; a failing image pass is not retried.
    else if (aryImage.length != 0)
    {
        if (!bConnectErr)
        {
            main();
        }
    }
}
//////////////
// 基本函数 //
//////////////
// Create a folder.
// Asks the global `fso` object to create strFolderPath.
// Returns true when CreateFolder completes; when it throws, the error's
// name and message are written through _ConsoleWrite and false is returned.
function _CreateFolder(strFolderPath)
{
    try
    {
        fso.CreateFolder(strFolderPath);
        return true;
    }
    catch (objErr)
    {
        _ConsoleWrite(objErr.name + ":" + objErr.message);
        return false;
    }
}
// Fetch the text of a page.
// Issues an asynchronous GET for strUrl on the shared m_http object and polls
// readyState every ciSendTestInterval ms until it reaches 4 or the ciSendTime
// budget is used up (which counts as a failed attempt). Up to
// ciMaxPageConnectErrCount + 1 attempts are made; every failure is logged.
// Returns the response text, or null when
//   - no attempt completed,
//   - the response is empty,
//   - the response is a "403 Forbidden" page, or
//   - the response is the CSDN error page (unknown user / access denied).
// After a successful fetch the function sleeps ciUrlDelay ms.
function _GetPageText(strUrl)
{
    var strRet = null;
    var iRetry = 0;
    // Set only when the wait loop below saw THIS request reach readyState 4,
    // so a readyState left over from an earlier request is never trusted.
    var bCompleted = false;
    _ConsoleWrite("url:" + strUrl);
    while(!bCompleted && iRetry <= ciMaxPageConnectErrCount)
    {
        iRetry++;
        if(iRetry != 1)
        {
            _ConsoleWrite("重试:" + (iRetry - 1) + "次" );
        }
        try
        {
            m_http.open("GET", strUrl, true);
            m_http.send("");
            var iSendTime = ciSendTime;
            while(m_http.readyState != 4)
            {
                WScript.Sleep(ciSendTestInterval);
                iSendTime = iSendTime - ciSendTestInterval;
                if(iSendTime < 0)
                {
                    var objError = new Error(0, "连接超时");
                    objError.name = "自定义异常";
                    throw objError;
                }
            }
            bCompleted = true;
        }
        catch(e)
        {
            _ConsoleWrite(e.name + ": " + e.message);
        }
    }
    if(!bCompleted)
    {
        return strRet;
    }
    var strText = m_http.responseText;
    if(strText == "")
    {
        return strRet;
    }
    // indexOf is used on purpose: String.search() converts its argument to a
    // regular expression, in which the '|' of the CSDN error title acts as
    // alternation and matches far more pages than the error page.
    if(strText.indexOf("<head><title>403 Forbidden</title></head>") != -1)
    {
        _ConsoleWrite("Forbidden Page");
    }
    else if(strText.indexOf("<title>CSDN | 错误</title>") != -1)
    {
        _ConsoleWrite("用户不存在或禁止访问");
    }
    else
    {
        strRet = strText;
        WScript.Sleep(ciUrlDelay);
    }
    return strRet;
}
// Fetch an image (binary response body).
// Works like the page fetch but with its own, image-specific limits so that
// failing images cost little time: one retry after the first attempt and a
// 3000 ms wait budget per attempt. Returns m_http.responseBody of the first
// attempt that completes, or null when every attempt fails. Each failure is
// logged through _ConsoleWrite.
function _GetReponseBody(strUrl)
{
    // Image-specific retry limit (kept separate from the article limit).
    var ciMaxPageConnectErrBodyCount = 1;
    // Image-specific wait budget per attempt, in ms.
    var ciBodySendTime = 3000;
    // Pause after each image download; images usually live on other servers,
    // so no pause is configured.
    var ciPicDelay = 0;
    var objBody = null;
    var iAttempt;
    var iRemaining;
    _ConsoleWrite("url:" + strUrl);
    for(iAttempt = 1; objBody == null && iAttempt <= ciMaxPageConnectErrBodyCount + 1; iAttempt++)
    {
        if(iAttempt > 1)
        {
            _ConsoleWrite("重试:" + (iAttempt - 1) + "次" );
        }
        try
        {
            m_http.open("GET", strUrl, true);
            m_http.send("");
            // Poll until the request completes or the wait budget is exhausted.
            for(iRemaining = ciBodySendTime; m_http.readyState != 4; )
            {
                WScript.Sleep(ciSendTestInterval);
                iRemaining -= ciSendTestInterval;
                if(iRemaining < 0)
                {
                    var objTimeout = new Error(0, "连接超时");
                    objTimeout.name = "自定义异常";
                    throw objTimeout;
                }
            }
            objBody = m_http.responseBody;
            WScript.Sleep(ciPicDelay);
        }
        catch(objErr)
        {
            _ConsoleWrite(objErr.name + ": " + objErr.message);
        }
    }
    return objBody;
}
// Convert raw text into a string for use as a file name: decode it with
// _HtmlDecode, pass it through _FormatContent, then replace characters that
// are not allowed in file names via _ReplaceFileName.
// NOTE(review): _FormatContent calls _HtmlDecode again, so the text is
// decoded twice on this path - confirm that this is intended.
function _FormatFileName(str)
{
    var strDecoded = _HtmlDecode(str);
    var strContent = _FormatContent(strDecoded);
    return _ReplaceFileName(strContent);
}
// Prepare text for writing to a file: decode it with _HtmlDecode, then
// replace the unwritable characters via _ReplaceErrSign.
function _FormatContent(str)
{
    var strDecoded = _HtmlDecode(str);
    return _ReplaceErrSign(strDecoded);
}
//////////////
// 基础函数 //
//////////////
// Log output: append str as one line to the log file.
// Does nothing while objLogFile has not been opened yet.
function _WriteLog(str)
{
    if (objLogFile == undefined)
    {
        return;
    }
    objLogFile.WriteLine(str);
}
// Message output: always writes str to the log and, while bAlert is set,
// additionally shows it in a WshShell popup.
function _Debug(str)
{
    _WriteLog(str);
    if(!bAlert)
    {
        return;
    }
    WshShell.popup(str);
}
// Write str to the log first and then as one line to the console (StdOut).
function _ConsoleWrite(str)
{
    var objStdOut;
    _WriteLog(str);
    objStdOut = WScript.StdOut;
    objStdOut.WriteLine(str);
}
// Replace the characters the system cannot write out (U+2318 and U+FEFF)
// with an underscore.
function _ReplaceErrSign(str)
{
    return str.replace(/[\u2318\uFEFF]/g, "_");
}
// Decode HTML/URL escapes in str.
// 1. Every '%' that does not start a valid %XX escape (two hex digits) is
//    replaced by '_' so that decodeURIComponent does not choke on it.
// 2. The remaining %XX escapes are decoded with decodeURIComponent (which
//    still throws URIError for escapes that are not valid UTF-8).
// 3. Decimal numeric character references such as &#124; are replaced by the
//    character with that code.
// Returns the decoded string.
function _HtmlDecode(str)
{
    // Only real hex digits count; a comma after '%' is not part of an escape.
    var reStrayPercent = /%(?![0-9A-Fa-f][0-9A-Fa-f])/g;
    var reNumericEntity = /&#(\d+);/g;
    str = str.replace(reStrayPercent, "_");
    str = decodeURIComponent(str);
    // A callback is required here: the replacement has to be computed from the
    // digits of each individual match, not once before the replace starts.
    return str.replace(reNumericEntity, function (strMatch, strCode)
    {
        return String.fromCharCode(parseInt(strCode, 10));
    });
}
// Replace the special characters of a file name
// (/ \ < > * : ? " tab |) with an underscore.
function _ReplaceFileName(str)
{
    return str.replace(/[\/\\<>\*\:\?\"\t\|]/g, "_");
}
// Work queues processed by main(): each stage's parse results are appended
// to the queue of the next stage (users -> months -> articles -> images).
var aryUser = [];
var aryMonth = [];
var aryArticle = [];
var aryImage = [];
// Base folder of this backup run
var strBaseDir;
// Path of the log file
var strLogFile;
// Path of the continue (resume) XML file
var strContinueXmlFilePath;
// Path of the continue .bat file
var strContinueBatFilePath;
// Path of the continue-task .js file
var strContinueJSFilePath;
// Whether _Debug messages are also shown as popups
var bAlert = true;
// Load the initial state.
// Resume mode: exactly one command-line argument that names an existing file
// is treated as the continue XML written by an earlier run; the pending
// user/month/article queues and the file paths are restored from it.
// Otherwise a fresh run starts: a new base folder is created and the user
// names are read from the console.
if (WScript.Arguments.length == 1 && fso.FileExists(WScript.Arguments(0)))
{
// Resumed runs do not show popups from _Debug.
bAlert = false;
strContinueXmlFilePath = WScript.Arguments(0);
//***ADD (2010/06/27) Start***
// Resume support; useful when there are a great many articles.
var objParserReader = new ActiveXObject("Microsoft.XMLDOM");
// Load synchronously so documentElement can be read right after load().
objParserReader.async = false;
objParserReader.load(strContinueXmlFilePath);
var xmlContinue = objParserReader.documentElement;
// Each child of the root is either a queue (users / months / articles) or a
// single element carrying one path in its directory attribute.
for (var i = 0; i < xmlContinue.childNodes.length; i++)
{
var xmlObject = xmlContinue.childNodes[i];
// Pending users
if (xmlObject.baseName == cstrBaseNameUsers)
{
for (var j = 0; j < xmlObject.childNodes.length; j++)
{
var xmlUser = xmlObject.childNodes[j];
aryUser.push(new User(xmlUser.getAttribute(cstrAttributeDir), xmlUser.getAttribute(cstrAttributeUrl)));
}
}
// Pending months
if (xmlObject.baseName == cstrBaseNameMonths)
{
for (var j = 0; j < xmlObject.childNodes.length; j++)
{
var xmlMonth = xmlObject.childNodes[j];
aryMonth.push(new Month(xmlMonth.getAttribute(cstrAttributeDir), xmlMonth.getAttribute(cstrAttributeUrl)));
}
}
// Pending articles (the image queue is not restored here)
if (xmlObject.baseName == cstrBaseNameArticles)
{
for (var j = 0; j < xmlObject.childNodes.length; j++)
{
var xmlArticle = xmlObject.childNodes[j];
aryArticle.push(new Article(xmlArticle.getAttribute(cstrAttributeDir), xmlArticle.getAttribute(cstrAttributeUrl)));
}
}
// Log file path
if (xmlObject.baseName == cstrBaseNameLog)
{
strLogFile = xmlObject.getAttribute(cstrAttributeDir);
}
// Continue .bat file path
if (xmlObject.baseName == cstrBaseNameContinueBat)
{
strContinueBatFilePath = xmlObject.getAttribute(cstrAttributeDir);
}
// Continue-task .js file path
if (xmlObject.baseName == cstrBaseNameContinueJS)
{
strContinueJSFilePath = xmlObject.getAttribute(cstrAttributeDir);
}
// Base folder of the backup
if (xmlObject.baseName == cstrBaseNameBaseDir)
{
strBaseDir = xmlObject.getAttribute(cstrAttributeDir);
}
}
//***ADD (2010/06/27) End***
}
else
{
// Fresh run: the base folder is named after strDateTime below the root folder.
strBaseDir = cstrRootDir + cstrFileSpliter + strDateTime;
_CreateFolder(strBaseDir);
strLogFile = strBaseDir + cstrFileSpliter + cstrLogFile;
strContinueBatFilePath = strBaseDir + cstrFileSpliter + cstrContinueBatFile;
strContinueXmlFilePath = strBaseDir + cstrFileSpliter + cstrContinueXmlFile;
strContinueJSFilePath = strBaseDir + cstrFileSpliter + cstrContinueJSFile;
// Read the user names (comma separated) from the console
WScript.StdOut.WriteLine("用户名(User1[,User2]):\n");
var aryInput = WScript.StdIn.ReadLine().split(",");
for(var i = 0; i < aryInput.length; i++)
{
// Empty entries (e.g. from a trailing comma) are skipped.
var strInput = aryInput[i].Trim();
if(strInput != "")
{
aryUserName.push(strInput);
}
}
}
// Open the log file, creating it when missing (third argument true).
// NOTE(review): iomode 8 is "ForAppending" in the Scripting.FileSystemObject
// API, so a resumed run would append to the earlier log - assumes fso is a
// FileSystemObject; confirm where fso is created.
var objLogFile = fso.OpenTextFile(strLogFile, 8 , true);
// Counter of consecutive main() runs that ended with a connection error
var iMainRunCount = 0;
// Run the backup
main();
//***ADD (2010/06/27) Start***
// Resume support; useful when there are a great many articles.
// If main() returned while users, months or articles are still pending, the
// remaining work is saved to the continue XML and a delayed restart is
// scheduled; otherwise the continue files are removed and the backup folder
// is marked as finished. Pending images alone do not trigger a continuation.
if (aryUser.length != 0 || aryMonth.length != 0 || aryArticle.length != 0)
{
// Root element plus one element per stored path (log, bat, js, base folder).
var xmlContinue = objParserWriter.createElement(cstrBaseNameContinue);
var xmlLog = objParserWriter.createElement(cstrBaseNameLog);
xmlLog.setAttribute(cstrAttributeDir, strLogFile);
xmlContinue.appendChild(xmlLog);
var xmlContinueBat = objParserWriter.createElement(cstrBaseNameContinueBat);
xmlContinueBat.setAttribute(cstrAttributeDir, strContinueBatFilePath);
xmlContinue.appendChild(xmlContinueBat);
var xmlContinueJS = objParserWriter.createElement(cstrBaseNameContinueJS);
xmlContinueJS.setAttribute(cstrAttributeDir, strContinueJSFilePath);
xmlContinue.appendChild(xmlContinueJS);
var xmlBaseDir = objParserWriter.createElement(cstrBaseNameBaseDir);
xmlBaseDir.setAttribute(cstrAttributeDir, strBaseDir);
xmlContinue.appendChild(xmlBaseDir);
// Each non-empty queue is stored as a list of its items' toXml() nodes.
if (aryUser.length != 0)
{
var xmlAryUser = objParserWriter.createElement(cstrBaseNameUsers);
for(var i = 0; i < aryUser.length; i++)
{
xmlAryUser.appendChild(aryUser[i].toXml());
}
xmlContinue.appendChild(xmlAryUser);
}
if (aryMonth.length != 0)
{
var xmlAryMonth = objParserWriter.createElement(cstrBaseNameMonths);
for(var i = 0; i < aryMonth.length; i++)
{
xmlAryMonth.appendChild(aryMonth[i].toXml());
}
xmlContinue.appendChild(xmlAryMonth);
}
if (aryArticle.length != 0)
{
var xmlAryArticle = objParserWriter.createElement(cstrBaseNameArticles);
for(var i = 0; i < aryArticle.length; i++)
{
xmlAryArticle.appendChild(aryArticle[i].toXml());
}
xmlContinue.appendChild(xmlAryArticle);
}
objParserWriter.appendChild(xmlContinue);
objParserWriter.save(strContinueXmlFilePath);
// Batch file for resuming by hand: runs this script with the continue XML.
var objContinueBatFile = fso.OpenTextFile(strContinueBatFilePath, 2 , true);
objContinueBatFile.WriteLine("cscript.exe \"" + WScript.ScriptFullName + "\" \"" + strContinueXmlFilePath + "\"");
objContinueBatFile.WriteLine("pause");
objContinueBatFile.Close();
// Helper script for resuming automatically: it sleeps 3600000 ms (one hour)
// and then starts this script again with the continue XML. Backslashes in
// the paths are turned into '/' so they are not read as escape characters
// inside the generated string literal.
var objContinueJSFile = fso.OpenTextFile(strContinueJSFilePath, 2 , true);
objContinueJSFile.WriteLine("WScript.Sleep(3600000);");
objContinueJSFile.WriteLine("var WshShell = WScript.CreateObject(\"WScript.Shell\");");
objContinueJSFile.WriteLine("WshShell.Run(\"cscript.exe \\\"" + WScript.ScriptFullName.replace(/\\/g, "/") + "\\\" \\\"" + strContinueXmlFilePath.replace(/\\/g, "/") + "\\\"\", 0);");
objContinueJSFile.Close();
// Launch the helper script; this run then ends.
WshShell.Run(strContinueJSFilePath);
_ConsoleWrite("自动备份暂停");
}
else
{
// Nothing left to resume: remove the continue files of earlier runs.
if (fso.FileExists(strContinueBatFilePath))
{
fso.DeleteFile(strContinueBatFilePath);
}
if (fso.FileExists(strContinueJSFilePath))
{
fso.DeleteFile(strContinueJSFilePath);
}
if (fso.FileExists(strContinueXmlFilePath))
{
fso.DeleteFile(strContinueXmlFilePath);
}
//***ADD (2010/06/27) End***
_ConsoleWrite("自动备份结束");
// The log is closed before the folder that contains it is renamed.
objLogFile.Close();
var objBaseDir = fso.GetFolder(strBaseDir);
// Both renaming approaches are used: the second is the fallback of the first.
try
{
objBaseDir.Name = objBaseDir.Name + "(完成)";
}
catch(e)
{
// *******************************************
// Alternative way of renaming; the one above sometimes fails (probably when
// the task is started manually, because its start directory is the very
// folder that is to be renamed).
// The working directory has to be changed before renaming.
WshShell.CurrentDirectory = objBaseDir.ParentFolder.Path;
// "cmd /C" executes the command directly.
WshShell.Run("cmd /C ren \"" + objBaseDir.Path + "\" \"" + objBaseDir.Name + "(完成)" + "\"", 0);
// *******************************************
}
}