使用C#转移WORDPRESS博客到为知笔记或EVERNOTE
注:文中的方法非常简单,使用MySQL导出表到XML,发送邮件到你的为知/Evernote邮箱账户即可,唯一目前没找到破的是SMTP常随机无响应.. 而且象GMAIL的SMTP有发送次数限制(一天一千左右吧,所以如果有超过的话可能需要更换账号再试,当然你也可以改良下,做个数组,遇到配额错误可以提取下一个SMTP信息),优点是利用邮件的方式可以将博客中的图片也能导入到笔记中(而不是用的图片原链接,是直接放进笔记)
日志导入Wiz的结果:
一、目的
很简单,我担心博客放在网上有一天会不见了,所以一直想存到本地,博客太多,手动已不可能
- 我这些年使用wordpress有上千篇博客,其中有8成都是私有,使用现在的一些读取FEED方式导出的方式不能读取私有和保护博客,Wordpress也有导出插件,我没有试太多,有一个导出xml的工具采用流式试过十多次没有一次是完整导出,有时导出100来K,有时几M,最后放弃使用WP插件导出。
- 数据源:使用MySQL导出XML。
首先原因是空间提供商不提供MySQL远程连接,(如果你能导出SQL在本地建MySQL再导入,就不在这个话题范围内)。我原本想导出为SQL,再转换为MSSQL,可在网上找的所有converter都是必须登陆远程MySQL服务器,这条路径也行不通。导出的CSV为乱码,试过UTF8, GB2312都不行,这条路径也行不通。最后发现只有导出XML时没有乱码。这就简单多了。 - 使用C#读取XML时,有几类重要的信息是个人觉得必需的
- 博客表 wp_posts
- 类别(category / tag等) 表 wp_term_taxonomy, wp_terms wp_term_relationships
SQL:
-- 列出全部日志 --
create view v_post as
SELECT p.*, tax.term_taxonomy_id,tax.term_id,category.name, tax.count
FROM
wp_term_relationships relation,
wp_terms category,
wp_term_taxonomy tax,
wp_posts p
WHERE
category.term_id = tax.term_id and
tax.term_taxonomy_id = relation.term_taxonomy_id and
relation.object_id = p.id
二、操作步骤
- 登陆你空间的phpAdmin,选中日志数据库后,选择Export,框选的是需要注意的,导出时不要选择导出TABLE/VIEW等SQL语句。UTF-8必选否则会有乱码
得到导出的XML将作为我们的数据源。请一定确保下载下来的XML是有效的(我刚试过,这一种也有可能会下载不完整,无解 /摊手) - 跑日志的运行结果
- 代码
View Codeusing
System;
using
System.Linq;
using
System.Net.Mail;
using
System.Text;
using
System.Text.RegularExpressions;
using
System.Threading.Tasks;
using
System.Xml.Linq;
using
System.Xml.XPath;
namespace
WordpressExport
{
class
Program
{
static
SmtpClient smtpClient;
static
MailMessage mailMessage;
static
IOrderedEnumerable<post> list;
static
bool
smtpConnected;
static
bool
triedAfterEx =
false
;
readonly
static
string
CONFIG_smtp_add = System.Configuration.ConfigurationSettings.AppSettings[
"smtp_add"
];
readonly
static
string
CONFIG_smtp_acct_name = System.Configuration.ConfigurationSettings.AppSettings[
"smtp_acct_name"
];
readonly
static
string
CONFIG_smtp_acct_pwd = System.Configuration.ConfigurationSettings.AppSettings[
"smtp_acct_pwd"
];
readonly
static
string
CONFIG_xml_path = System.Configuration.ConfigurationSettings.AppSettings[
"xml_path"
];
readonly
static
string
CONFIG_evernote_folder = System.Configuration.ConfigurationSettings.AppSettings[
"evernote_folder"
];
readonly
static
string
CONFIG_post_scope_start_date = System.Configuration.ConfigurationSettings.AppSettings[
"post_scope_start_date"
];
readonly
static
string
CONFIG_post_scope_end_date = System.Configuration.ConfigurationSettings.AppSettings[
"post_scope_end_date"
];
readonly
static
string
CONFIG_notebook_email = System.Configuration.ConfigurationSettings.AppSettings[
"notebook_email"
];
readonly
static
string
CONFIG_blog_addr = System.Configuration.ConfigurationSettings.AppSettings[
"blog_addr"
];
static
void
SendMail(post post)
{
bool
isSuccess =
true
;
if
(post ==
null
)
return
;
DateTime post_date;
if
(!DateTime.TryParse(post.post_date,
out
post_date))
{
post_date = System.DateTime.MinValue;
}
string
str_post_date = post_date == System.DateTime.MinValue ?
""
: post_date.ToString(
"yyyy-MM-dd"
);
mailMessage.SubjectEncoding = Encoding.UTF8;
mailMessage.Subject =
string
.Format(
"[{0}] {1} {2}"
, str_post_date, post.post_title, CONFIG_evernote_folder);
//主題
mailMessage.Body =
"<b>创建时间:</b>"
+ post.post_date +
"<br/>"
;
//內容
mailMessage.Body +=
"<b>原目录或tag</b>:"
+ post.post_tagcat +
"<br/>"
;
//內容
mailMessage.Body +=
string
.Format(
"<b>原文见</b>:<a href={0}?p={1}>{0}?p={1}</a></a><br/><br/><br/>"
, CONFIG_blog_addr, post.ID);
//內容
mailMessage.Body += post.post_content;
try
{
mailMessage.BodyEncoding = Encoding.UTF8;
//正文編碼
mailMessage.Priority = MailPriority.High;
//優先級
mailMessage.IsBodyHtml =
true
;
Regex reg =
new
Regex(
@"\n"
);
mailMessage.Body = reg.Replace(mailMessage.Body,
"<br/>"
);
Console.WriteLine(System.DateTime.Now +
" sending mail... id = "
+ post.ID +
" "
+ post.post_title);
// 你也可以使用异步发送,不过会导致发到邮箱的时间很乱,我们还是希望导入到笔记后的笔记自然顺序(即ID)是按时间顺序的
// smtpClient.Send(mailMessage);// 發送郵件
}
catch
(SmtpException ex)
{
Console.WriteLine(
"failed in SMTP connection, try again..."
);
// System.IO.IOException: Unable to write data to the transport connection: An established connection was aborted by the
// software in your host machine.
// ---> System.Net.Sockets.SocketException: An established connection was aborted by the software in your host machine
if
(triedAfterEx)
{
isSuccess =
false
;
Console.WriteLine(ex.Message);
}
// again, create another smtp instance for reconnecting SMTP
// Parallel.Invoke(() => connectSmtp(), () => SendMail (post));
isSuccess =
false
;
Console.WriteLine(ex.Message);
triedAfterEx =
true
;
}
catch
(Exception ex2)
{
isSuccess =
false
;
Console.WriteLine(ex2.Message);
}
Console.WriteLine(System.DateTime.Now.ToString() + (isSuccess ?
" completed"
:
" completed with error(s)"
));
}
static
void
Main(
string
[] args)
{
smtpConnected =
false
;
Console.WriteLine(System.DateTime.Now +
" start..."
);
Parallel.Invoke(() => connectSmtp(), () => HandleWPExport());
if
(list.Count() == 0)
{
throw
new
Exception(
"ooops.."
);
}
int
cnt = 0;
while
(1 == 1)
{
if
(smtpConnected =
true
|| cnt > 10000)
break
;
cnt++;
}
Console.WriteLine(cnt);
foreach
(var p
in
list)
{
triedAfterEx =
false
;
SendMail(p);
}
}
private
static
void
connectSmtp()
{
smtpClient =
new
SmtpClient();
smtpClient.DeliveryMethod = SmtpDeliveryMethod.Network;
//指定電子郵件發送方式
smtpClient.Host =
"smtp.gmail.com"
;
//指定SMTP服務器
smtpClient.EnableSsl =
true
;
smtpClient.Credentials =
new
System.Net.NetworkCredential(CONFIG_smtp_acct_name, CONFIG_smtp_acct_pwd);
smtpClient.Timeout = 100000;
}
private
static
void
HandleWPExport()
{
Console.WriteLine(System.DateTime.Now +
" starting to read wp_posts.xml..."
);
XDocument xmlDoc = XDocument.Load(CONFIG_xml_path);
var queryTax = (from tax
in
xmlDoc.XPathSelectElements(
@".//table[@name='wp_term_taxonomy']"
)
where
tax.XPathSelectElement(
"column[@name='taxonomy']"
).Value.Trim(
"\n"
.ToCharArray()) == TaxonomyEnum.category.ToString() ||
tax.XPathSelectElement(
"column[@name='taxonomy']"
).Value.Trim(
"\n"
.ToCharArray()) == TaxonomyEnum.link_category.ToString() ||
tax.XPathSelectElement(
"column[@name='taxonomy']"
).Value.Trim(
"\n"
.ToCharArray()) == TaxonomyEnum.post_tag.ToString()
select
new
{
term_id = tax.XPathSelectElement(
"column[@name='term_id']"
).Value.Trim(
"\n"
.ToCharArray()),
tax_id = tax.XPathSelectElement(
"column[@name='term_taxonomy_id']"
).Value.Trim(
"\n"
.ToCharArray())
}).ToList();
var queryCat = (from cat
in
xmlDoc.XPathSelectElements(
@".//table[@name='wp_terms']"
)
select
new
{
term_id = cat.XPathSelectElement(
"column[@name='term_id']"
).Value.Trim(
"\n"
.ToCharArray()),
name = cat.XPathSelectElement(
"column[@name='name']"
).Value.Trim(
"\n"
.ToCharArray()),
}).ToList();
var queryRel = (from rel
in
xmlDoc.XPathSelectElements(
@".//table[@name='wp_term_relationships']"
)
select
new
{
object_id = rel.XPathSelectElement(
"column[@name='object_id']"
).Value.Trim(
"\n"
.ToCharArray()),
tax_id = rel.XPathSelectElement(
"column[@name='term_taxonomy_id']"
).Value.Trim(
"\n"
.ToCharArray()),
}).ToList();
Console.WriteLine(System.DateTime.Now +
" continuing ... "
);
var queryTagCat = (from tax
in
queryTax
from cat
in
queryCat
from rel
in
queryRel
where cat.term_id == tax.term_id && tax.tax_id == rel.tax_id
select
new
{
name = cat.name,
id = rel.object_id
}).ToList();
var query = from p
in
xmlDoc.XPathSelectElements(
@".//table[@name='wp_posts' ]"
)
where p.XPathSelectElement(
"column[@name='post_type']"
).Value.Trim(
"\n"
.ToCharArray()) ==
"post"
// there're two types - post, attachment, we don't want attachment type of posts
select
new
post
{
ID = p.XPathSelectElement(
"column[@name='ID']"
).Value.Trim(
"\n"
.ToCharArray()),
post_author = p.XPathSelectElement(
"column[@name='post_author']"
).Value.Trim(
"\n"
.ToCharArray()),
post_date = p.XPathSelectElement(
"column[@name='post_date']"
).Value.Trim(
"\n"
.ToCharArray()),
post_date_gmt = p.XPathSelectElement(
"column[@name='post_date_gmt']"
).Value.Trim(
"\n"
.ToCharArray()),
post_content = p.XPathSelectElement(
"column[@name='post_content']"
).Value.Trim(
"\n"
.ToCharArray()),
post_title = p.XPathSelectElement(
"column[@name='post_title']"
).Value.Trim(
"\n"
.ToCharArray()),
post_excerpt = p.XPathSelectElement(
"column[@name='post_excerpt']"
).Value.Trim(
"\n"
.ToCharArray()),
post_status = p.XPathSelectElement(
"column[@name='post_status']"
).Value.Trim(
"\n"
.ToCharArray()),
comment_status = p.XPathSelectElement(
"column[@name='comment_status']"
).Value.Trim(
"\n"
.ToCharArray()),
ping_status = p.XPathSelectElement(
"column[@name='ping_status']"
).Value.Trim(
"\n"
.ToCharArray()),
post_password = p.XPathSelectElement(
"column[@name='post_password']"
).Value.Trim(
"\n"
.ToCharArray()),
post_name = p.XPathSelectElement(
"column[@name='post_name']"
).Value.Trim(
"\n"
.ToCharArray()),
to_ping = p.XPathSelectElement(
"column[@name='to_ping']"
).Value.Trim(
"\n"
.ToCharArray()),
pinged = p.XPathSelectElement(
"column[@name='pinged']"
).Value.Trim(
"\n"
.ToCharArray()),
post_modified = p.XPathSelectElement(
"column[@name='post_modified']"
).Value.Trim(
"\n"
.ToCharArray()),
post_modified_gmt = p.XPathSelectElement(
"column[@name='post_modified_gmt']"
).Value.Trim(
"\n"
.ToCharArray()),
post_content_filtered = p.XPathSelectElement(
"column[@name='post_content_filtered']"
).Value.Trim(
"\n"
.ToCharArray()),
post_parent = p.XPathSelectElement(
"column[@name='post_parent']"
).Value.Trim(
"\n"
.ToCharArray()),
guid = p.XPathSelectElement(
"column[@name='guid']"
).Value.Trim(
"\n"
.ToCharArray()),
menu_order = p.XPathSelectElement(
"column[@name='menu_order']"
).Value.Trim(
"\n"
.ToCharArray()),
post_type = p.XPathSelectElement(
"column[@name='post_type']"
).Value.Trim(
"\n"
.ToCharArray()),
post_mime_type = p.XPathSelectElement(
"column[@name='post_mime_type']"
).Value.Trim(
"\n"
.ToCharArray()),
comment_count = p.XPathSelectElement(
"column[@name='comment_count']"
).Value.Trim(
"\n"
.ToCharArray()),
post_tagcat =
string
.Join(
" "
, (from t
in
queryTagCat where t.id == p.XPathSelectElement(
"column[@name='ID']"
).Value.Trim(
"\n"
.ToCharArray()) select t.name).ToList())
};
Console.WriteLine(System.DateTime.Now +
" done with reading the xml... "
);
// PredicateBuilder
if
(!
string
.IsNullOrEmpty(CONFIG_post_scope_start_date))
query = query.Where(o => Convert.ToDateTime(o.post_date) >= DateTime.Parse(CONFIG_post_scope_start_date));
if
(!
string
.IsNullOrEmpty(CONFIG_post_scope_end_date))
query = query.Where(o => Convert.ToDateTime(o.post_date) <= DateTime.Parse(CONFIG_post_scope_end_date));
list = query.ToList().OrderBy(o => o.post_date);
}
}
}
DTO类using
System;
using
System.Collections.Generic;
using
System.Linq;
using
System.Text;
using
System.Xml.Serialization;
namespace
WordpressExport
{
[Serializable]
public
class
column
{
public
string
name {
get
;
set
; }
public
string
text {
get
;
set
; }
}
[Serializable]
public
class
database
{
[XmlElement(ElementName =
"database"
)]
public
List<table> tables {
get
;
set
; }
}
[Serializable]
public
class
table
{
public
string
name {
get
;
set
; }
public
column column {
get
;
set
; }
}
[Serializable]
public
class
post
{
public
string
ID {
get
;
set
; }
public
string
post_author {
get
;
set
; }
public
string
post_date {
get
;
set
; }
public
string
post_date_gmt {
get
;
set
; }
public
string
post_content {
get
;
set
; }
public
string
post_title {
get
;
set
; }
public
string
post_excerpt {
get
;
set
; }
public
string
post_status {
get
;
set
; }
public
string
comment_status {
get
;
set
; }
public
string
ping_status {
get
;
set
; }
public
string
post_password {
get
;
set
; }
public
string
post_name {
get
;
set
; }
public
string
to_ping {
get
;
set
; }
public
string
pinged {
get
;
set
; }
public
string
post_modified {
get
;
set
; }
public
string
post_modified_gmt {
get
;
set
; }
public
string
post_content_filtered {
get
;
set
; }
public
string
post_parent {
get
;
set
; }
public
string
guid {
get
;
set
; }
public
string
menu_order {
get
;
set
; }
public
string
post_type {
get
;
set
; }
public
string
post_mime_type {
get
;
set
; }
public
string
comment_count {
get
;
set
; }
public
string
post_tagcat {
get
;
set
; }
}
public
enum
TaxonomyEnum
{
category,
link_category,
post_format,
post_tag,
series
}
}
<?
xml
version
=
"1.0"
?>
<
configuration
>
<
startup
><
supportedRuntime
version
=
"v4.0"
sku
=
".NETFramework,Version=v4.0"
/></
startup
>
<
appSettings
>
<
add
key
=
"smtp_addr"
value
=
"smtp.gmail.com"
/>
<
add
key
=
"smtp_acct_name"
value
=
"@gmail.com"
/>
<
add
key
=
"smtp_acct_pwd"
value
=
""
/>
<
add
key
=
"evernote_folder"
value
=
"@folder..."
/>
<
add
key
=
"xml_path"
value
=
"xxxxx\wp_posts.xml"
/>
<
add
key
=
"notebook_email"
value
=
"xxxx@mywiz.cn"
/>
<
add
key
=
"post_scope_start_date"
value
=
"2013-04-11 22:25:50"
/>
<
add
key
=
"post_scope_end_date"
value
=
""
/>
</
appSettings
>
</
configuration
>
三、可改善的地方
- Mail body顶部,我加入了日志详情
- 我没有将日志放进tag,你可以改一下代码使之subject上加入tag (for evernote, 加入#tag1 tag2等,自行查下规则)
- 唯一目前没找到破的是SMTP常随机无响应.. 你如果找到可以破的(比如CATCH到IOExeption重连的),麻烦告诉我
- GMAIL的SMTP有发送次数限制(一天一千左右吧,所以如果有超过的话可能需要更换账号再试,当然你也可以改良下,做个数组,遇到配额错误可以提取下一个SMTP信息)
四、关于为知WIZ
- 个人觉得Wiz是个还有很多提升空间的、自定义性比较强的国产笔记类软件,记得用Wiz的话一定要注意给你不愿意别人看到的条目加个密,不然按Wiz的目录式的HTML文件在电脑中完全没有秘密可言,知道目录的无需登陆就能打加所有日志。
- 没有Wiz账号的,在这顺便递送个邀请码吧,6d485186 (http://www.wiz.cn/i/6d485186),据说用了注册后第一个月有VIP试用,当然你不用也行。
- 比较起Evernote来说,好处就是日志的空间比较大,插件化让它比较有趣味。 比如它可以导出CHM,可以把以往的日志全部转成一个漂亮的CHM。它的搜索体验我觉得比Evernote更好,全文检索的index使得搜索特别快。再多的就不介绍了,敢兴趣的可以自行了解。
愿一路奔跑不退缩,到目前一直从事.Net的B/S,C/S企业应用研发