using System.Text.RegularExpressions; //正则
// Sample HTML fragment containing a list of photo tags.
// A verbatim (@"...") literal is used because a regular string literal cannot
// span several source lines; embedded double quotes are written as "".
string strHtml = @" </ul> <div id=""photo-tags""> <h3 id=""tags-title"">Tags in this photo: </h3>
<ul id='tags'>
<li> <a href='/user/669345/tags/Belgium'>Belgium </a> </li>
<li> <a href='/user/669345/tags/Belgien'>Belgien </a> </li>
<li> <a href='/user/669345/tags/Urlaub'>Urlaub </a> </li>
<li> <a href='/user/669345/tags/Holidays'>Holidays </a> </li>
<li> <a href='/user/669345/tags/Vakanties'>Vakanties </a> </li>
<li> <a href='/user/669345/tags/CenterParcs'>CenterParcs </a> </li>
<li> <a href='/user/669345/tags/10 000 000'>10 000 000 </a> </li>
</ul> </div> <div id=""photo-info"">";

// Show the raw input.
TextBox1.Text = strHtml;

// The lookbehind anchors on the opening <li><a href='/user/.../tags/...'> tag and
// the lookahead on the closing " </a> </li>", so each match is only the link text.
Regex re = new Regex("(?<=<li> <a href='/user/[^>]*/tags/[^>]*>).*?(?= </a> </li>)");

// Run the pattern once; an empty collection means "no match"
// (avoids scanning the input twice with IsMatch + Matches).
MatchCollection mc = re.Matches(strHtml);
if (mc.Count > 0)
{
    foreach (Match ma in mc)
    {
        // The pattern has no capturing groups, so group 0 (the whole match)
        // is the only group; emit it followed by a space, one match per line.
        TextBox2.Text += ma.Value + " ";
        TextBox2.Text += "\n";
    }
}
else
{
    TextBox2.Text = "no";
}
// Result (tag names written to TextBox2, one per line):
// Belgium
// Belgien
// Urlaub
// Holidays
// Vakanties
// CenterParcs
// 10 000 000
/// <summary>
/// Extracts the <c>src</c> attribute of the first <c>&lt;img&gt;</c> tag found in the
/// article content, for use as the article thumbnail.
/// </summary>
/// <param name="articleContent">HTML content of the article; may be null or empty.</param>
/// <returns>
/// The lower-cased <c>src</c> value of the first image tag, or an empty string when
/// the content is null/empty or contains no matching image tag.
/// </returns>
public static string GetImageUrlFromArticle(string articleContent)
{
    // Regex.Match throws ArgumentNullException on null input; treat "no content" as "no image".
    if (string.IsNullOrEmpty(articleContent))
    {
        return "";
    }

    // Accepts single-quoted, double-quoted, or unquoted src values.
    // The static Regex.Match overload reuses the framework's cached parsed pattern
    // and stops at the first match instead of enumerating every image.
    Match first = Regex.Match(
        articleContent,
        @"<IMG[^>]+src=\s*(?:'(?<src>[^']+)'|""(?<src>[^""]+)""|(?<src>[^>\s]+))\s*[^>]*>",
        RegexOptions.IgnoreCase);

    if (!first.Success)
    {
        return "";
    }

    // Lower-casing is kept for compatibility with existing callers, but done with the
    // invariant culture so the result does not depend on the current thread culture.
    // NOTE(review): lower-casing can alter case-sensitive URLs - confirm callers need it.
    return first.Groups["src"].Value.ToLowerInvariant();
}
/// <summary>
/// Gets the first image URL from the article content, to be used as the article thumbnail.
/// </summary>
/// <param name="articleContent">HTML content of the article; may be null or empty.</param>
/// <returns>
/// The lower-cased <c>src</c> value of the first <c>&lt;img&gt;</c> tag, or an empty
/// string when the content is null/empty or has no matching image tag.
/// </returns>
// NOTE(review): a method with this exact name and signature also appears earlier in
// this file; if both live in the same class, one of the two copies should be removed.
public static string GetImageUrlFromArticle(string articleContent)
{
    // Guard first: Regex.Match throws ArgumentNullException when given null.
    if (string.IsNullOrEmpty(articleContent))
    {
        return "";
    }

    // src may be single-quoted, double-quoted, or unquoted; only the first tag is needed,
    // so Match is used rather than collecting every match.
    Match imageTag = Regex.Match(
        articleContent,
        @"<IMG[^>]+src=\s*(?:'(?<src>[^']+)'|""(?<src>[^""]+)""|(?<src>[^>\s]+))\s*[^>]*>",
        RegexOptions.IgnoreCase);

    // Invariant-culture lower-casing keeps the result independent of the thread culture.
    // NOTE(review): lower-casing can alter case-sensitive URLs - confirm callers need it.
    return imageTag.Success
        ? imageTag.Groups["src"].Value.ToLowerInvariant()
        : "";
}