using System;
using System.Text;
using System.Text.RegularExpressions;
using System.Net;
using System.IO;
using System.IO.Compression;

namespace HuaTong.General.Utility
{
    /// <summary>
    /// Helpers for converting between plain text, HTML and JavaScript string
    /// output, for blacklist-based HTML filtering, and for downloading pages.
    /// </summary>
    public static class HtmlHelper
    {
        /// <summary>Timeout, in milliseconds, used by <see cref="GetHtmlContent"/>.</summary>
        private const int ContentTimeoutMilliseconds = 10000;

        /// <summary>Timeout, in milliseconds, used by <see cref="GetEncoding"/>.</summary>
        private const int EncodingTimeoutMilliseconds = 20000;

        /// <summary>Largest declared Content-Length that <see cref="GetEncoding"/> will download.</summary>
        private const long MaxSniffContentLength = 1024 * 1024;

        /// <summary>Matches a single character entity (named or numeric) or a single tag.</summary>
        private static readonly Regex EntityOrTagRegex =
            new Regex(@"&#?[a-z0-9]+;|<[^>]*>", RegexOptions.IgnoreCase);

        /// <summary>
        /// Matches <c>charset=value</c>, with or without a quote directly before the value,
        /// so both <c>content="text/html; charset=utf-8"</c> and <c>charset="utf-8"</c> are captured.
        /// </summary>
        private static readonly Regex CharsetRegex =
            new Regex(@"charset\s*=\s*[""']?\s*(?<charset>[\w.:-]+)", RegexOptions.IgnoreCase);

        /// <summary>
        /// Converts plain text to HTML paragraphs.
        /// Equivalent to <c>TextToHTML(value, false, false)</c>.
        /// </summary>
        /// <param name="value">The plain text to convert.</param>
        /// <returns>The HTML-encoded text wrapped in &lt;p&gt; elements.</returns>
        public static string TextToHTML(string value)
        {
            return TextToHTML(value, false, false);
        }

        /// <summary>
        /// Converts plain text to HTML paragraphs.
        /// Equivalent to <c>TextToHTML(value, isHasEmptyRow, false)</c>.
        /// </summary>
        /// <param name="value">The plain text to convert.</param>
        /// <param name="isHasEmptyRow">
        /// When true, the text is first passed through <c>StringHelper.RemoveEmptyRow</c>.
        /// </param>
        /// <returns>The HTML-encoded text wrapped in &lt;p&gt; elements.</returns>
        public static string TextToHTML(string value, bool isHasEmptyRow)
        {
            return TextToHTML(value, isHasEmptyRow, false);
        }

        /// <summary>
        /// Converts plain text to HTML: encodes markup-significant characters as
        /// entities and turns line breaks into either &lt;br&gt; tags or paragraphs.
        /// </summary>
        /// <param name="value">The plain text to convert.</param>
        /// <param name="isHasEmptyRow">
        /// When true, the text is first passed through <c>StringHelper.RemoveEmptyRow</c>.
        /// </param>
        /// <param name="isOutBr">
        /// When true, every line break becomes &lt;br&gt;; otherwise the whole text is
        /// wrapped in &lt;p&gt;...&lt;/p&gt; and every line break starts a new paragraph.
        /// </param>
        /// <returns>The resulting HTML fragment.</returns>
        public static string TextToHTML(string value, bool isHasEmptyRow, bool isOutBr)
        {
            StringBuilder html = new StringBuilder();
            html.Append(isHasEmptyRow ? StringHelper.RemoveEmptyRow(value) : value);

            // "&" must be encoded first, otherwise the ampersands introduced by
            // the entities below would themselves be re-encoded.
            html.Replace("&", "&amp;");
            html.Replace(">", "&gt;");
            html.Replace("<", "&lt;");
            html.Replace(" ", "&nbsp;");
            html.Replace("\"", "&quot;");
            html.Replace("\u00A9", "&copy;");   // copyright sign
            html.Replace("\u00AE", "&reg;");    // registered sign
            html.Replace("\u00D7", "&times;");  // multiplication sign
            html.Replace("\u00F7", "&divide;"); // division sign

            if (isOutBr)
            {
                // Longest sequence first so that CRLF yields one <br>, not two.
                html.Replace("\r\n", "<br>");
                html.Replace("\n", "<br>");
                html.Replace("\r", "<br>");
            }
            else
            {
                html.Insert(0, "<p>");
                // Normalize every line-break flavour to "\n" before splitting
                // into paragraphs, so CRLF does not produce an empty paragraph.
                html.Replace("\r\n", "\n");
                html.Replace("\r", "\n");
                html.Replace("\n", "</p>\n<p>");
                html.Append("</p>");
            }

            return html.ToString();
        }

        /// <summary>
        /// Converts an HTML fragment into JavaScript that writes it to the page,
        /// emitting one <c>document.writeln("...");</c> statement per non-empty line.
        /// </summary>
        /// <param name="value">The HTML fragment.</param>
        /// <returns>JavaScript source; <c>document.writeln("");</c> when there are no non-empty lines.</returns>
        public static string HtmlToScript(string value)
        {
            StringBuilder script = new StringBuilder();
            script.Append(value);

            // Backslashes first, so the escapes added below are not doubled.
            script.Replace("\\", "\\\\");
            // "/" is escaped so a "</script>" inside the HTML cannot close the
            // enclosing script element.
            script.Replace("/", "\\/");
            script.Replace("'", "\\'");
            script.Replace("\"", "\\\"");

            string[] lines = script.ToString().Split(
                new char[] { '\r', '\n' }, StringSplitOptions.RemoveEmptyEntries);

            return String.Format(
                "document.writeln(\"{0}\");",
                String.Join("\");\ndocument.writeln(\"", lines));
        }

        /// <summary>
        /// Escapes backslashes and single and double quotes so the text can be
        /// placed inside a JavaScript string literal.
        /// </summary>
        /// <param name="value">The text to escape.</param>
        /// <returns>The escaped text; an empty string when <paramref name="value"/> is null or empty.</returns>
        public static string ScriptStringFormat(string value)
        {
            if (string.IsNullOrEmpty(value))
            {
                return string.Empty;
            }

            // Backslashes first, so the escapes added below are not doubled.
            value = value.Replace("\\", "\\\\");
            value = value.Replace("'", "\\'");
            value = value.Replace("\"", "\\\"");
            return value;
        }

        /// <summary>
        /// Strips tags and character entities from an HTML fragment.
        /// Entities are removed, not decoded.
        /// </summary>
        /// <param name="value">The HTML fragment.</param>
        /// <returns>The remaining text; an empty string when <paramref name="value"/> is null or empty.</returns>
        public static string HtmlToText(string value)
        {
            if (string.IsNullOrEmpty(value))
            {
                return string.Empty;
            }

            return EntityOrTagRegex.Replace(value, "");
        }

        /// <summary>
        /// Defuses a fixed list of unsafe constructs in an HTML fragment.
        /// </summary>
        /// <remarks>
        /// NOTE(review): this is a blacklist and should not be relied on as the
        /// only defence against script injection for untrusted input.
        /// </remarks>
        /// <param name="value">The HTML fragment.</param>
        /// <returns>The filtered fragment; an empty string when <paramref name="value"/> is null or empty.</returns>
        public static string HtmlFilter(string value)
        {
            if (string.IsNullOrEmpty(value))
            {
                return string.Empty;
            }

            // Drop the leading "o" of any attribute that starts with "o"
            // (e.g. onclick= becomes nclick=), which disables event handlers.
            value = Regex.Replace(value, @"(<|\s+)o([a-z]+\s?=)", "$1$2", RegexOptions.IgnoreCase);

            // Insert a "." after each listed name when it is followed by
            // whitespace, "|", ":" or ">", so it no longer parses as that element.
            value = Regex.Replace(
                value,
                @"(select|textarea|input|link|iframe|frameset|frame|form|applet|embed|layer|ilayer|meta|object|script|behavior|style)([\s|:|>])+",
                "$1.$2",
                RegexOptions.IgnoreCase);

            value = Regex.Replace(value, @"javascript|eval", "", RegexOptions.IgnoreCase);
            return value;
        }

        /// <summary>
        /// Downloads the body of <paramref name="url"/> and decodes it with the named encoding.
        /// </summary>
        /// <param name="url">The address to request. Redirects are not followed.</param>
        /// <param name="encoding">An encoding name accepted by <see cref="Encoding.GetEncoding(string)"/>.</param>
        /// <returns>
        /// The decoded body when the response status is 200 OK; otherwise, or on
        /// any failure, an empty string. This method does not throw.
        /// </returns>
        public static string GetHtmlContent(string url, string encoding)
        {
            try
            {
                HttpWebRequest request = CreateRequest(url, ContentTimeoutMilliseconds);
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                {
                    if (response.StatusCode != HttpStatusCode.OK)
                    {
                        return string.Empty;
                    }

                    using (StreamReader reader = OpenBodyReader(response, Encoding.GetEncoding(encoding)))
                    {
                        return reader.ReadToEnd();
                    }
                }
            }
            catch (Exception)
            {
                // Deliberate best-effort: callers receive an empty string for any
                // failure (bad URL, network error, unknown encoding name, ...).
                return string.Empty;
            }
        }

        /// <summary>
        /// Determines the character set of the page at <paramref name="url"/>.
        /// </summary>
        /// <param name="url">The address to request. Redirects are not followed.</param>
        /// <returns>
        /// The first <c>charset=</c> value found in the body; failing that, the
        /// character set reported by the response; failing that, or on any
        /// failure, a non-200 status, or a declared length of 1 MiB or more,
        /// <c>Encoding.Default.BodyName</c>. This method does not throw.
        /// </returns>
        public static string GetEncoding(string url)
        {
            try
            {
                HttpWebRequest request = CreateRequest(url, EncodingTimeoutMilliseconds);
                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                {
                    if (response.StatusCode == HttpStatusCode.OK
                        && response.ContentLength < MaxSniffContentLength)
                    {
                        string html;
                        // ASCII is sufficient here: the charset declaration
                        // itself only uses ASCII characters.
                        using (StreamReader reader = OpenBodyReader(response, Encoding.ASCII))
                        {
                            html = reader.ReadToEnd();
                        }

                        Match match = CharsetRegex.Match(html);
                        if (match.Success)
                        {
                            return match.Groups["charset"].Value;
                        }

                        if (!string.IsNullOrEmpty(response.CharacterSet))
                        {
                            return response.CharacterSet;
                        }
                    }
                }
            }
            catch (Exception)
            {
                // Deliberate best-effort: fall through to the default encoding.
            }

            return Encoding.Default.BodyName;
        }

        /// <summary>Creates a request for <paramref name="url"/> with the given timeout and redirects disabled.</summary>
        private static HttpWebRequest CreateRequest(string url, int timeoutMilliseconds)
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
            request.Timeout = timeoutMilliseconds;
            request.AllowAutoRedirect = false;
            return request;
        }

        /// <summary>
        /// Opens a text reader over the response body, decompressing it first
        /// when the response declares a gzip content encoding.
        /// </summary>
        private static StreamReader OpenBodyReader(HttpWebResponse response, Encoding encoding)
        {
            Stream body = response.GetResponseStream();
            if (response.ContentEncoding != null
                && StringHelper.IsEqualString(response.ContentEncoding, "gzip"))
            {
                body = new GZipStream(body, CompressionMode.Decompress);
            }

            // Disposing the reader also disposes the underlying stream(s).
            return new StreamReader(body, encoding);
        }
    }
}