• C# HTML内容处理类


    using System;
    using System.Text;
    using System.Text.RegularExpressions;
    using System.Net;
    using System.IO;
    using System.IO.Compression;
    
    namespace HuaTong.General.Utility
    {
        /// <summary>
        /// html内容处理
        /// </summary>
        public static class HtmlHelper
        {
            /// <summary>
            /// Converts plain text to HTML with both optional behaviours switched off.
            /// </summary>
            /// <param name="value">The plain text to convert.</param>
            /// <returns>
            /// The result of the three-argument overload called with
            /// <c>isHasEmptyRow</c> and <c>isOutBr</c> both set to false.
            /// </returns>
            public static string TextToHTML(string value)
            {
                const bool isHasEmptyRow = false;
                const bool isOutBr = false;
                return TextToHTML(value, isHasEmptyRow, isOutBr);
            }
            /// <summary>
            /// Converts plain text to HTML, letting the caller choose the empty-row handling only.
            /// </summary>
            /// <param name="value">The plain text to convert.</param>
            /// <param name="isHasEmptyRow">Forwarded unchanged to the three-argument overload.</param>
            /// <returns>
            /// The result of the three-argument overload called with <c>isOutBr</c> set to false.
            /// </returns>
            public static string TextToHTML(string value, bool isHasEmptyRow)
            {
                const bool isOutBr = false;
                return TextToHTML(value, isHasEmptyRow, isOutBr);
            }
            /// <summary>
            /// Converts plain text to HTML by escaping markup characters and turning
            /// line breaks into either line-break tags or paragraph tags.
            /// </summary>
            /// <param name="value">The plain text to convert.</param>
            /// <param name="isHasEmptyRow">
            /// When true, the text is first passed through <c>StringHelper.RemoveEmptyRow</c>;
            /// when false it is used as given.
            /// </param>
            /// <param name="isOutBr">
            /// When true, every line break becomes a <c>br</c> tag. When false, the whole
            /// text is wrapped in a <c>p</c> element and every line break closes the
            /// current paragraph and opens a new one.
            /// </param>
            /// <returns>The escaped HTML fragment.</returns>
            public static string TextToHTML(string value, bool isHasEmptyRow, bool isOutBr)
            {
                StringBuilder html = new StringBuilder();
                html.Append(isHasEmptyRow ? StringHelper.RemoveEmptyRow(value) : value);

                // "&" has to be escaped first, otherwise the ampersands introduced by the
                // entities below would be escaped a second time.
                html.Replace("&", "&amp;");
                html.Replace(">", "&gt;");
                html.Replace("<", "&lt;");
                html.Replace(" ", "&nbsp;");
                html.Replace("\"", "&quot;");
                html.Replace("©", "&copy;");
                html.Replace("®", "&reg;");
                html.Replace("×", "&times;");
                html.Replace("÷", "&divide;");

                if (isOutBr)
                {
                    // CRLF is handled before the single characters so that one Windows
                    // line break yields one tag, not two.
                    html.Replace("\r\n", "<br>");
                    html.Replace("\r", "<br>");
                    html.Replace("\n", "<br>");
                }
                else
                {
                    // Normalise every line-break style to "\n" first, then split paragraphs.
                    html.Replace("\r\n", "\n");
                    html.Replace("\r", "\n");
                    html.Insert(0, "<p>");
                    html.Replace("\n", "</p>\r\n<p>");
                    html.Append("</p>");
                }

                return html.ToString();
            }
    
            /// <summary>
            /// Turns an HTML fragment into JavaScript that writes it out with
            /// <c>document.writeln</c>, one call per non-empty source line.
            /// </summary>
            /// <param name="value">The HTML fragment; null is treated as empty.</param>
            /// <returns>
            /// One <c>document.writeln("...");</c> statement per non-empty line of
            /// <paramref name="value"/>, separated by CRLF. Empty input yields a single
            /// <c>document.writeln("");</c> statement.
            /// </returns>
            public static string HtmlToScript(string value)
            {
                StringBuilder escaped = new StringBuilder();
                escaped.Append(value);

                // The backslash is escaped first so the backslashes added below are not doubled.
                escaped.Replace("\\", "\\\\");
                // "/" is escaped so a closing tag inside the string cannot terminate an
                // enclosing script element.
                escaped.Replace("/", "\\/");
                escaped.Replace("'", "\\'");
                escaped.Replace("\"", "\\\"");

                string[] lines = escaped.ToString().Split(
                    new char[] { '\r', '\n' },
                    StringSplitOptions.RemoveEmptyEntries);

                // Each line closes the current writeln call and opens the next one.
                string joined = String.Join("\");\r\ndocument.writeln(\"", lines);
                return "document.writeln(\"" + joined + "\");";
            }
    
            /// <summary>
            /// Escapes a string so it can be placed inside a quoted JavaScript string literal.
            /// Backslashes, single quotes and double quotes are prefixed with a backslash;
            /// line breaks are not escaped.
            /// </summary>
            /// <param name="value">The text to escape.</param>
            /// <returns>
            /// The escaped text, or <paramref name="value"/> itself when it is null or empty.
            /// </returns>
            public static string ScriptStringFormat(string value)
            {
                if (string.IsNullOrEmpty(value))
                {
                    return value;
                }

                // The backslash is escaped first so the backslashes added below are not doubled.
                value = value.Replace("\\", "\\\\");
                value = value.Replace("'", "\\'");
                value = value.Replace("\"", "\\\"");
                return value;
            }
    
            /// <summary>
            /// Strips HTML tags and character references from a string, leaving the text.
            /// Character references are removed, not decoded.
            /// </summary>
            /// <param name="value">The HTML to strip.</param>
            /// <returns>
            /// The remaining text, or <paramref name="value"/> itself when it is null or empty.
            /// </returns>
            public static string HtmlToText(string value)
            {
                if (string.IsNullOrEmpty(value))
                {
                    return value;
                }

                // A character reference is matched precisely (named, decimal or hexadecimal)
                // so that ordinary text lying between two references is kept; a greedy
                // "anything up to a semicolon" match would swallow it.
                string pattern = @"&(#x?[0-9a-f]+|[a-z][a-z0-9]*);|<[^>]*>";
                return Regex.Replace(value, pattern, "", RegexOptions.IgnoreCase);
            }
    
            /// <summary>
            /// Defuses unsafe constructs in an HTML fragment using a keyword blacklist.
            /// </summary>
            /// <remarks>
            /// Three rewrites are applied repeatedly until the text stops changing:
            /// the leading "o" is dropped from attribute names that start with "o" (such as
            /// event handlers), a "." is appended to listed tag or keyword names that are
            /// followed by whitespace, ":", "|" or a closing angle bracket, and the words
            /// "javascript" and "eval" are removed. The keywords are matched anywhere,
            /// including inside ordinary words.
            /// NOTE(review): a blacklist of this kind is not a complete defence against
            /// script injection; prefer a whitelist-based sanitizer for untrusted input.
            /// </remarks>
            /// <param name="value">The HTML fragment to filter.</param>
            /// <returns>
            /// The filtered fragment, or <paramref name="value"/> itself when it is null or empty.
            /// </returns>
            public static string HtmlFilter(string value)
            {
                if (string.IsNullOrEmpty(value))
                {
                    return value;
                }

                // Repeating the passes stops nested input such as "jajavascriptvascript"
                // from reassembling a blocked token once the inner match is removed.
                string previous;
                do
                {
                    previous = value;
                    value = Regex.Replace(value, @"(<|\s+)o([a-z]+\s?=)", "$1$2", RegexOptions.IgnoreCase);
                    // "embed" and "layer" are listed separately; the fused token "embedlayer"
                    // matches no real tag name.
                    value = Regex.Replace(value, @"(select|textarea|input|link|iframe|frameset|frame|form|applet|embed|layer|ilayer|meta|object|script|behavior|style)([\s|:|>])+", "$1.$2", RegexOptions.IgnoreCase);
                    value = Regex.Replace(value, @"javascript|eval", "", RegexOptions.IgnoreCase);
                }
                while (!string.Equals(previous, value, StringComparison.Ordinal));

                return value;
            }
    
            /// <summary>
            /// Downloads the body of a page and decodes it with the named encoding.
            /// </summary>
            /// <remarks>
            /// The request uses a 10 second timeout and does not follow redirects, so only
            /// a direct 200 response produces content. A response whose Content-Encoding
            /// is reported as gzip by <c>StringHelper.IsEqualString</c> is decompressed.
            /// </remarks>
            /// <param name="url">The address to request.</param>
            /// <param name="encoding">The encoding name passed to <c>Encoding.GetEncoding</c>.</param>
            /// <returns>
            /// The decoded body, or an empty string when the status is not 200 or any
            /// exception occurs (invalid address or encoding name, network failure, and so on).
            /// </returns>
            public static string GetHtmlContent(string url, string encoding)
            {
                try
                {
                    // Resolve the encoding before touching the network: an unknown name then
                    // fails without sending a request or leaving a stream half-constructed.
                    Encoding pageEncoding = Encoding.GetEncoding(encoding);

                    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
                    request.Timeout = 10000;
                    request.AllowAutoRedirect = false;

                    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                    {
                        if (response.StatusCode != HttpStatusCode.OK)
                        {
                            return string.Empty;
                        }

                        Stream body = response.GetResponseStream();
                        if (response.ContentEncoding != null && StringHelper.IsEqualString(response.ContentEncoding, "gzip"))
                        {
                            body = new GZipStream(body, CompressionMode.Decompress);
                        }

                        // Disposing the reader also disposes the stream chain beneath it.
                        using (StreamReader reader = new StreamReader(body, pageEncoding))
                        {
                            return reader.ReadToEnd();
                        }
                    }
                }
                catch (Exception)
                {
                    // Deliberate best-effort: callers receive an empty string on any failure.
                    return string.Empty;
                }
            }
            /// <summary>
            /// Determines the character set a web page declares for itself.
            /// </summary>
            /// <remarks>
            /// The request uses a 20 second timeout and does not follow redirects. The body
            /// is only inspected when the status is 200 and the reported content length is
            /// below 1 MiB. A <c>charset=...</c> declaration found in the body wins over
            /// the character set reported by the response headers.
            /// </remarks>
            /// <param name="url">The address to request.</param>
            /// <returns>
            /// The declared charset name; otherwise the response's character set; otherwise
            /// <c>Encoding.Default.BodyName</c>, which is also returned when any exception occurs.
            /// </returns>
            public static string GetEncoding(string url)
            {
                try
                {
                    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
                    request.Timeout = 20000;
                    request.AllowAutoRedirect = false;

                    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                    {
                        if (response.StatusCode == HttpStatusCode.OK && response.ContentLength < 1024 * 1024)
                        {
                            // Read the header value up front, before the body stream is consumed and closed.
                            string headerCharset = response.CharacterSet;

                            Stream body = response.GetResponseStream();
                            if (response.ContentEncoding != null && StringHelper.IsEqualString(response.ContentEncoding, "gzip"))
                            {
                                body = new GZipStream(body, CompressionMode.Decompress);
                            }

                            // Charset names are plain ASCII, so an ASCII read is enough to find the declaration.
                            string html;
                            using (StreamReader reader = new StreamReader(body, Encoding.ASCII))
                            {
                                html = reader.ReadToEnd();
                            }

                            // The optional quote lets both content="text/html; charset=utf-8" and
                            // <meta charset="utf-8"> yield "utf-8" instead of an empty match.
                            Match declared = Regex.Match(
                                html,
                                @"charset\s*=\s*[""']?(?<charset>[^""'\s;/>]+)",
                                RegexOptions.IgnoreCase);
                            if (declared.Success)
                            {
                                return declared.Groups["charset"].Value;
                            }

                            if (!string.IsNullOrEmpty(headerCharset))
                            {
                                return headerCharset;
                            }
                        }
                    }
                }
                catch (Exception)
                {
                    // Deliberate best-effort: fall through to the platform default below.
                }

                return Encoding.Default.BodyName;
            }
        }
    }
  • 相关阅读:
    iOS NSString的常用用法
    有序数组在数据量较少时候的查找效率比较
    【转载】gdb基本命令总结
    从一个笔误引起的思考
    常见性能优化小技巧原理
    使用T-SQL进行活动目录查询
    你需要一条怎样的牛仔裤?
    #VSTS日志# 2015/12/10 – 终于可以删除工作项了
    #VSTS定制#全新的模版定制能力
    混合使用TFVC和GIT配置库的优化方案
  • 原文地址:https://www.cnblogs.com/password1/p/5870713.html
Copyright © 2020-2023  润新知