• .NET 去除一段文本中的HTML标记


     1  /// <summary>
     2     /// 去除一段文字中的HTML标记
     3     /// </summary>
     4     /// <param name="Htmlstring"></param>
     5     /// <returns></returns>
     6     public static string NoHTML(string Htmlstring)
     7     {
     8         //删除脚本 
     9         Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase);
    10         //删除HTML 
    11         Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
    12         Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
    13         Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
    14         Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);
    15 
    16         Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
    17         Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
    18         Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
    19         Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
    20         Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", "   ", RegexOptions.IgnoreCase);
    21         Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\xa1", RegexOptions.IgnoreCase);
    22         Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\xa2", RegexOptions.IgnoreCase);
    23         Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\xa3", RegexOptions.IgnoreCase);
    24         Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\xa9", RegexOptions.IgnoreCase);
    25         Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);
    26 
    27         Htmlstring.Replace("<", "");
    28         Htmlstring.Replace(">", "");
    29         Htmlstring.Replace("\r\n", "");
    30         Htmlstring = HttpContext.Current.Server.HtmlEncode(Htmlstring).Trim();
    31 
    32         return Htmlstring;
    33     }

     方法2:

     #region Function NoHTML
            public static string NoHTML(string html)
            {
                System.Text.RegularExpressions.Regex regex1 = new System.Text.RegularExpressions.Regex(@"<script[\s\S]+</script *>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
                System.Text.RegularExpressions.Regex regex2 = new System.Text.RegularExpressions.Regex(@" href *= *[\s\S]*script *:", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
                System.Text.RegularExpressions.Regex regex3 = new System.Text.RegularExpressions.Regex(@" no[\s\S]*=", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
                System.Text.RegularExpressions.Regex regex4 = new System.Text.RegularExpressions.Regex(@"<iframe[\s\S]+</iframe *>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
                System.Text.RegularExpressions.Regex regex5 = new System.Text.RegularExpressions.Regex(@"<frameset[\s\S]+</frameset *>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
                System.Text.RegularExpressions.Regex regex6 = new System.Text.RegularExpressions.Regex(@"\<img[^\>]+\>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
                System.Text.RegularExpressions.Regex regex7 = new System.Text.RegularExpressions.Regex(@"</p>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
                System.Text.RegularExpressions.Regex regex8 = new System.Text.RegularExpressions.Regex(@"<p>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
                System.Text.RegularExpressions.Regex regex9 = new System.Text.RegularExpressions.Regex(@"<[^>]*>", System.Text.RegularExpressions.RegexOptions.IgnoreCase);
                html = regex1.Replace(html, ""); //过滤<script></script>标记 
                html = regex2.Replace(html, ""); //过滤href=javascript: (<A>) 属性 
                html = regex3.Replace(html, " _disibledevent="); //过滤其它控件的on...事件 
                html = regex4.Replace(html, ""); //过滤iframe 
                html = regex5.Replace(html, ""); //过滤frameset 
                html = regex6.Replace(html, ""); //过滤frameset 
                html = regex7.Replace(html, ""); //过滤frameset 
                html = regex8.Replace(html, ""); //过滤frameset 
                html = regex9.Replace(html, "");
                html = html.Replace(" ", "");
                html = html.Replace("</strong>", "");
                html = html.Replace("<strong>", "");
                return html;
            }
            #endregion
  • 相关阅读:
    keras 报错 ValueError: Tensor conversion requested dtype int32 for Tensor with dtype float32: 'Tensor("embedding_1/random_uniform:0", shape=(5001, 128), dtype=float32)'
    redis 安装启动及设置密码<windows>
    mysql配置主从数据库
    将已有的项目提交到GitHub
    Top 5 SSH Clients for Windows (Alternatives of PuTTY)
    jQuery 插件写法示例
    Spring 定时操作业务需求
    eclipse 修改js文件无法编译到项目中
    linux 目录结构图解
    MongoDB 概念解析
  • 原文地址:https://www.cnblogs.com/xdoudou/p/3029996.html
Copyright © 2020-2023  润新知