• 过滤HTML标签


    View Code
     // Patterns that remove whole constructs (opening tag, content and closing tag).
     // They run BEFORE the generic tag strip; once every "<...>" is gone they could
     // never match. All quantifiers spanning content are lazy so that text sitting
     // between two separate blocks is preserved.
     private static readonly System.Text.RegularExpressions.Regex[] CheckStrBlockPatterns =
     {
         // <script ...> ... </script>
         BuildCheckStrRegex(@"<\s*script[^>]*>[\s\S]*?<\s*/\s*script\s*>"),
         // <style ...> ... </style>
         BuildCheckStrRegex(@"<\s*style[^>]*>[\s\S]*?<\s*/\s*style\s*>"),
         // <!-- ... --> (may legitimately contain '>')
         BuildCheckStrRegex(@"<!--[\s\S]*?-->"),
         // <iframe ...> ... </iframe>
         BuildCheckStrRegex(@"<iframe[\s\S]+?</iframe *>"),
         // <frameset ...> ... </frameset>
         BuildCheckStrRegex(@"<frameset[\s\S]+?</frameset *>"),
         // href=javascript: style attributes; bounded so it cannot run across tags.
         BuildCheckStrRegex(@" href *= *[^<>]*?script *:"),
     };

     // Inline event-handler attributes such as onclick= / onerror=. This still matters
     // for tags that are never closed with '>' and therefore survive the tag strip.
     private static readonly System.Text.RegularExpressions.Regex CheckStrEventHandlerPattern =
         BuildCheckStrRegex(@"\son\w+\s*=");

     // Any remaining tag. This also covers <img>, <p>, </p> and <strong>, so no
     // dedicated patterns are needed for them.
     private static readonly System.Text.RegularExpressions.Regex CheckStrTagPattern =
         BuildCheckStrRegex(@"<[^>]*>");

     // Literal fragments removed after the regex passes, applied strictly in this order
     // (ordinal, case-sensitive). Order matters: e.g. "/Div;" must go before "Div;".
     private static readonly string[] CheckStrLiteralsToRemove =
     {
         " ",
         "&lt;br&gt;",
         "\r",
         "\n",
         "&lt;p&gt;",
         "&lt;/p&gt;",
         "&lt;BR&gt;",   // complete entity first, so no stray ';' is left behind
         "&lt;BR&gt",    // kept for inputs that really lack the trailing ';'
         "&amp;",
         "&lt;P&gt;",
         "&lt;/P&gt;",   // single closing tag, consistent with the lowercase variant
         "/Div;",
         "Div;",
         "/div;",
         "div;",
         "nbsp;",
         // NOTE(review): this literal contains a space, but the first entry above
         // already removes every space, so it looks unreachable - confirm whether the
         // first entry was originally meant to be "&nbsp;".
         "style=&quot;LINE-HEIGHT: 22px;",
         "class=p1",
     };

     /// <summary>
     /// Creates a case-insensitive, compiled regex used by <see cref="checkStr"/>.
     /// </summary>
     /// <param name="pattern">The regular expression pattern.</param>
     /// <returns>A reusable regex instance.</returns>
     private static System.Text.RegularExpressions.Regex BuildCheckStrRegex(string pattern)
     {
         return new System.Text.RegularExpressions.Regex(
             pattern,
             System.Text.RegularExpressions.RegexOptions.IgnoreCase
                 | System.Text.RegularExpressions.RegexOptions.Compiled);
     }

     /// <summary>
     /// Reduces an HTML fragment to compact plain text.
     /// Script, style, comment, iframe and frameset blocks are removed together with
     /// their content, inline event-handler attributes are neutralised, every remaining
     /// tag is stripped, and finally a fixed list of literal fragments (spaces, line
     /// breaks, a few encoded tags and entities) is deleted.
     /// </summary>
     /// <param name="html">The HTML text to filter. May be null or empty.</param>
     /// <returns>
     /// The filtered text; <paramref name="html"/> itself when it is null or empty.
     /// </returns>
     public string checkStr(string html)
     {
         // Regex.Replace throws on null; there is nothing to filter in either case.
         if (string.IsNullOrEmpty(html))
         {
             return html;
         }

         // 1. Drop whole blocks while their delimiters are still present.
         foreach (System.Text.RegularExpressions.Regex blockPattern in CheckStrBlockPatterns)
         {
             html = blockPattern.Replace(html, "");
         }

         // 2. Rename on...= attributes so they can no longer act as event handlers.
         html = CheckStrEventHandlerPattern.Replace(html, " _disibledevent=");

         // 3. Strip every remaining tag.
         html = CheckStrTagPattern.Replace(html, "");

         // 4. Remove the literal leftovers, in the declared order.
         foreach (string literal in CheckStrLiteralsToRemove)
         {
             html = html.Replace(literal, "");
         }

         return html;
     }
  • 相关阅读:
    Django-ORM和MySQL事务及三大范式介绍
    django-视图层与模板层
    django初步--+urls解析
    django前戏
    python web开发中跨域问题的解决思路
    MySQL显示ERROR 2003 (HY000): Can't connect to MySQL server on 'localhost' (10061)解决方法
    xpath
    HTTP请求方法
    JavaScript 对象
    JavaScript 关键字
  • 原文地址:https://www.cnblogs.com/lihuijun/p/2970836.html
Copyright © 2020-2023  润新知