• 去掉HTML标记的各种方法


    using System;
    using System.Data;
    using System.Configuration;
    using System.Web;
    using System.Web.Security;
    using System.Web.UI;
    using System.Web.UI.WebControls;
    using System.Web.UI.WebControls.WebParts;
    using System.Web.UI.HtmlControls;
    using System.Text;
    using System.Text.RegularExpressions;

    /// <summary>
    /// String helpers: regex-based replace/remove, HTML tag stripping,
    /// HTML escaping/unescaping, numeric checks and random string generation.
    /// </summary>
    public class StringUtilily
    {
        /// <summary>
        /// Alphabet that <see cref="GetRandomString"/> draws its characters from.
        /// </summary>
        public const string RANDOM_STRING_SOURCE = "0123456789abcdefghijklmnopqrstuvwxyz";

        // A single shared generator: creating a time-seeded Random per call can
        // hand back identical sequences when calls happen in quick succession.
        private static readonly Random s_random = new Random();

        // Random is not thread-safe, so access to s_random is serialized.
        private static readonly object s_randomLock = new object();

        /// <summary>
        /// Creates an instance. All members are static; the constructor is kept
        /// only so existing code that instantiates the class keeps compiling.
        /// </summary>
        public StringUtilily()
        {
        }

        /// <summary>
        /// Replaces every match of a regular expression in a string.
        /// </summary>
        /// <param name="src">The string to modify.</param>
        /// <param name="pattern">The regular expression pattern to match.</param>
        /// <param name="replacement">The replacement string.</param>
        /// <returns>The modified string.</returns>
        public static string Replace(string src, string pattern, string replacement)
        {
            return Replace(src, pattern, replacement, RegexOptions.None);
        }

        /// <summary>
        /// Replaces every match of a regular expression in a string, ignoring case.
        /// </summary>
        /// <param name="src">The string to modify.</param>
        /// <param name="pattern">The regular expression pattern to match.</param>
        /// <param name="replacement">The replacement string.</param>
        /// <returns>The modified string.</returns>
        public static string ReplaceIgnoreCase(string src, string pattern, string replacement)
        {
            return Replace(src, pattern, replacement, RegexOptions.IgnoreCase);
        }

        /// <summary>
        /// Replaces every match of a regular expression in a string using the given options.
        /// </summary>
        /// <param name="src">The string to modify.</param>
        /// <param name="pattern">The regular expression pattern to match.</param>
        /// <param name="replacement">The replacement string.</param>
        /// <param name="options">The regex matching options.</param>
        /// <returns>The modified string.</returns>
        public static string Replace(string src, string pattern, string replacement, RegexOptions options)
        {
            // The static overload reuses the framework's regex cache. Building a new
            // RegexOptions.Compiled instance per call paid the compilation cost every
            // time without ever reusing the compiled regex.
            return Regex.Replace(src, pattern, replacement, options);
        }

        /// <summary>
        /// Removes every match of a regular expression from a string.
        /// </summary>
        /// <param name="src">The string to modify.</param>
        /// <param name="pattern">The regular expression pattern to remove.</param>
        /// <returns>The string with all matches removed.</returns>
        public static string Drop(string src, string pattern)
        {
            return Replace(src, pattern, "");
        }

        /// <summary>
        /// Removes every match of a regular expression from a string, ignoring case.
        /// </summary>
        /// <param name="src">The string to modify.</param>
        /// <param name="pattern">The regular expression pattern to remove.</param>
        /// <returns>The string with all matches removed.</returns>
        public static string DropIgnoreCase(string src, string pattern)
        {
            return ReplaceIgnoreCase(src, pattern, "");
        }

        /// <summary>
        /// Doubles every single quote so the text can be embedded in a SQL string literal.
        /// </summary>
        /// <remarks>
        /// NOTE(review): quote doubling is not a substitute for parameterized
        /// queries; prefer command parameters wherever the caller allows it.
        /// </remarks>
        /// <param name="src">The string about to be placed in a SQL statement.</param>
        /// <returns>The string with quotes doubled, or null when <paramref name="src"/> is null.</returns>
        public static string ToSQL(string src)
        {
            if (src == null)
            {
                return null;
            }

            // Plain literal replacement; no regular expression is needed.
            return src.Replace("'", "''");
        }

        /// <summary>
        /// Removes the opening and closing tags of one HTML element from the content.
        /// The text between the tags is kept.
        /// </summary>
        /// <param name="content">The HTML content.</param>
        /// <param name="tagName">
        /// The tag name, matched literally and case-insensitively. A null or empty
        /// name removes every tag, as it always has.
        /// </param>
        /// <returns>The content without the given tags.</returns>
        public static string DropHtmlTag(string content, string tagName)
        {
            if (string.IsNullOrEmpty(tagName))
            {
                return DropHtmlTag(content);
            }

            // Escape the name so regex metacharacters in it are literal, and require
            // whitespace, '/' or '>' right after it so "b" no longer matches
            // the br or body tags as well.
            string pattern = "</?" + Regex.Escape(tagName) + @"(?=[\s/>])[^>]*>";
            return DropIgnoreCase(content, pattern);
        }

        /// <summary>
        /// Removes every HTML tag from the content.
        /// </summary>
        /// <param name="content">The HTML content.</param>
        /// <returns>The content without HTML tags.</returns>
        public static string DropHtmlTag(string content)
        {
            // Anything from '<' up to the next '>' counts as a tag.
            return Drop(content, "<[^>]*>");
        }

        /// <summary>
        /// Generates a random string drawn from <see cref="RANDOM_STRING_SOURCE"/>.
        /// </summary>
        /// <remarks>
        /// Uses <see cref="Random"/>, so the result is not suitable for passwords,
        /// tokens or other secrets.
        /// </remarks>
        /// <param name="num">The number of characters to generate.</param>
        /// <returns>
        /// A random string of <paramref name="num"/> characters, or an empty string
        /// when <paramref name="num"/> is zero or negative.
        /// </returns>
        public static string GetRandomString(int num)
        {
            if (num <= 0)
            {
                return "";
            }

            StringBuilder builder = new StringBuilder(num);
            lock (s_randomLock)
            {
                for (int i = 0; i < num; i++)
                {
                    // Next(n) yields 0..n-1 uniformly. The previous
                    // Round(NextDouble() * 36) could yield 36 and index past the
                    // end of the alphabet.
                    builder.Append(RANDOM_STRING_SOURCE[s_random.Next(RANDOM_STRING_SOURCE.Length)]);
                }
            }

            return builder.ToString();
        }

        /// <summary>
        /// Determines whether a string consists only of the ASCII digits 0-9.
        /// </summary>
        /// <param name="inputData">The string to test.</param>
        /// <returns>
        /// true when the string is non-empty and every character is an ASCII digit;
        /// false otherwise, including for null.
        /// </returns>
        public static bool IsNumeric(string inputData)
        {
            if (string.IsNullOrEmpty(inputData))
            {
                return false;
            }

            // An explicit scan avoids two regex surprises: "$" also matches before a
            // trailing newline, and "\d" accepts non-ASCII Unicode digits.
            foreach (char c in inputData)
            {
                if (c < '0' || c > '9')
                {
                    return false;
                }
            }

            return true;
        }

        /// <summary>
        /// Escapes angle brackets so HTML tags show up as visible text on a web page.
        /// </summary>
        /// <remarks>
        /// NOTE(review): only the two angle brackets are escaped; ampersands and
        /// quotes are left as they are. Use <see cref="ToHtml"/> when ampersands
        /// must be escaped too.
        /// </remarks>
        /// <param name="src">The text to escape.</param>
        /// <returns>The escaped text, or null when <paramref name="src"/> is null.</returns>
        public static string EscapeHtml(string src)
        {
            if (src == null)
            {
                return null;
            }

            return src.Replace(">", "&gt;").Replace("<", "&lt;");
        }

        /// <summary>
        /// Converts plain text to HTML: escapes markup characters, turns line breaks
        /// into br tags, and turns tabs and spaces into non-breaking spaces.
        /// </summary>
        /// <param name="str">The text to format.</param>
        /// <returns>The formatted text; null or empty input is returned unchanged.</returns>
        public static String ToHtml(string str)
        {
            if (string.IsNullOrEmpty(str))
            {
                return str;
            }

            StringBuilder sb = new StringBuilder(str);

            // The ampersand must go first so the entities produced below are not
            // escaped a second time.
            sb.Replace("&", "&amp;");
            sb.Replace("<", "&lt;");
            sb.Replace(">", "&gt;");

            // Convert "\r\n" before the single-character breaks so one Windows line
            // break does not become two br tags.
            sb.Replace("\r\n", "<br>");
            sb.Replace("\n", "<br>");
            sb.Replace("\r", "<br>");

            // A tab becomes one space, which then becomes one non-breaking space.
            sb.Replace("\t", " ");
            sb.Replace(" ", "&nbsp;");
            return sb.ToString();
        }

        /// <summary>
        /// Converts HTML produced by <see cref="ToHtml"/> back to plain text.
        /// </summary>
        /// <param name="str">The HTML to convert.</param>
        /// <returns>The plain text; null or empty input is returned unchanged.</returns>
        public static String ToTxt(String str)
        {
            if (string.IsNullOrEmpty(str))
            {
                return str;
            }

            StringBuilder sb = new StringBuilder(str);
            sb.Replace("&nbsp;", " ");
            sb.Replace("<br>", "\r\n");
            sb.Replace("&lt;", "<");
            sb.Replace("&gt;", ">");

            // The ampersand entity goes last so that "&amp;lt;" decodes to the
            // literal text "&lt;" rather than to "<".
            sb.Replace("&amp;", "&");
            return sb.ToString();
        }

        /// <summary>
        /// Extracts the visible text of an HTML document: keeps what follows the
        /// opening body tag, then drops script blocks, style blocks, all remaining
        /// tags and, finally, all whitespace.
        /// </summary>
        /// <remarks>
        /// Every whitespace character is removed, including spaces between words.
        /// </remarks>
        /// <param name="html">The HTML document or fragment.</param>
        /// <returns>The text content; null or empty input is returned unchanged.</returns>
        public static string HtmlToPlainText(string html)
        {
            if (string.IsNullOrEmpty(html))
            {
                return html;
            }

            // Skip the head section when a body tag exists; otherwise treat the
            // whole input as body content instead of failing on a -1 index.
            int bodyStart = html.IndexOf("<body", StringComparison.OrdinalIgnoreCase);
            string text = bodyStart >= 0 ? html.Substring(bodyStart) : html;

            // Lazy quantifiers keep each removal inside one script/style block; a
            // greedy match would also delete everything between two separate blocks.
            text = Regex.Replace(text, @"<script\b[^>]*>[\s\S]*?</script\s*>", "", RegexOptions.IgnoreCase);
            text = Regex.Replace(text, @"<style\b[^>]*>[\s\S]*?</style\s*>", "", RegexOptions.IgnoreCase);
            text = Regex.Replace(text, "<[^>]*>", "");
            return Regex.Replace(text, @"\s", "");
        }
    }

  • 相关阅读:
    在编码转错的情况下,如何恢复
    【娱乐】给你的电脑检查兼容性,并获取你的电脑上安装的软件
    发布一个纯PHP的中文关键字自动提取工具
    [转]程序员能力矩阵 Programmer Competency Matrix
    解决PHP数组内存耗用太多的问题
    哈希表之数学原理
    PHP高级编程之单线程实现并行抓取网页
    如何自动的检测字符串编码
    如何检测网络中断, 并自动重启网卡
    完全二叉树判断,简单而复杂
  • 原文地址:https://www.cnblogs.com/wuhuisheng/p/1778740.html
Copyright © 2020-2023  润新知