• HtmlEntities


    #region GetOnlyTextFromHtmlCode + RemoveHtmlChars + RemoveTagFromHtmlCode
            /// <summary>
            /// Reduces an HTML fragment to plain text: source line breaks are flattened,
            /// paragraph ends and &lt;br&gt; tags become line breaks, comments, style and
            /// script elements are dropped, all remaining tags are stripped, the entities
            /// known to <c>unescapeHtmlEntities</c> are decoded and runs of spaces are
            /// collapsed to a single space.
            /// Adapted from:
            /// http://www.codeproject.com/script/Content/ViewAssociatedFile.aspx?rzp=%2FKB%2Fedit%2FZetaHtmlEditControl%2F%2FZetaHtmlEditControl-Source.zip&zep=Control%2FHtmlEditControl.cs&obid=43954&obtid=2&ovid=13
            /// </summary>
            /// <param name="htmlCode">The HTML markup to convert.</param>
            /// <returns>
            /// The plain-text rendering of <paramref name="htmlCode"/>; a null or empty
            /// input is returned unchanged.
            /// </returns>
            private static string getOnlyTextFromHtmlCode(string htmlCode)
            {
                if (string.IsNullOrEmpty(htmlCode))
                {
                    return htmlCode;
                }

                // Line breaks in the markup itself carry no meaning in HTML, so turn them
                // into spaces before real line breaks are generated from </p> and <br>.
                // "\r\n" must be handled first so a Windows line ending yields one space.
                htmlCode = htmlCode.Replace("\r\n", @" ");
                htmlCode = htmlCode.Replace("\r", @" ");
                htmlCode = htmlCode.Replace("\n", @" ");

                // End of paragraph -> blank line.
                htmlCode = htmlCode.Replace(@"</p>", Environment.NewLine + Environment.NewLine);
                htmlCode = htmlCode.Replace(@"</P>", Environment.NewLine + Environment.NewLine);

                // HTML comments (may span several lines, hence Singleline).
                htmlCode = Regex.Replace(
                    htmlCode,
                    @"<!--.*?-->",
                    string.Empty,
                    RegexOptions.Singleline | RegexOptions.IgnoreCase);

                // <br>, <br/>, <br clear="all"> ... -> line break.
                htmlCode = Regex.Replace(
                    htmlCode,
                    @"<br[^>]*>",
                    Environment.NewLine,
                    RegexOptions.Singleline | RegexOptions.IgnoreCase);

                // Elements whose content is not text and must vanish together with the tag.
                htmlCode = removeTagFromHtmlCode(@"style", htmlCode);
                htmlCode = removeTagFromHtmlCode(@"script", htmlCode);

                // Every remaining tag. With Singleline, '.' already matches '\n', so this
                // is equivalent to the classic "<(.|\n)+?>" without the alternation group.
                htmlCode = Regex.Replace(
                    htmlCode,
                    @"<.+?>",
                    string.Empty,
                    RegexOptions.Singleline | RegexOptions.IgnoreCase);

                // Character entities (umlauts and a few symbols).
                htmlCode = unescapeHtmlEntities(htmlCode);

                // Collapse runs of spaces; the line breaks inserted above are kept.
                htmlCode = Regex.Replace(
                    htmlCode,
                    @" +",
                    @" ",
                    RegexOptions.Singleline | RegexOptions.IgnoreCase);

                return htmlCode;
            }
            /// <summary>
            /// Entity/replacement pairs applied by <c>unescapeHtmlEntities</c>, in order.
            /// Column 0 is the named character reference, column 1 the text it becomes.
            /// Names are case-sensitive (e.g. &amp;Auml; and &amp;auml; are different characters).
            /// Reference list: http://dev.w3.org/html5/html-author/charref
            /// </summary>
            private static readonly string[,] htmlEntityReplacements =
            {
                // Non-breaking space is deliberately mapped to a regular space.
                { "&nbsp;", " " },

                // German umlauts and sharp s.
                { "&auml;", "ä" },
                { "&Auml;", "Ä" },
                { "&ouml;", "ö" },
                { "&Ouml;", "Ö" },
                { "&uuml;", "ü" },
                { "&Uuml;", "Ü" },
                { "&szlig;", "ß" },

                // Symbols.
                { "&pound;", "£" },
                { "&sect;", "§" },
                { "&copy;", "©" },
                { "&reg;", "®" },
                { "&micro;", "µ" },
                { "&para;", "¶" },
                { "&Oslash;", "Ø" },
                { "&oslash;", "ø" },
                { "&divide;", "÷" },
                { "&times;", "×" },
            };

            /// <summary>
            /// Replaces the named HTML character references listed in
            /// <c>htmlEntityReplacements</c> with the characters they stand for.
            /// Only that fixed subset is handled; any other entity (including numeric
            /// references) is left in the text as-is.
            /// </summary>
            /// <param name="htmlCode">Text that may contain HTML entities.</param>
            /// <returns>
            /// The text with the known entities decoded; a null or empty input is
            /// returned unchanged.
            /// </returns>
            private static string unescapeHtmlEntities(string htmlCode)
            {
                if (string.IsNullOrEmpty(htmlCode))
                {
                    return htmlCode;
                }

                int pairCount = htmlEntityReplacements.GetLength(0);
                for (int i = 0; i < pairCount; i++)
                {
                    // string.Replace(string, string) compares ordinally, so the
                    // upper/lower-case variants of an entity never interfere.
                    htmlCode = htmlCode.Replace(
                        htmlEntityReplacements[i, 0],
                        htmlEntityReplacements[i, 1]);
                }

                return htmlCode;
            }
    
            /// <summary>
            /// Removes every occurrence of the given element, including its attributes
            /// and everything between the opening and the closing tag. Matching is
            /// case-insensitive and spans line breaks.
            /// </summary>
            /// <param name="tag">Element name without angle brackets, e.g. "script".</param>
            /// <param name="htmlCode">The HTML markup to clean.</param>
            /// <returns>
            /// The markup without the matched elements; if either argument is null or
            /// empty, <paramref name="htmlCode"/> is returned unchanged.
            /// </returns>
            private static string removeTagFromHtmlCode(
                string tag,
                string htmlCode)
            {
                if (string.IsNullOrEmpty(htmlCode) || string.IsNullOrEmpty(tag))
                {
                    return htmlCode;
                }

                // Escape the name so it is always matched literally inside the pattern.
                string escapedTag = Regex.Escape(tag);

                // The lookahead demands a delimiter (whitespace, '/' or '>') right after
                // the name, so "b" matches <b> and <b class="x"> but not <br> or <body>.
                // The closing tag may carry whitespace before '>' (e.g. "</script >").
                string pattern =
                    "<" + escapedTag + @"(?=[\s/>]).*?</" + escapedTag + @"\s*>";

                return Regex.Replace(
                    htmlCode,
                    pattern,
                    string.Empty,
                    RegexOptions.Singleline | RegexOptions.IgnoreCase);
            }
            #endregion
    

      

  • 相关阅读:
    【转载】Fiddler 抓包工具使用指北: 弱网络环境模拟限速测试流程
    【原创】python+selenium,用xlrd,读取excel数据,执行测试用例
    自动化测试常用断言的使用方法(python+selenium)
    selenium中的等待方法及区别
    python利用unittest进行测试用例执行的几种方式
    使用uiautomator做UI测试
    Python+Appium学习之启动手机APP或者浏览器
    查看Android应用包名、Activity的几个方法
    JavaWeb前置知识(一) : 动态和静态的区别、两种架构、常见状态码
    随笔分类
  • 原文地址:https://www.cnblogs.com/geovindu/p/4310328.html
Copyright © 2020-2023  润新知