• html转换为纯文本,支持撇号


     /// <summary>
     /// Converts HTML markup to plain text, including decoding of the apostrophe entity (<c>&amp;#39;</c>).
     /// </summary>
     /// <remarks>
     /// The conversion is regex based and runs in this order:
     /// line breaks and tabs in the markup become spaces; <c>head</c>, <c>script</c> and <c>style</c>
     /// elements are removed together with their content; <c>td</c> becomes a space, <c>br</c>/<c>li</c>
     /// become one line break and <c>tr</c>/<c>p</c> become two; every remaining tag is stripped;
     /// a small set of character entities is decoded and any other entity is dropped; finally runs of
     /// spaces and doubled line breaks are collapsed.
     /// Line breaks are emitted as <c>"\n"</c>.
     /// </remarks>
     /// <param name="source">The HTML markup to convert. May be <c>null</c> or empty.</param>
     /// <returns>
     /// The plain-text rendering of <paramref name="source"/>, or an empty string when
     /// <paramref name="source"/> is <c>null</c> or empty.
     /// </returns>
     private static string HtmlToPlainText(string source)
     {
         if (string.IsNullOrEmpty(source))
         {
             return string.Empty;
         }

         const RegexOptions options = RegexOptions.IgnoreCase;

         // Flatten whitespace that is only formatting in the markup. Doing this first also means
         // '.' in the patterns below can span what used to be several source lines.
         string result = source.Replace("\r", " ").Replace("\n", " ").Replace("\t", " ");

         // Drop non-visible elements including their content. The quantifier is lazy so that each
         // element is removed on its own; a greedy match would also swallow the visible text
         // between the first opening tag and the last closing tag.
         result = Regex.Replace(result, @"<\s*head\b[^>]*>.*?<\s*/\s*head\s*>", string.Empty, options);
         result = Regex.Replace(result, @"<\s*script\b[^>]*>.*?<\s*/\s*script\s*>", string.Empty, options);
         result = Regex.Replace(result, @"<\s*style\b[^>]*>.*?<\s*/\s*style\s*>", string.Empty, options);

         // Separate table cells with a space.
         result = Regex.Replace(result, @"<\s*td\b[^>]*>", " ", options);

         // One line break for <br> and <li>. The \b keeps longer tag names from matching and
         // [^>]* accepts attributes as well as the self-closing forms <br/> and <br />.
         result = Regex.Replace(result, @"<\s*br\b[^>]*>", "\n", options);
         result = Regex.Replace(result, @"<\s*li\b[^>]*>", "\n", options);

         // Two line breaks for table rows and paragraphs (\b prevents <pre>, <param>, <track> ...
         // from being treated as <p> / <tr>).
         result = Regex.Replace(result, @"<\s*(tr|p)\b[^>]*>", "\n\n", options);

         // Strip every remaining tag.
         result = Regex.Replace(result, @"<[^>]*>", string.Empty);

         // Decode the supported entities. "&amp;" is handled last so that text such as "&amp;lt;"
         // decodes once to "&lt;" instead of twice to "<".
         result = Regex.Replace(result, "&nbsp;", " ", options);
         result = Regex.Replace(result, "&lt;", "<", options);
         result = Regex.Replace(result, "&gt;", ">", options);
         result = Regex.Replace(result, "&#39;", "'", options);

         // Drop any other entity. Only entity-shaped tokens are matched, so ordinary text that
         // happens to contain '&' and ';' (e.g. "R&D yes;") is left untouched.
         result = Regex.Replace(result, @"&(?!amp;)#?[a-z0-9]{2,6};", string.Empty, options);
         result = Regex.Replace(result, "&amp;", "&", options);

         // Collapse runs of spaces, turn "break, spaces, break" into a doubled break, then reduce
         // each doubled break to a single one.
         result = Regex.Replace(result, " {2,}", " ");
         result = Regex.Replace(result, "\n +\n", "\n\n");
         result = Regex.Replace(result, "(\n\n)+", "\n");

         return result;
     }
  • 相关阅读:
    使用intellij idea搭建spring-springmvc-mybatis整合框架环境
    IntelliJ IDEA项目包分层结构显示设置
    SpringMVC---applicationContext.xml
    SpringMVC 常用applicationContext.xml、web.xml、servlet-mvc.xml简单配置
    java web,从零开始,一步一步配置ssm(Spring+SpringMVC+MyBatis)框架
    Spring+SpringMVC+MyBatis+easyUI整合进阶篇(二)RESTful API实战笔记(接口设计及Java后端实现)
    SSM后台管理系统(Spring SpringMVC Mybatis Mysql EasyUI)
    SSM 搭建精美实用的管理系统
    SSM搭建一个后台管理系统
    mac下的夜神模拟器链接vscode
  • 原文地址:https://www.cnblogs.com/haorui/p/4228490.html
Copyright © 2020-2023  润新知