• 取html里的img和去html标签


    C#  :

    /// <summary>
    /// Strips markup from an HTML fragment and returns the remaining plain text.
    /// </summary>
    /// <remarks>
    /// Processing order:
    /// 1. Script elements (including their bodies), all other tags, and comment remnants are removed.
    /// 2. Stray angle brackets and CR/LF pairs left behind by malformed markup are removed.
    /// 3. A small set of named/numeric entities is decoded; any other numeric entity is dropped.
    /// Stray brackets are removed before entity decoding so that text the author deliberately
    /// escaped (for example the entity for a less-than sign) survives as a literal character.
    /// The result is plain text, not HTML-encoded; encode it again before writing it into a page.
    /// </remarks>
    /// <param name="html">The HTML fragment to clean. May be null or empty.</param>
    /// <returns>The text content of <paramref name="html"/>; an empty string for null or empty input.</returns>
    public string RemoveHTML(string html)
    {
        if (string.IsNullOrEmpty(html))
        {
            return string.Empty;
        }

        // Singleline lets '.' cross line breaks so multi-line script bodies are removed too.
        html = Regex.Replace(html, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);
        html = Regex.Replace(html, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);

        // A line break followed by any run of whitespace (indentation, blank lines) is dropped.
        html = Regex.Replace(html, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);

        // Leftovers of comments that the tag pattern above could not consume
        // (for example a comment whose body contains '>').
        html = Regex.Replace(html, @"-->", "", RegexOptions.IgnoreCase);
        html = Regex.Replace(html, @"<!--.*", "", RegexOptions.IgnoreCase);

        // string.Replace returns a new string; the result must be assigned to take effect.
        html = html.Replace("<", "");
        html = html.Replace(">", "");
        html = html.Replace("\r\n", "");

        html = Regex.Replace(html, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
        html = Regex.Replace(html, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
        html = Regex.Replace(html, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
        html = Regex.Replace(html, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
        html = Regex.Replace(html, @"&(iexcl|#161);", "\u00A1", RegexOptions.IgnoreCase);
        html = Regex.Replace(html, @"&(cent|#162);", "\u00A2", RegexOptions.IgnoreCase);
        html = Regex.Replace(html, @"&(pound|#163);", "\u00A3", RegexOptions.IgnoreCase);
        html = Regex.Replace(html, @"&(copy|#169);", "\u00A9", RegexOptions.IgnoreCase);

        // Any numeric entity not handled above is discarded rather than decoded.
        html = Regex.Replace(html, @"&#(\d+);", "", RegexOptions.IgnoreCase);

        // The ampersand entity is decoded last so that an escaped entity such as "&amp;lt;"
        // yields the literal text "&lt;" instead of being decoded a second time.
        html = Regex.Replace(html, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);

        html = Regex.Replace(html, @"<img[^>]*>;", "", RegexOptions.IgnoreCase);
        return html;
    }

    /// <summary>
    /// Extracts the <c>src</c> attribute value of every <c>img</c> tag in an HTML fragment.
    /// </summary>
    /// <param name="sHtmlText">The HTML text to scan. May be null or empty.</param>
    /// <returns>
    /// The URLs in the order the tags appear; an empty array when there are no matches
    /// or the input is null or empty.
    /// </returns>
    public static string[] GetHtmlImageUrlList(string sHtmlText)
    {
        if (string.IsNullOrEmpty(sHtmlText))
        {
            return new string[0];
        }

        // Matches an img tag and captures its src value in the "imgUrl" group.
        // The value may be double-quoted, single-quoted or unquoted, and ends at the
        // first whitespace, quote or angle bracket.
        Regex regImg = new Regex(
            @"<img\b[^<>]*?\bsrc\s*=\s*[""']?\s*(?<imgUrl>[^\s""'<>]*)[^<>]*?/?\s*>",
            RegexOptions.IgnoreCase);

        // Find every img tag in the input.
        MatchCollection matches = regImg.Matches(sHtmlText);
        string[] sUrlList = new string[matches.Count];
        int i = 0;

        // Copy the captured URL of each match into the result array.
        foreach (Match match in matches)
        {
            sUrlList[i++] = match.Groups["imgUrl"].Value;
        }

        return sUrlList;
    }

    js:

    /**
     * Collects the src attribute value of every <img> tag found in an HTML string.
     *
     * Quoted values are returned without their quotes. Unquoted values are handled
     * on a best-effort basis only.
     *
     * @param {string} htmlstr HTML text to scan.
     * @returns {string[]} The src values in document order; empty when nothing matches.
     */
    function getimgsrc(htmlstr) {
        // [^>]+? keeps the search for "src=" inside the <img ...> tag itself, so a
        // src-less <img> cannot pick up the src of a later tag. After the value the
        // pattern requires whitespace (more attributes follow) or the closing '>'.
        var reg = /<img[^>]+?src=('|")?([^'"]+)('|")?(?:\s+|>)/gi;
        var arr = [];
        var tem; // declared locally so no global is created (and strict mode does not throw)

        while ((tem = reg.exec(htmlstr)) !== null) {
            arr.push(tem[2]);
        }
        return arr;
    }
    /**
     * Removes HTML tags from a string and tidies the remaining text.
     *
     * @param {string} str Text that may contain HTML markup.
     * @returns {string} The text with tags removed, trailing whitespace trimmed from
     *     each line, and "&nbsp;" entities removed; '' for null or undefined input.
     */
    function removeHTMLTag(str) {
        if (str == null) {
            return '';
        }
        str = str.replace(/<\/?[^>]*>/g, ''); // strip opening and closing HTML tags
        str = str.replace(/[ \t]+(\r?\n)/g, '$1'); // trim whitespace at the end of each line
        str = str.replace(/&nbsp;/ig, ''); // drop non-breaking-space entities
        return str;
    }

  • 相关阅读:
    kubernetes案例 tomcat+mysql
    elasticsearch+logstash+kibana部署
    elasticsearch集群部署以及head插件安装
    Rhel7.4系统部署cobbler
    部署Hadoop2.0高性能集群
    使用haproxy实现负载均衡集群
    nginx实现动静分离的负载均衡集群
    heartbeat实现web服务器高可用
    keepalived+lvs
    LVS集群之IP TUN模式以及网站压力测试
  • 原文地址:https://www.cnblogs.com/codeloves/p/3461539.html
Copyright © 2020-2023  润新知