• 关于 getHtml() 方法、getHtmlAjax() 方法、GetHttpLength() 方法，以及清除 HTML 标签的 ClearHtml() 方法


    /// <summary>
    /// Downloads the resource at <paramref name="Url"/> and returns its body as text.
    /// </summary>
    /// <param name="Url">Address to request; handed to <c>WebRequest.Create</c>.</param>
    /// <param name="type">Name of the encoding used to decode the response body (defaults to "UTF-8").</param>
    /// <returns>
    /// The decoded body after the two <c>Replace</c> calls below, or an empty string
    /// when any step of the request throws.
    /// </returns>
    public string getHtml(string Url, string type = "UTF-8")
    {
        try
        {
            System.Net.WebRequest wReq = System.Net.WebRequest.Create(Url);

            // Dispose the response and its stream deterministically so the
            // underlying connection is released even when reading fails.
            using (System.Net.WebResponse wResp = wReq.GetResponse())
            using (System.IO.Stream respStream = wResp.GetResponseStream())
            using (System.IO.StreamReader reader = new System.IO.StreamReader(respStream, Encoding.GetEncoding(type)))
            {
                // NOTE(review): both literals are a single space as published, which strips
                // every space from the markup. They may originally have been "\r" / "\n"
                // escapes lost in publishing - confirm before relying on the output.
                return reader.ReadToEnd().Replace(" ", "").Replace(" ", "");
            }
        }
        catch (System.Exception)
        {
            // Best-effort fetch: any failure (bad URL, network error, unknown
            // encoding name) falls through to the empty-string result below.
        }
        return "";
    }

    /// <summary>
    /// Strips HTML tags from a string.
    /// </summary>
    /// <param name="str">Text that may contain markup; may be null or empty.</param>
    /// <returns>
    /// <paramref name="str"/> with every <c>&lt;...&gt;</c> span removed, or an empty
    /// string when the input is null or empty.
    /// </returns>
    public String ClearHtml(String str)
    {
        if (String.IsNullOrEmpty(str))
        {
            return String.Empty;
        }

        // A tag is "<", any run of characters other than ">", then ">".
        var tagPattern = new System.Text.RegularExpressions.Regex(
            @"<[^>]*>",
            System.Text.RegularExpressions.RegexOptions.IgnoreCase);

        return tagPattern.Replace(str, String.Empty);
    }

    /// <summary>
    /// Loads <paramref name="Url"/> in a <c>WebBrowser</c> control, waits for the page and
    /// a further five seconds so script-driven content can render, then extracts a
    /// fragment of the resulting markup.
    /// </summary>
    /// <param name="Url">Address to navigate to.</param>
    /// <param name="type">Not used by this method; kept so existing callers still compile.</param>
    /// <returns>
    /// The markup from <c>&lt;TABLE class="ID_table stocks-info-table"</c> through the first
    /// following <c>下一页&lt;/A&gt;</c>, searched inside the
    /// <c>&lt;DIV class="panelContentWrap"</c> fragment; an empty string when either
    /// pattern does not match or no document was loaded.
    /// </returns>
    /// <remarks>
    /// Blocks the calling thread while pumping the message loop with
    /// <c>Application.DoEvents()</c>. Assigns the <c>wb</c> member, which is declared
    /// outside this method.
    /// </remarks>
    public string getHtmlAjax(string Url, string type = "UTF-8")
    {
        wb = new WebBrowser();
        wb.Navigate(Url);
        while (wb.ReadyState != WebBrowserReadyState.Complete)
        {
            Application.DoEvents();
        }

        // Fixed grace period after the document reports Complete. A Stopwatch keeps
        // the wait on this thread, so no timer has to be disposed and no flag is
        // shared across threads.
        var settle = System.Diagnostics.Stopwatch.StartNew();
        while (settle.ElapsedMilliseconds < 1000 * 5)
        {
            Application.DoEvents();
        }

        if (wb.Document == null)
        {
            // Nothing was loaded; report it the same way as "pattern not found".
            return "";
        }

        var htmldocument = (mshtml.HTMLDocument)wb.Document.DomDocument;

        // Remove line breaks so ".*?" (which does not cross newlines without
        // RegexOptions.Singleline) can span the whole fragment. The published code
        // replaced " " here, which could never work: both patterns below contain
        // literal spaces, so stripping spaces made them unmatchable.
        string Content = htmldocument.documentElement.outerHTML.Replace("\r", "").Replace("\n", "");

        // Narrow to the panel first, then to the table inside it. Each pattern ends
        // at the first following "下一页</A>".
        Regex reg = new Regex("<DIV class=\"panelContentWrap\".*?下一页</A>");
        Content = reg.Match(Content).Value;
        reg = new Regex("<TABLE class=\"ID_table stocks-info-table\".*?下一页</A>");
        Content = reg.Match(Content).Value;
        return Content;
    }

    /// <summary>
    /// Issues an HTTP HEAD request and reports the response's content length.
    /// </summary>
    /// <param name="url">Absolute URL to query.</param>
    /// <returns>
    /// The response's <c>ContentLength</c> when the status is 200 OK; 0 for any other
    /// status or when a <c>WebException</c> is thrown (including the 5-second timeout).
    /// </returns>
    /// <remarks>
    /// Only <c>WebException</c> is handled. Exceptions from parsing <paramref name="url"/>
    /// or from the cast to <c>HttpWebRequest</c> propagate to the caller.
    /// </remarks>
    public long GetHttpLength(string url)
    {
        try
        {
            var req = (HttpWebRequest)WebRequest.CreateDefault(new Uri(url));
            req.Method = "HEAD";
            req.Timeout = 5000;

            // "using" releases the connection even if reading the status throws.
            using (var res = (HttpWebResponse)req.GetResponse())
            {
                return res.StatusCode == HttpStatusCode.OK ? res.ContentLength : 0L;
            }
        }
        catch (WebException)
        {
            return 0;
        }
    }

  • 相关阅读:
    dada的GCD
    涛神的城堡
    手机信号
    涛涛的Party
    壮壮的数组
    不安全字符串
    gdb core 调试多线程
    makefile $@, $^, $<, $? 表示的意义
    KMP算法的next[]数组通俗解释
    【原创】支持同时生成多个main函数 makefile 模板
  • 原文地址:https://www.cnblogs.com/it1042290135/p/5564367.html
Copyright © 2020-2023  润新知