• 【.NET】获取网页中CDM下载链接的地址


    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Runtime.InteropServices;
    using System.Text;
    using System.Threading.Tasks;
    using System.Windows.Forms;
    using Framework.Core.Crawl;
    using HtmlAgilityPack;
    
    namespace WebCaptureSolution
    {
        /// <summary>
        /// Drives a <c>WebCapture</c> browser form to log in to a forum, walk the
        /// thread list of one archive page, and write each thread's title together
        /// with the download links found on it to a text file.
        /// </summary>
        static class Program
        {
            /// <summary>
            /// urlmon.dll entry point; used here to set the User-Agent string for
            /// the current browser session.
            /// </summary>
            [DllImport("urlmon.dll", CharSet = CharSet.Ansi)]
            private static extern int UrlMkSetSessionOption(int dwOption, string pBuffer, int dwBufferLength, int dwReserved);

            const int URLMON_OPTION_USERAGENT = 0x10000001;

            const string SPUserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36";

            /// <summary>
            /// The main entry point for the application.
            /// </summary>
            /// <param name="args">Command-line arguments (not used).</param>
            [STAThread]
            static void Main(string[] args)
            {
                Application.EnableVisualStyles();
                Application.SetCompatibleTextRenderingDefault(false);
                string log_url = "http://www.handsupowo.pl/member.php?action=login";
                string url1 = "http://www.handsupowo.pl/archive/index.php?forum-13.html";
                List<string> lastinfo = new List<string>();

                var form = new WebCapture();

                #region Login
                if (!string.IsNullOrEmpty(SPUserAgent))
                {
                    UrlMkSetSessionOption(URLMON_OPTION_USERAGENT, SPUserAgent, SPUserAgent.Length, 0);
                }
                NavigateAndWait(form, log_url);

                // Step 1: fill in the login form and submit it.
                var content = form.WebBrowser.Document.GetElementById("content");
                if (content == null)
                {
                    throw new InvalidOperationException("Login page does not contain an element with id 'content'.");
                }
                var inputs = content.GetElementsByTagName("input");
                for (int i = 0; i < inputs.Count; i++)
                {
                    var field = inputs[i];
                    if (field.OuterHtml.Contains("username"))
                    {
                        field.SetAttribute("value", "id");
                    }
                    else if (field.OuterHtml.Contains("pass"))
                    {
                        field.SetAttribute("value", "password");
                    }
                    else if (field.OuterHtml.Contains("submit"))
                    {
                        field.InvokeMember("Click");
                        break;
                    }
                }
                // Give the submitted login request time to complete.
                PumpMessages(10);
                #endregion

                // Step 2: collect the thread links from the archive index page.
                NavigateAndWait(form, url1);
                HtmlAgilityPack.HtmlDocument htmldoc = new HtmlAgilityPack.HtmlDocument();
                LoadCurrentPage(form, htmldoc);

                List<string> threadUrls = new List<string>();
                var anchors = htmldoc.DocumentNode.SelectNodes("//div[@class='threadlist']//a");
                // SelectNodes yields null (not an empty collection) when nothing matches.
                if (anchors != null)
                {
                    foreach (var anchor in anchors)
                    {
                        var href = anchor.Attributes["href"];
                        if (href != null && !string.IsNullOrEmpty(href.Value))
                        {
                            threadUrls.Add(href.Value);
                        }
                    }
                }

                // Step 3: visit every thread and record its title and download links.
                foreach (var cdmurl in threadUrls)
                {
                    NavigateAndWait(form, cdmurl);
                    PumpMessages(5);
                    LoadCurrentPage(form, htmldoc);

                    var downloadurl = htmldoc.DocumentNode.SelectNodes("//a[@rel='nofollow']");
                    if (downloadurl == null)
                    {
                        // Fall back to links that open in a new window.
                        downloadurl = htmldoc.DocumentNode.SelectNodes("//a[@target='_blank']");
                    }
                    if (downloadurl == null)
                    {
                        // No candidate links on this page: nothing to record.
                        continue;
                    }

                    var info = htmldoc.DocumentNode.SelectSingleNode("//div[@id='fullversion']//a");

                    // Entry layout: title first, then every download link.
                    // The title is added before the loop so that no link is
                    // consumed by the "add the title once" step.
                    List<string> dllist = new List<string>();
                    if (info != null)
                    {
                        dllist.Add(info.InnerText);
                    }
                    foreach (var dl in downloadurl)
                    {
                        var href = dl.Attributes["href"];
                        if (href != null)
                        {
                            dllist.Add(href.Value);
                        }
                    }

                    lastinfo.Add(string.Join(Environment.NewLine, dllist.ToArray()));
                    lastinfo.Add(Environment.NewLine);
                }

                // NOTE(review): this path has no directory separators, so it is
                // resolved relative to the current directory of drive D:. The
                // backslashes were presumably lost when the listing was published
                // - confirm the intended location before relying on it.
                System.IO.File.WriteAllLines(@"D:NodejsmyjsDownLoadUrl.txt", lastinfo.ToArray(), Encoding.UTF8);
            }

            /// <summary>
            /// Starts navigation to <paramref name="url"/> and pumps the message
            /// loop until the form sets <c>DocumentOK</c>.
            /// </summary>
            private static void NavigateAndWait(WebCapture form, string url)
            {
                form.DocumentOK = false;
                form.Navigate(url);
                while (!form.DocumentOK)
                {
                    Application.DoEvents();
                }
            }

            /// <summary>
            /// Keeps pumping the message loop for the given number of seconds.
            /// </summary>
            private static void PumpMessages(double seconds)
            {
                // Stopwatch is monotonic, unlike DateTime.Now, so clock
                // adjustments cannot shorten or extend the wait.
                var timer = System.Diagnostics.Stopwatch.StartNew();
                while (timer.Elapsed.TotalSeconds <= seconds)
                {
                    Application.DoEvents();
                }
            }

            /// <summary>
            /// Loads the markup of the page currently shown in the browser into
            /// <paramref name="target"/>.
            /// </summary>
            private static void LoadCurrentPage(WebCapture form, HtmlAgilityPack.HtmlDocument target)
            {
                target.LoadHtml(form.WebBrowser.Document.GetElementsByTagName("html")[0].OuterHtml);
            }
        }
    }
    

      

  • 相关阅读:
    CSUOJ 2192: Wells弹键盘 (dp)
    Gym101981I Magic Potion(最大流)
    BZOJ2588: Count on a tree 主席树
    C#添加多个Sheet表
    DataTable排序,检索,合并
    检索 COM 类工厂中 CLSID 为 {} 的组件时失败,原因是出现以下错误: 80070005
    C#获取URL参数值
    JQuery爱好者们的福音 jQuery EasyUI 开源插件套装 完全替代ExtJS
    12个ajax弹出层效果
    CSS完美兼容IE6/IE7/FF的通用方法
  • 原文地址:https://www.cnblogs.com/c-x-a/p/6400796.html
Copyright © 2020-2023  润新知