• HttpWebRequest抓数据遇到的问题


    1、有些网站访问速度慢,而且这个网站的连接数有限(比如全球内衣,另外对于女生各种什么内衣不懂的也可以上去查看了解哈),因为连接没有及时关闭,造成抓取页面数据的时候超时很严重。

      解决:及时调用相应的 HttpWebResponse.Close() 和 HttpWebRequest.Abort(),并设置 HttpWebRequest.KeepAlive = false,还有把超时时间设置长一点,之后连接超时的几率就降低了。还可以在 C 盘的 hosts 文件里把域名直接指向某个 IP,减少去 DNS 服务器查找的时间。

    2、抓中国供应商的时候开了多线程跑的太快,几十条就出现拉动类的验证码。

    解决:使用代理或者移动的宽带,去拨号

    // Usage example: drop the current dial-up connection and dial the "ADSL" entry again.
    RASDisplay ras = new RASDisplay();
    ras.Disconnect();// hang up the current connection
    ras.Connect("ADSL");// dial again

    // Dialing does not complete immediately, so give the connection time to come up
    Thread.Sleep(5000);

    下面是封装请求的类库

    /// <summary>
    /// Downloads <paramref name="url"/> with an HTTP GET and returns the response body as text.
    /// </summary>
    /// <param name="url">Address to request.</param>
    /// <param name="charset">
    /// Name of the encoding used to decode the body; it is also sent in the custom
    /// "content" request header.
    /// </param>
    /// <returns>
    /// The decoded response body, or an empty string when the request fails for any reason
    /// (invalid URL, connection failure, timeout, unknown encoding, ...).
    /// </returns>
    public static string getRequest(string url, string charset = "utf-8")
    {
        string result = "";
        string code = charset;
        HttpWebRequest myreq = null;

        try
        {
            myreq = (HttpWebRequest)WebRequest.Create(url);
            myreq.Timeout = 20000;
            myreq.Method = "GET";
            // Keep-alive stays off so every connection is released as soon as the response is
            // closed; the original code switched it back on right before sending the request.
            myreq.KeepAlive = false;
            myreq.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8";
            myreq.Headers.Add("content", "text/html; charset=" + code);
            myreq.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.106 Safari/537.36";

            // using-blocks close the reader, stream and response on success and on failure.
            using (HttpWebResponse myres = (HttpWebResponse)myreq.GetResponse())
            using (Stream stream = myres.GetResponseStream())
            using (StreamReader reader = new StreamReader(stream, System.Text.Encoding.GetEncoding(code)))
            {
                result = reader.ReadToEnd();
            }
        }
        catch (System.Exception ex)
        {
            // Best-effort contract: callers get "" on failure, but the cause is no longer lost.
            System.Diagnostics.Trace.TraceWarning("getRequest failed for " + url + ": " + ex.Message);
        }
        finally
        {
            // The response may never have been created, so only the request is touched here.
            if (myreq != null)
            {
                myreq.Abort();
            }
        }

        return result;
    }

    #region 自动拨号
    /* 自动拨号
    * 1、右击“网上邻居”--属性;
    2、选择“宽带连接”,右击“属性”- >“选项”;
    3、把“提示名称、密码和证书等”前面的对号去掉,点“确定”退出;

    4、生成模式务必改成x86
    */
    /// <summary>
    /// Connection record handed to RAS.RasEnumConnections, which is bound to the ANSI export
    /// RasEnumConnectionsA. The layout is spelled out explicitly; sequential/ANSI is also what
    /// the compiler applies to a struct when no StructLayout attribute is given.
    /// </summary>
    [StructLayout(LayoutKind.Sequential, CharSet = CharSet.Ansi)]
    public struct RASCONN
    {
        // Size of this structure in bytes; the caller fills it in before the native call.
        public int dwSize;

        // Handle of the connection; RASDisplay later passes it to RasHangUp.
        public IntPtr hrasconn;

        // Fixed-size inline text buffers (sizes include the terminating NUL).
        [MarshalAs(UnmanagedType.ByValTStr, SizeConst = 257)]
        public string szEntryName;

        [MarshalAs(UnmanagedType.ByValTStr, SizeConst = 17)]
        public string szDeviceType;

        [MarshalAs(UnmanagedType.ByValTStr, SizeConst = 129)]
        public string szDeviceName;
    }

    // Statistics buffer for RAS.RasGetConnectionStatistics: fifteen 32-bit fields laid out
    // sequentially. The field order is the native layout and must not be rearranged.
    [StructLayout(LayoutKind.Sequential, CharSet = CharSet.Auto)]
    public struct RasStats
    {
    // Size of the structure in bytes.
    // NOTE(review): nothing in this file assigns it before the native call — confirm whether the API requires it.
    public int dwSize;
    // Byte counters; RASDisplay exposes them as BytesTransmitted / BytesReceived.
    public int dwBytesXmited;
    public int dwBytesRcved;
    // Frame counters (presumably frames sent / received — not read anywhere in this file).
    public int dwFramesXmited;
    public int dwFramesRcved;
    // Error counters (not read anywhere in this file).
    public int dwCrcErr;
    public int dwTimeoutErr;
    public int dwAlignmentErr;
    public int dwHardwareOverrunErr;
    public int dwFramingErr;
    public int dwBufferOverrunErr;
    // Compression ratios and line speed (not read anywhere in this file).
    public int dwCompressionRatioIn;
    public int dwCompressionRatioOut;
    public int dwBps;
    // Connection duration; RASDisplay divides it by 1000 to get seconds, so presumably milliseconds.
    public int dwConnectionDuration;
    }

    /// <summary>
    /// One phone-book entry as filled in by RAS.RasEnumEntries.
    /// </summary>
    /// <remarks>
    /// The entry name is an inline fixed-size character buffer in the native structure, so it
    /// must be marshalled as ByValTStr. Without the attribute the string is marshalled as a
    /// pointer, which makes Marshal.SizeOf (and therefore dwSize) far too small.
    /// The buffer length (256 + 1) is the same one RASCONN.szEntryName uses.
    /// Only the short layout is declared: the later dwFlags / szPhonebookPath members
    /// (the "WINVER5" variant) are intentionally left out.
    /// </remarks>
    [StructLayout(LayoutKind.Sequential, CharSet = CharSet.Auto)]
    public struct RasEntryName
    {
        // Size of this structure in bytes; the caller sets it on every array element.
        public int dwSize;

        // Name of the phone-book entry, stored inline.
        [MarshalAs(UnmanagedType.ByValTStr, SizeConst = 256 + 1)]
        public string szEntryName;
    }
    // Raw P/Invoke declarations for the native dialing functions used by RASDisplay:
    // connection/entry enumeration and hang-up from rasapi32.dll, dialing from wininet.dll.
    public class RAS
    {
    // Bound explicitly to the ANSI export (RasEnumConnectionsA). The buffer is a single
    // RASCONN passed by reference, so only one connection record can be received per call.
    [DllImport("Rasapi32.dll", EntryPoint = "RasEnumConnectionsA",
    SetLastError = true)]

    internal static extern int RasEnumConnections
    (
    ref RASCONN lprasconn, // buffer to receive connections data
    ref int lpcb, // size in bytes of buffer
    ref int lpcConnections // number of connections written to buffer
    );


    // NOTE(review): RasStats is a struct and is passed by value here, so the native side
    // cannot write results back into the caller's variable. The native function presumably
    // expects a pointer to the statistics buffer (i.e. `ref`) — confirm against the API docs.
    [DllImport("rasapi32.dll", CharSet = CharSet.Auto)]
    internal static extern uint RasGetConnectionStatistics(
    IntPtr hRasConn, // handle to the connection
    [In, Out]RasStats lpStatistics // buffer to receive statistics
    );
    // Hangs up the connection identified by the given handle.
    [DllImport("rasapi32.dll", CharSet = CharSet.Auto)]
    public extern static uint RasHangUp(
    IntPtr hrasconn // handle to the RAS connection to hang up
    );

    // Enumerates phone-book entries into a caller-allocated array; RASDisplay calls it once
    // with a one-element array and again with a larger one when more entries are reported.
    [DllImport("rasapi32.dll", CharSet = CharSet.Auto)]
    public extern static uint RasEnumEntries(
    string reserved, // reserved, must be NULL
    string lpszPhonebook, // pointer to full path and
    // file name of phone-book file
    [In, Out]RasEntryName[] lprasentryname, // buffer to receive
    // phone-book entries
    ref int lpcb, // size in bytes of buffer
    out int lpcEntries // number of entries written
    // to buffer
    );

    // Dials the named connection entry; RASDisplay.Connect returns this function's result unchanged.
    // NOTE(review): lpdwConnection is declared as a 32-bit int — presumably only valid for
    // x86 builds, which the file's header comment requires; confirm before building x64.
    [DllImport("wininet.dll", CharSet = CharSet.Auto)]
    public extern static int InternetDial(
    IntPtr hwnd,
    [In]string lpszConnectoid,
    uint dwFlags,
    ref int lpdwConnection,
    uint dwReserved
    );

    // Empty constructor; every member of this class is static.
    public RAS()
    {
    }
    }
    /// <summary>
    /// Kind of cached item to delete.
    /// </summary>
    public enum DEL_CACHE_TYPE
    {
        /// <summary>Temporary internet files.</summary>
        File = 0,

        /// <summary>Cookies.</summary>
        Cookie = 1
    }

    /// <summary>
    /// Snapshot of the machine's dial-up state taken at construction time (active connection,
    /// its statistics and the list of phone-book entries), plus helpers to dial and hang up.
    /// </summary>
    /// <remarks>
    /// The snapshot is not refreshed afterwards: create a new instance to observe a connection
    /// established by <see cref="Connect"/>.
    /// </remarks>
    public class RASDisplay
    {
        // NOTE(review): the parameter is declared as DEL_CACHE_TYPE, but the native wininet
        // function of this name presumably takes the URL of the cache entry as a string.
        // It is not called anywhere in this class; kept unchanged for existing callers — confirm.
        [DllImport("wininet.dll", CharSet = CharSet.Auto)]
        public static extern bool DeleteUrlCacheEntry(
            DEL_CACHE_TYPE type
        );

        // The statistics structure is an output buffer, so it has to be passed by reference
        // for the values written by the native side to reach the managed variable.
        [DllImport("rasapi32.dll", CharSet = CharSet.Auto)]
        private static extern uint RasGetConnectionStatistics(
            IntPtr hRasConn,
            ref RasStats lpStatistics
        );

        private string m_duration;
        private string m_ConnectionName;
        private string[] m_ConnectionNames;
        private double m_TX;
        private double m_RX;
        private bool m_connected;
        private IntPtr m_ConnectedRasHandle;

        /// <summary>
        /// Queries the active connection (if any), its statistics and the phone-book entries.
        /// </summary>
        public RASDisplay()
        {
            m_connected = true;
            // Never expose a null list, even when enumeration is skipped or fails.
            m_ConnectionNames = new string[0];

            RASCONN lprasConn = new RASCONN();
            lprasConn.dwSize = Marshal.SizeOf(typeof(RASCONN));
            lprasConn.hrasconn = IntPtr.Zero;

            int lpcb = Marshal.SizeOf(typeof(RASCONN));
            int lpcConnections = 0;

            int nRet = RAS.RasEnumConnections(ref lprasConn, ref lpcb, ref lpcConnections);
            if (nRet != 0)
            {
                // Any error is treated as "not connected". The buffer holds one record only,
                // so this also covers the case where it was too small for all connections.
                m_connected = false;
                return;
            }

            if (lpcConnections > 0)
            {
                m_ConnectedRasHandle = lprasConn.hrasconn;
                m_ConnectionName = lprasConn.szEntryName;

                RasStats stats = new RasStats();
                stats.dwSize = Marshal.SizeOf(typeof(RasStats));
                // On failure the structure keeps its zero values, which yields a zero duration
                // and zero byte counters below.
                RasGetConnectionStatistics(lprasConn.hrasconn, ref stats);

                // The native counters are unsigned 32-bit values; reinterpret them so large
                // values do not show up as negative numbers.
                uint totalSeconds = unchecked((uint)stats.dwConnectionDuration) / 1000;
                uint hours = totalSeconds / 3600;
                uint minutes = (totalSeconds % 3600) / 60;
                uint seconds = totalSeconds % 60;

                m_duration = hours + " hours " + minutes + " minutes " + seconds + " secs";
                m_TX = unchecked((uint)stats.dwBytesXmited);
                m_RX = unchecked((uint)stats.dwBytesRcved);
            }
            else
            {
                m_connected = false;
            }

            m_ConnectionNames = ReadEntryNames();
        }

        // Enumerates the phone-book entries; returns an empty array when the call fails.
        private static string[] ReadEntryNames()
        {
            int entryNameSize = Marshal.SizeOf(typeof(RasEntryName));
            int entryCount = 1;
            int bufferSize = entryCount * entryNameSize;

            RasEntryName[] names = new RasEntryName[entryCount];
            names[0].dwSize = entryNameSize;

            uint retval = RAS.RasEnumEntries(null, null, names, ref bufferSize, out entryCount);

            // More than one entry: the first call reported the required size, so retry with
            // an array large enough for all of them.
            if (entryCount > 1)
            {
                names = new RasEntryName[entryCount];
                for (int i = 0; i < names.Length; i++)
                {
                    names[i].dwSize = entryNameSize;
                }

                retval = RAS.RasEnumEntries(null, null, names, ref bufferSize, out entryCount);
            }

            if (retval != 0 || entryCount <= 0)
            {
                return new string[0];
            }

            // Copy only the entries that were actually written.
            int count = Math.Min(entryCount, names.Length);
            string[] result = new string[count];
            for (int i = 0; i < count; i++)
            {
                result[i] = names[i].szEntryName;
            }
            return result;
        }

        /// <summary>Connection time formatted as "H hours M minutes S secs"; empty when not connected.</summary>
        public string Duration
        {
            get
            {
                return m_connected ? m_duration : "";
            }
        }

        /// <summary>Names of the phone-book entries found at construction time (never null).</summary>
        public string[] Connections
        {
            get
            {
                return m_ConnectionNames;
            }
        }

        /// <summary>Bytes sent over the active connection; 0 when not connected.</summary>
        public double BytesTransmitted
        {
            get
            {
                return m_connected ? m_TX : 0;
            }
        }

        /// <summary>Bytes received over the active connection; 0 when not connected.</summary>
        public double BytesReceived
        {
            get
            {
                return m_connected ? m_RX : 0;
            }
        }

        /// <summary>Entry name of the active connection; empty when not connected.</summary>
        public string ConnectionName
        {
            get
            {
                return m_connected ? m_ConnectionName : "";
            }
        }

        /// <summary>True when an active connection was found at construction time and has not been hung up since.</summary>
        public bool IsConnected
        {
            get
            {
                return m_connected;
            }
        }

        /// <summary>
        /// Dials the given connection entry without user interaction.
        /// </summary>
        /// <param name="Connection">Name of the dial-up entry, e.g. "ADSL".</param>
        /// <returns>The value returned by InternetDial, unchanged.</returns>
        public int Connect(string Connection)
        {
            const uint INTERNET_AUTO_DIAL_UNATTENDED = 2;
            int connectionId = 0;
            return RAS.InternetDial(IntPtr.Zero, Connection, INTERNET_AUTO_DIAL_UNATTENDED, ref connectionId, 0);
        }

        /// <summary>
        /// Hangs up the connection found at construction time; does nothing when there was none.
        /// </summary>
        public void Disconnect()
        {
            if (m_ConnectedRasHandle == IntPtr.Zero)
            {
                // No connection handle was captured, so there is nothing to hang up.
                return;
            }

            if (RAS.RasHangUp(m_ConnectedRasHandle) == 0)
            {
                // The handle is no longer usable after a successful hang-up.
                m_ConnectedRasHandle = IntPtr.Zero;
                m_connected = false;
            }
        }
    }

    #endregion 自动拨号

  • 相关阅读:
    LAMP 环境搭建
    环境搭建/安装部署
    Linux 系统安装(5分钟)
    Eclipse 配置
    wsdl生成的客户端
    Oracle 修改字符集
    jquery expand
    js中不常用的对象或方法
    js中面向对象的封装
    JavaScript点击li显示索引
  • 原文地址:https://www.cnblogs.com/zhian/p/5810048.html
Copyright © 2020-2023  润新知