• 某查查企业爬虫(模拟人工验证码)


    1、技术栈:.NET Core 3.1、C#、Selenium

    //爬取所有省份、城市、区县
    /// <summary>
    /// Walks the province, city and county code lists exposed by qcc.com and writes one
    /// search link per county to the text file "企查查.txt".
    /// </summary>
    /// <param name="configPath">Not read by this implementation.</param>
    /// <param name="directory">
    /// Not read by this implementation; the output path is the fixed relative name "企查查.txt".
    /// NOTE(review): confirm whether the file was meant to be placed under this directory.
    /// </param>
    /// <returns>A task that completes once all generated links have been written.</returns>
    public override async Task WriteAreaToFileAsync(string configPath, string directory)
    {
        const string SearchPageTemplate = "https://www.qcc.com/search?key={keyword}#industrycode:K&";
        const string CityListTemplate = "https://www.qcc.com/search_getCityListHtml?province={0}";
        const string CountyListTemplate = "https://www.qcc.com/search_getCountyListHtml?city={0}";

        // Formats a single-code lookup URL through the shared StringBuilder member.
        Uri BuildLookupUri(string template, string code)
        {
            StringBuilder.Clear();
            StringBuilder.AppendFormat(template, code);
            return new Uri(StringBuilder.ToString());
        }

        // Province codes are read from the province filter on the search page itself.
        List<string> provinceCodes = await GetCodeAsync(
            new Uri(SearchPageTemplate),
            ".sfilter-tag.clearfix.provinceChoose dd a");
        const string AreaFilterTemplate = "province:{0}&city:{1}&county:{2}&";
        List<string> areaLinks = new List<string>();

        foreach (string provinceCode in provinceCodes)
        {
            var cityCodes = await GetCodeAsync(BuildLookupUri(CityListTemplate, provinceCode), "dd a");
            foreach (string cityCode in cityCodes)
            {
                var countyCodes = await GetCodeAsync(BuildLookupUri(CountyListTemplate, cityCode), "dd a");
                foreach (string countyCode in countyCodes)
                {
                    // Search page link followed by the province/city/county filter segment.
                    StringBuilder.Clear();
                    StringBuilder.Append(SearchPageTemplate);
                    StringBuilder.AppendFormat(AreaFilterTemplate, provinceCode, cityCode, countyCode);

                    // Rewrite the link into the "search_index" form.
                    // NOTE(review): the base link above contains "{keyword}#", so the "中介#"
                    // replacement does not match it as written — confirm the intended keyword.
                    StringBuilder.Replace("search", "search_index");
                    StringBuilder.Replace("中介#", "中介&ajaxflag=1&");
                    StringBuilder.Replace(":industrycode", "=industrycode");

                    areaLinks.Add(StringBuilder.ToString());
                }
            }
        }

        await File.WriteAllLinesAsync("企查查.txt", areaLinks);
    }
    
    //分页爬取企业信息
            /// <summary>
            /// Downloads one search-result page, passes the third-column cell of every result row
            /// to <c>VertifyAsync</c>, and reports whether the page was full.
            /// </summary>
            /// <param name="cityUri">Address of the result page to download.</param>
            /// <returns>
            /// <c>true</c> when the page contained at least <c>PageSize</c> rows; <c>false</c> when it
            /// contained fewer, when the site reported that no data was found, or when the response
            /// stayed unrecognizable after a bounded number of refetches.
            /// </returns>
            private async Task<bool> GetAgentsAsync(Uri cityUri)
            {
                // Upper bound on responses that match none of the known page shapes. Without a
                // bound the loop below would spin forever on such a response.
                const int MaxUnrecognizedResponses = 3;

                LogHelper.Info(cityUri.ToString());
                var pageSource = await HttpClient.GetStringAsync(cityUri);
                var unrecognizedResponses = 0;

                // A usable result page contains the marker text "查企业".
                while (!pageSource.Contains("查企业"))
                {
                    if (pageSource.StartsWith("<script>window.location", StringComparison.Ordinal))
                    {
                        // The response is a redirect script; the text between the first pair of
                        // single quotes is handed to VertifyCode before the page is requested again.
                        VertifyCode(new Uri(pageSource.Split("'")[1]));
                    }
                    else if (pageSource.Contains("小查还没找到数据"))
                    {
                        // The site reports that there is no data for this query.
                        return false;
                    }
                    else if (++unrecognizedResponses >= MaxUnrecognizedResponses)
                    {
                        LogHelper.Info($"Unrecognized response for {cityUri}; giving up after {unrecognizedResponses} attempts.");
                        return false;
                    }

                    pageSource = await HttpClient.GetStringAsync(cityUri);
                }

                var block = JumonyParser.Parse(pageSource).Find(".m_srchList tbody tr td:nth-child(3)");

                // Count rows while processing them so the selector result is enumerated only once.
                var rowCount = 0;
                foreach (var item in block)
                {
                    await VertifyAsync(item.InnerHtml());
                    rowCount++;
                }

                return rowCount >= PageSize;
            }
    

    2、结果截图

    3、需要开通vip账号

    4、过滑动验证码

  • 相关阅读:
    奇数阶魔方问题
    《DSP using MATLAB》示例9.3
    《DSP using MATLAB》示例9.2
    《DSP using MATLAB》示例9.1
    找个目标很重要
    《DSP using MATLAB》示例Example 8.30
    《DSP using MATLAB》示例Example 8.29
    《DSP using MATLAB》示例Example 8.28
    《DSP using MATLAB》示例Example 8.27
    《DSP using MATLAB》示例Example 8.26
  • 原文地址:https://www.cnblogs.com/Zdelta/p/14122308.html
Copyright © 2020-2023  润新知