• 【最新原创】中国移动(中国联通)_通信账单,详单,个人信息抓取爬虫代码


    源码以及最新稳定爬全国移动,联通详单账单软件购买 !请点击此!

    整体效果如下:

    所有运营商抓取到的数据都放到了一个库的三个表里面,后期做数据分析用。

    下面分享几段核心源代码给正在研究这个的朋友们。

    简单架构:

    爬虫核心代码:

    代码有些乱,基本把整个联通上的数据都能抓全了,大家自己优化代码吧。

    (原文地址:http://www.cnblogs.com/x-poior/p/5641437.html)

    using Crawler.Common;
    using Crawler.Interface;
    using System;
    using System.Collections.Generic;
    using System.Linq;
    using System.Net;
    using System.Text;
    using System.Text.RegularExpressions;
    using System.Threading;
    using System.Threading.Tasks;
    using YXRepository.Log;
    using YXRepository.Model;
    
    namespace Crawler {
        public class CrawlerUNC : CrawlerBase, ICrawler {
    
            // HTTP helper through which this crawler instance sends its requests.
            HttpHelperNew hhn;

            // Request/response log. It is static, i.e. shared by every CrawlerUNC instance.
            // Initialised at declaration so the public static CollectJsonLog cannot hit a
            // null list when it is called before any instance has been constructed.
            static IList<String> loglist = new List<string>();

            /// <summary>
            /// Current verification key, read from <c>HttpHelperNew.UNCuacverifykey</c>.
            /// </summary>
            private string currentUVC {
                get {
                    return HttpHelperNew.UNCuacverifykey;
                }
            }
            /// <summary>
            /// Creates a China Unicom crawler: stores the credentials and sets the three
            /// login-related endpoint URLs (captcha-needed check, captcha image, captcha check).
            /// </summary>
            /// <param name="number">Phone number, stored in <c>currentPhoneNumber</c>.</param>
            /// <param name="pwd">Service password, stored in <c>currentPhoneServicePwd</c>.</param>
            public CrawlerUNC(string number, string pwd) {
                const string uacServiceRoot = "https://uac.10010.com/portal/Service/";

                hhn = new HttpHelperNew();
                // loglist is static, so constructing a crawler replaces the log shared by all instances.
                loglist = new List<string>();

                currentPhoneNumber = number;
                currentPhoneServicePwd = pwd;

                loginIsNeedVerifyImgRequestUrl = uacServiceRoot + "CheckNeedVerify";
                loginVerifyImgRequestUrl = uacServiceRoot + "CreateImage";
                loginToVerifyImgRequestUrl = uacServiceRoot + "CtaIdyChk";
            }
    
            /// <summary>
            /// Queries the "check need verify" endpoint for the current phone number.
            /// </summary>
            /// <returns>
            /// <c>false</c> when the response contains <c>"resultCode":"false"</c>;
            /// <c>true</c> for any other response.
            /// </returns>
            public bool IsLoginNeedVerify() {
                string endpoint = loginIsNeedVerifyImgRequestUrl;

                // Two separate timestamps are taken: one for the JSONP callback name, one for the "_" parameter.
                var callbackStamp = TimeStamp.GetTimeStamp_13();
                var cacheStamp = TimeStamp.GetTimeStamp_13();
                string queryData = "callback=jQuery17205245009011952871_" + callbackStamp
                        + "&userName=" + currentPhoneNumber
                        + "&pwdType=01&_=" + cacheStamp;

                string response = hhn.HttpGet(endpoint, queryData, HttpForType.联通);
                // Record the request/response in the log.
                CollectJsonLog(endpoint, queryData, 0, response);

                bool answeredFalse = response.Contains(@"""resultCode"":""false""");
                return !answeredFalse;
            }
    
            /// <summary>
            /// Submits the captcha text to the captcha-check endpoint.
            /// </summary>
            /// <param name="imgcode">Captcha text; also stored in <c>currentLoginImgCode</c>.</param>
            /// <returns><c>true</c> when the response contains <c>"resultCode":"true"</c>; otherwise <c>false</c>.</returns>
            public bool IsLoginImgVerifyOk(string imgcode) {
                currentLoginImgCode = imgcode;

                string endpoint = loginToVerifyImgRequestUrl;
                string queryData = string.Concat(
                    "callback=jQuery17208163765012834383_1463034583178&verifyCode=",
                    currentLoginImgCode,
                    "&verifyType=1&_=1463034805373");

                string response = hhn.HttpGet(endpoint, queryData);
                // Record the request/response in the log.
                CollectJsonLog(endpoint, queryData, 0, response);

                return response.Contains(@"""resultCode"":""true""");
            }
    
            /// <summary>
            /// Returns the captcha image as a <c>data:image/png;base64,</c> URI, or an empty
            /// string when there is no image payload. The result is also kept in
            /// <c>loginVerifyImgStream</c>.
            /// </summary>
            public string GetLoginImg() {
                const string dataUriPrefix = "data:image/png;base64,";
                string queryData = "t=1463034742570";

                loginVerifyImgStream = string.Empty;

                // NOTE(review): no request is issued in this method, so the payload is always
                // empty and the method always returns "". The image fetch appears to have been
                // removed from the published code — confirm against the real implementation.
                string base64Payload = "";

                // Record the (empty) exchange in the log.
                CollectJsonLog(loginVerifyImgRequestUrl, queryData, 0, base64Payload);

                if (!string.IsNullOrEmpty(base64Payload)) {
                    loginVerifyImgStream = dataUriPrefix + base64Payload;
                }
                return loginVerifyImgStream;
            }
    
            /// <summary>
            /// Logout placeholder: performs no request and always reports success.
            /// </summary>
            /// <returns>Always <c>true</c>.</returns>
            public bool LogOut() {
                return true;
            }
             
            /// <summary>
            /// Sends the login request, repeating it every 500 ms for as long as the response
            /// contains the "province system is being upgraded" notice, then logs the last response.
            /// </summary>
            /// <param name="loginret">Always set to an empty string by the code visible here.</param>
            /// <returns>
            /// Always <c>false</c> in the code visible here.
            /// NOTE(review): the login URL is an empty string and the response is never
            /// inspected for success — the real logic appears to have been removed; confirm.
            /// </returns>
            public bool Login(out string loginret) {
                loginret = string.Empty;
                loginRequestUrl = "";

                string response;
                // NOTE(review): there is no retry limit, so this loops for as long as the notice keeps coming back.
                while (true) {
                    response = hhn.HttpGet(loginRequestUrl, "", HttpForType.联通);
                    Thread.Sleep(500);
                    if (!response.Contains(@"所属省份系统正在升级")) {
                        break;
                    }
                }

                CollectJsonLog(loginRequestUrl, "", 0, response);
                return false;
            }
    /// <summary>
            /// China Unicom session initialisation: loads the captcha page, logs the response and
            /// adds a <c>WT_FPC</c> cookie for <c>.10010.com</c> to the HTTP helper's cookies.
            /// </summary>
            public  void UNCInitPage() {
                const string captchaPageUrl = "https://login.10010.com/captchazh.htm?type=05";

                string pageBody = hhn.HttpGet(captchaPageUrl, "", HttpForType.联通);
                // Record the request/response in the log.
                CollectJsonLog(captchaPageUrl, "", 0, pageBody);

                // The script result is not used: the cookie value below is hard-coded
                // (original author's note: hard-coding is fine, the server does not validate it).
                JsHelper.GetJsMethd("GetWT_FPC", null);

                // Build the "WT_FPC" cookie.
                Cookie wtFpcCookie = new Cookie();
                wtFpcCookie.Name = "WT_FPC";
                wtFpcCookie.Value = "id=2c78d939da42319e6221460629342754:lv=1460686951978:ss=1460685811376";
                wtFpcCookie.Domain = ".10010.com";
                wtFpcCookie.Path = "/";
                wtFpcCookie.Expires = DateTime.Now.AddYears(10);

                hhn.cookie.Add(wtFpcCookie);
            }
    
            /// <summary>
            /// SMS-query placeholder: sends nothing and always reports success.
            /// </summary>
            /// <returns>Always <c>true</c>.</returns>
            public bool SendQuerySms() {
                const bool sent = true;
                return sent;
            }
    /// <summary>
    /// Generic query placeholder: ignores <paramref name="temp"/> and returns <c>null</c>.
    /// </summary>
    public IList<T> GetQueryData<T>(T temp) {
                IList<T> nothing = null;
                return nothing;
            }
    
            /// <summary>
            /// Posts to the personal-information endpoint and returns the raw response.
            /// </summary>
            /// <returns>The response text, or an empty string when <c>checkLogin</c> is false.</returns>
            private string getMyDetails() {
                if (!checkLogin) {
                    return "";
                }

                const string personalInfoUrl = "http://iservice.10010.com/e3/static/query/searchPerInfo/?_=1464073258330&menuid=000100020001";
                string response = hhn.HttpPost(personalInfoUrl, "", HttpForType.联通);
                // Record the request/response in the log (method 1 = Post).
                CollectJsonLog(personalInfoUrl, "", 1, response);
                return response;
            }
    
            /// <summary>
            /// Builds the subscriber profile from four requests (anonymous info, personal details,
            /// account balance, header view) plus a phone-attribution lookup.
            /// </summary>
            /// <returns>The populated <c>TXInfoModel</c>.</returns>
            /// <exception cref="FormatException">
            /// Thrown by <c>decimal.Parse</c> when a balance field is missing or is not a number.
            /// </exception>
            public TXInfoModel GetInfo() {
                TXInfoModel tim = new TXInfoModel();

                #region Part 1: name and sex
                string infoUrl = "https://uac.10010.com/cust/infomgr/anonymousInfoAJAX";
                string retString = hhn.HttpGet(infoUrl, "");
                // Record the request/response in the log.
                CollectJsonLog(infoUrl, "", 0, retString);

                tim.CustomerName = Utilities.QuMiddle(retString, @"name"":""", @"""");

                // NOTE(review): both branches are empty strings, so CustomerSex is always "".
                // The two labels appear to have been removed from the published code — confirm.
                tim.CustomerSex = Utilities.QuMiddle(retString, @"sex"":""", @"""") == "1" ? "" : "";
                #endregion

                #region Part 2: personal details
                string retString2 = getMyDetails();
                tim.Email = Utilities.QuMiddle(retString2, @"sendemail"":""", @"""");

                // A missing or unparsable open date leaves InNetTime at default(DateTime).
                DateTime innettime;
                DateTime.TryParse(Utilities.QuMiddle(retString2, @"opendate"":""", @""""), out innettime);
                tim.InNetTime = innettime;

                tim.Grade = ""; // star-level score, not filled in
                tim.IDCard = Utilities.QuMiddle(retString2, @"certnum"":""", @"""");

                // The same "usernumber" field feeds both PhoneNumber and ContactNum.
                string userNumber = Utilities.QuMiddle(retString2, @"usernumber"":""", @"""");
                tim.PhoneNumber = userNumber;
                tim.ProviderName = "中国联通:" + Utilities.QuMiddle(retString2, @"brand"":""", @"""") + "-" + Utilities.QuMiddle(retString2, @"productName"":""", @"""");
                tim.RegAddress = Utilities.QuMiddle(retString2, @"certaddr"":""", @"""");

                tim.ContactNum = userNumber;
                tim.NetAge = "";
                tim.PhoneStatus = Utilities.QuMiddle(retString2, @"subscrbstat"":""", @"""");
                tim.RealNameInfo = Utilities.QuMiddle(retString2, @"certtype"":""", @"""");
                tim.StarLevel = Utilities.QuMiddle(retString2, @"custlvl"":""", @"""");
                tim.LevelInfo = "";
                tim.ZipCode = "";
                #endregion

                #region Part 3: phone balance / account balance
                string infoUrl11 = "http://iservice.10010.com/e3/static/query/accountBalance/search?_=1464858050369&menuid=000100010002";
                string balancePostData = "type=onlyAccount";
                string retString11 = hhn.HttpPost(infoUrl11, balancePostData, HttpForType.联通);
                // Log what was actually sent: this is a POST (method 1) carrying balancePostData.
                CollectJsonLog(infoUrl11, balancePostData, 1, retString11);

                // The amounts come from a JSON payload, so parse them culture-independently.
                System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
                tim.CurFee = Decimal.Parse(Utilities.QuMiddle(retString11, @"userbalance"":""", @""""), invariant);
                tim.CurFeeTotal = Decimal.Parse(Utilities.QuMiddle(retString11, @"acctbalance"":""", @""""), invariant);
                #endregion

                #region Part 4: points
                string infoUrl22 = "http://iservice.10010.com/e3/static/query/headerView";
                string retString22 = hhn.HttpPost(infoUrl22, "", HttpForType.联通);
                // This request is a POST as well, so log it as method 1.
                CollectJsonLog(infoUrl22, "", 1, retString22);

                // Some Unicom card types return JSON without a points field; TryParse then leaves 0.
                // NOTE(review): the key is spelled "sore" — confirm against a real response that
                // this is the server's spelling and not a typo for "score".
                int score;
                int.TryParse(Utilities.QuMiddle(retString22, @"sore"":""", @""""), out score);
                tim.PointValue = score;
                #endregion

                #region Part 5: phone attribution lookup
                tim.PhoneAttribution = PhoneAttribution.getGuiShuDiNet(tim.PhoneNumber);
                #endregion

                return tim;
            }
    
    
            /// <summary>
            /// Requests the bill summary for each of the last five months and logs every response.
            /// </summary>
            /// <returns>
            /// The bill list. In the code visible here nothing is ever added to it, so it is
            /// returned empty.
            /// </returns>
            public IList<TXZhangDanModel> GetZhangDan() {
                IList<TXZhangDanModel> listZD = new List<TXZhangDanModel>();
                TXZhangDanModel temp;string retS = string.Empty;
                string[] temptimes;int loopi=0;
                // One POST body per month; temptimes receives the matching "start&end" date strings.
                List<string> tempss = GetZhangDanPostData(out temptimes);
                if (checkLogin) {
                    foreach (var data in tempss) {
                        // Process the bill of this month.
                        loopi++;
                        string PostdataS = data;
                        // NOTE(review): infoUrl is not declared in this method; presumably it is a
                        // member declared elsewhere or its declaration was removed — confirm.
                        retS = hhn.HttpPost(infoUrl, PostdataS, HttpForType.联通);
                        // Record the request/response in the log.
                        CollectJsonLog(infoUrl, PostdataS,1,retS);
                        //
                        // Parse the monthly bill data to build the model.
                        // NOTE(review): only the "billcycle" match is extracted; temp, temptimes,
                        // start1, end1, d1 and temps1 are never used and no model is added to
                        // listZD — the model-building code appears to be missing.
                        DateTime start1,end1;decimal d1;
                        string temps1= new Regex(@"""billcycle""(:)("".*?"")").Match(retS).ToString();
                    }
                }
    
                return listZD;   
    
            }
    
          /// <summary>
          /// Downloads the call detail records of the last five months, page by page
          /// (50 records per page), and converts every record into a <c>TXXiangDanModel</c>.
          /// </summary>
          /// <returns>All parsed records; empty when <c>checkLogin</c> is false.</returns>
          /// <exception cref="Exception">Thrown when the server reports that access is too frequent.</exception>
            public IList<TXXiangDanModel> GetXiangDan() {
                IList<TXXiangDanModel> listXD = new List<TXXiangDanModel>();
                if (!checkLogin) {
                    return listXD;
                }

                // One pattern per JSON field; built once instead of once per page.
                Regex callDateRx = new Regex(@"""calldate""(:)("".*?"")");      // start date
                Regex callTimeRx = new Regex(@"""calltime""(:)("".*?"")");      // start time
                Regex homeAreaRx = new Regex(@"""homeareaName""(:)("".*?"")");  // commPlac
                Regex callTypeRx = new Regex(@"""calltypeName""(:)("".*?"")");  // commMode
                Regex otherNumRx = new Regex(@"""othernum""(:)("".*?"")");      // anotherNm
                Regex durationRx = new Regex(@"""calllonghour""(:)("".*?"")");  // commTime
                Regex landTypeRx = new Regex(@"""landtype""(:)("".*?"")");      // commType
                Regex totalFeeRx = new Regex(@"""totalfee""(:)("".*?"")");      // commFee

                // Fees come from a JSON payload, so parse them culture-independently.
                System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

                foreach (var data in GetXiangDanPostData()) {
                    // data has the form "yyyy-MM-dd&yyyy-MM-dd" (begin & end).
                    string[] bounds = data.Split('&');
                    string rangeQuery = "beginDate=" + bounds[0] + "&endDate=" + bounds[1];

                    // The first request must already carry this month's range: it is the one
                    // totalRecord is read from. Previously the post data was only assigned inside
                    // the page loop below, so this request did not describe the current month.
                    string postData = rangeQuery + "&pageNo=1&pageSize=50";
                    string retS = hhn.HttpPost(callListRequestUrl, postData, HttpForType.联通);
                    // Record the request/response in the log.
                    CollectJsonLog(callListRequestUrl, postData, 1, retS);

                    // Repeat while the service answers "temporarily unavailable".
                    // NOTE(review): there is no retry limit on this loop.
                    while (retS.Contains("暂时无法为您提供服务")) {
                        retS = hhn.HttpPost(callListRequestUrl, postData, HttpForType.联通);
                        Thread.Sleep(500);
                    }
                    if (retS.Contains("系统检测您的访问过于频繁")) {
                        throw new Exception("访问获取详单链接过于频繁!请明天再试");
                    }

                    // Read the month's record count and derive the page numbers to fetch.
                    string currentMonthTotalNum = Utilities.QuMiddle(retS, @"totalRecord"":", @",""");
                    List<String> curCurorlist = GetAllcurCuror(currentMonthTotalNum);

                    foreach (var curcuror in curCurorlist) {
                        postData = rangeQuery + "&pageNo=" + curcuror + "&pageSize=50";
                        retS = hhn.HttpPost(callListRequestUrl, postData, HttpForType.联通);
                        // Record the request/response in the log.
                        CollectJsonLog(callListRequestUrl, postData, 1, retS);

                        MatchCollection stlist = callDateRx.Matches(retS);
                        MatchCollection stlist2 = callTimeRx.Matches(retS);
                        MatchCollection cplist = homeAreaRx.Matches(retS);
                        MatchCollection cmlist = callTypeRx.Matches(retS);
                        MatchCollection anlist = otherNumRx.Matches(retS);
                        MatchCollection ctilist = durationRx.Matches(retS);
                        MatchCollection ctylist = landTypeRx.Matches(retS);
                        MatchCollection cflist = totalFeeRx.Matches(retS);

                        // Records are rebuilt by position, so every field must occur equally often.
                        // stlist2 is part of the check because it is indexed below as well.
                        int recordCount = stlist.Count;
                        bool aligned = stlist2.Count == recordCount && cplist.Count == recordCount
                            && cmlist.Count == recordCount && anlist.Count == recordCount
                            && ctilist.Count == recordCount && ctylist.Count == recordCount
                            && cflist.Count == recordCount;
                        if (!aligned) {
                            // As before, a page whose fields do not line up is skipped.
                            continue;
                        }

                        // Build one model per record of this page.
                        for (int i = 0; i < recordCount; i++) {
                            TXXiangDanModel temp = new TXXiangDanModel() {
                                anotherNm = Utilities.QuMiddle(anlist[i].Value, @"othernum"":""", @""""),
                                commFee = decimal.Parse(Utilities.QuMiddle(cflist[i].Value, @"totalfee"":""", @""""), invariant),
                                commMode = Utilities.QuMiddle(cmlist[i].Value, @"calltypeName"":""", @""""),
                                commPlac = Utilities.QuMiddle(cplist[i].Value, @"homeareaName"":""", @""""),
                                commTime = Utilities.QuMiddle(ctilist[i].Value, @"calllonghour"":""", @""""),
                                commType = Utilities.QuMiddle(ctylist[i].Value, @"landtype"":""", @""""),
                                startTime = DateTime.Parse(Utilities.QuMiddle(stlist[i].Value, @"calldate"":""", @"""")
                                    + " " + Utilities.QuMiddle(stlist2[i].Value, @"calltime"":""", @""""))
                            };
                            listXD.Add(temp);
                        }
                    }
                }
                return listXD;
            }
    
            /// <summary>
            /// Builds the list of page numbers ("1", "2", ...) needed to page through a month's
            /// records at 50 records per page.
            /// </summary>
            /// <param name="totalNum">Total record count of the month, as text.</param>
            /// <returns>
            /// The page numbers as strings; empty when <paramref name="totalNum"/> is zero or
            /// is not an integer.
            /// </returns>
            private List<String> GetAllcurCuror(string totalNum) {
                const int pageSize = 50;
                List<string> pages = new List<string>();

                int total;
                if (!int.TryParse(totalNum, out total) || total == 0) {
                    return pages;
                }

                // e.g. 201 records -> 4 full pages plus one partial page = 5 pages.
                int pageCount = total / pageSize;
                if (total % pageSize != 0) {
                    pageCount++;
                }

                int page = 1;
                while (page <= pageCount) {
                    pages.Add(page.ToString());
                    page++;
                }
                return pages;
            }
    
            /// <summary>
            /// Builds the POST bodies for the bills of the current month and the four months
            /// before it, e.g. <c>billdate=201604&amp;querycode=0001&amp;querytype=0001</c>.
            /// </summary>
            /// <param name="startendS">
            /// Receives one "start&amp;end" string per month, in the same order as the returned
            /// list. Start is the first day of the month; end is the current time for the
            /// current month and the last day of the month for earlier months. Both are
            /// formatted with the default <c>DateTime.ToString()</c>.
            /// </param>
            /// <returns>Five POST bodies, newest month first.</returns>
            private List<string> GetZhangDanPostData(out string[] startendS) {
                const int monthCount = 5;
                DateTime now = DateTime.Now;

                startendS = new string[monthCount];
                List<string> postBodies = new List<string>();

                for (int back = 0; back < monthCount; back++) {
                    DateTime month = now.AddMonths(-back);
                    DateTime firstDay = new DateTime(month.Year, month.Month, 1);

                    // Current month: the range ends now. Earlier months: it ends on the month's last day.
                    DateTime rangeEnd;
                    if (back == 0) {
                        rangeEnd = month;
                    } else {
                        rangeEnd = firstDay.AddMonths(1).AddDays(-1);
                    }

                    startendS[back] = firstDay.ToString() + "&" + rangeEnd.ToString();
                    postBodies.Add("billdate=" + month.ToString("yyyyMM") + "&querycode=0001&querytype=0001");
                }
                return postBodies;
            }
    
            /// <summary>
            /// Builds the date ranges for the call-detail queries of the current month and the
            /// four months before it, each formatted as <c>yyyy-MM-dd&amp;yyyy-MM-dd</c>
            /// (e.g. <c>2016-04-01&amp;2016-04-30</c>).
            /// </summary>
            /// <param name="now">
            /// Reference date; defaults to <c>DateTime.Now</c>. Exposed so the month arithmetic
            /// can be exercised with a fixed date.
            /// </param>
            /// <returns>
            /// Five ranges, newest first. The current month runs from its first day to the
            /// reference date; earlier months run from their first to their last day.
            /// </returns>
            private List<string> GetXiangDanPostData(DateTime? now = null) {
                const int monthCount = 5;
                const string dateFormat = "yyyy-MM-dd";
                System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;

                DateTime today = (now ?? DateTime.Now).Date;
                DateTime firstOfCurrentMonth = new DateTime(today.Year, today.Month, 1);

                List<string> retlist = new List<string>(monthCount);
                for (int i = 0; i < monthCount; i++) {
                    // AddMonths handles the year rollover; the earlier hand-written arithmetic
                    // produced month 0 or negative months (and wrong years) early in the year.
                    DateTime firstDay = firstOfCurrentMonth.AddMonths(-i);
                    DateTime lastDay = i == 0 ? today : firstDay.AddMonths(1).AddDays(-1);

                    retlist.Add(firstDay.ToString(dateFormat, invariant) + "&" + lastDay.ToString(dateFormat, invariant));
                }
                return retlist;
            }
    
            /// <summary>
            /// Appends one formatted request/response entry to the shared static log.
            /// </summary>
            /// <param name="url">Request URL.</param>
            /// <param name="data">Request data (query string or POST body).</param>
            /// <param name="method1">1 is logged as "Post"; any other value is logged as "Get".</param>
            /// <param name="responseS">Response text.</param>
            public static void CollectJsonLog(string url, string data, int method1, string responseS) {
                // This method is static, so it can run while the list is still unassigned;
                // create the list on first use instead of failing with a NullReferenceException.
                if (loglist == null) {
                    loglist = new List<string>();
                }

                string method = method1 == 1 ? "Post" : "Get";
                loglist.Add(string.Format("【请求url:{0} , 请求数据:{1} , 请求方式:{2}, 返回数据:{3} 】", url, data, method, responseS));
            }
    
            /// <summary>
            /// Returns the shared static request/response log itself (not a copy).
            /// </summary>
            public IList<String> GetAllJsonLog() {
                IList<String> collected = loglist;
                return collected;
            }
        }
    }

    源码,请联系我

  • 相关阅读:
    ps基础磨皮(混入了奇怪的博客~)
    spring boot配置ssl
    Chrome自动翻译失效的解决办法
    小红书数美滑块验证码
    使用DataLoader报错AttributeError: 'int' object has no attribute 'numel'
    glidedsky-爬虫-验证码-1
    glidedsky-爬虫-雪碧图-2
    glidedsky-爬虫-雪碧图-1
    glidedsky-爬虫-CSS反爬
    glidedsky-爬虫-字体反爬-1
  • 原文地址:https://www.cnblogs.com/x-poior/p/5641437.html
Copyright © 2020-2023  润新知