• scala爬取指定地点的所有列车班次


    需求介绍:

       爬取从指定站点出发、开往全国各站点的所有列车班次详情,并将结果写入 MySQL。

    步骤及所遇到的问题:

    1.获取全国站点静态信息: https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.9002 (一个静态的 js 文件)

    2.借助浏览器开发者工具(本人使用谷歌浏览器,按 F12 打开),在 12306 相关页面中找到所需要的接口

    3.寻找规律,注意去重写进mysql

    直接上代码,看注释:

    /**
      * Crawls the 12306 web endpoints for every train that departs from a given station and
      * stores one row per train (with its full stop list as JSON) in the `train_schedules`
      * MySQL table.
      *
      * Flow: load the nationwide station list, query the ticket endpoint for
      * (departure station -> every known station) on each requested date, then fetch the stop
      * list of every train whose queried destination is also its terminal station.
      */
    object TrainSchedulesMain {

      /**
        * Program entry point.
        *
        * @param args `args(0)`: comma-separated departure dates, e.g. "2019-01-07";
        *             `args(1)`: name of the departure station as it appears in the station list
        */
      def main(args: Array[String]): Unit = {
        if (args == null || args.length < 2) {
          System.err.println("args is null or missing")
          System.exit(1)
        }
        val dateStrList = args(0).trim
        val station = args(1).trim

        assert(StringUtils.isNotBlank(dateStrList), "dateStrList is null or empty")
        assert(StringUtils.isNotBlank(station), "station is null or empty")

        // Echo the arguments for the run log.
        println(args.mkString("  "))
        // Station name -> station code for every station in the country.
        val allStationsMap = analysisAllStations()

        // Departure stations. Kept as a list so several stations of one city can be crawled
        // in one run (e.g. 深圳, 深圳西, 深圳东, 深圳坪山, 深圳北, 福田).
        val fromStations = List(station)

        // Fail fast with a readable message instead of a NoSuchElementException mid-crawl.
        val unknownStations = fromStations.filterNot(allStationsMap.contains)
        if (unknownStations.nonEmpty) {
          System.err.println("unknown station: " + unknownStations.mkString(", "))
          System.exit(1)
        }

        // Trim each date so "2019-01-07, 2019-01-08" does not leak a blank into URLs and SQL.
        val dates = dateStrList.split(",").map(_.trim).filter(_.nonEmpty)
        dates.foreach { dateStr =>
          fromStations.foreach { fromName =>
            val fromStationRequest = allStationsMap(fromName)
            allStationsMap.valuesIterator.zipWithIndex.foreach { case (toStationRequest, index) =>
              // Query 12306 for this (departure, destination) pair.
              excuteAnaly(dateStr, fromStationRequest, toStationRequest)
              // Progress output: date, departure station, number of destinations processed.
              println(dateStr)
              println("进度:" + fromName)
              println(index + 1)
            }
          }
        }
      }

      /**
        * Queries all trains running from one station to another on the given date and hands
        * every returned row to [[processTicketRow]].
        *
        * @param dateStr            departure date, passed to the endpoint unchanged
        * @param fromStationRequest station code of the departure station
        * @param toStationRequest   station code of the destination station
        */
      private def excuteAnaly(dateStr: String, fromStationRequest: String, toStationRequest: String): Unit = {
        val url1 =
          s"""https://kyfw.12306.cn/otn/leftTicket/queryZ?leftTicketDTO.train_date=${dateStr}&leftTicketDTO.from_station=${fromStationRequest}&leftTicketDTO.to_station=${toStationRequest}&purpose_codes=ADULT"""
        println("url1: " + url1)
        val responseStr = HttpRequest.sendGet(url1)
        println("url1Res: " + responseStr)
        if (StringUtils.isNotBlank(responseStr) && JSONUtil.isJson(responseStr)) {
          // Every level of the parsed bean may be null; Option(...) turns that into "no rows".
          val rows = for {
            model <- Option(JSONUtil.toJavaBean(responseStr, classOf[AllStationTimes]))
            if model.getHttpstatus == 200
            data <- Option(model.getData)
            result <- Option(data.getResult)
          } yield result
          rows.foreach { resList =>
            for (i <- 0 until resList.size()) {
              processTicketRow(dateStr, resList.get(i))
            }
          }
        }
      }

      /**
        * Parses one `|`-separated row of the ticket query result and, when the queried
        * destination is the train's terminal station, stores the train's schedule.
        *
        * Rows that lack the "预订" marker or have fewer than seven fields after it are ignored.
        *
        * @param dateStr departure date the row was queried for
        * @param row     raw row string from the ticket query result
        */
      private def processTicketRow(dateStr: String, row: String): Unit = {
        val markerIndex = if (row == null) -1 else row.indexOf("预订")
        if (markerIndex > -1) {
          // '|' is a regex metacharacter and must be escaped for String.split.
          val fields = row.substring(markerIndex).split("\\|")
          // Field layout after the marker: 1 = train number, 4 = terminal station,
          // 5 = queried departure station, 6 = queried destination station.
          if (fields.length > 6) {
            val trainNo = fields(1)
            val endStation = fields(4)
            val fromStation = fields(5)
            val toStation = fields(6)
            // Only store a train when the destination is its terminal station; otherwise the
            // same train would be written once per intermediate stop, with an incomplete
            // stop list.
            if (toStation.trim.equals(endStation.trim)) {
              saveTrainSchedule(dateStr, trainNo, fromStation, toStation)
            }
          }
        }
      }

      /**
        * Fetches the ordered stop list of one train and inserts it into `train_schedules`
        * unless a row with the same train code, start/end station, start time and date exists.
        *
        * @param dateStr     departure date
        * @param trainNo     train number as returned by the ticket query
        * @param fromStation station code of the departure station
        * @param toStation   station code of the destination station
        */
      private def saveTrainSchedule(dateStr: String, trainNo: String, fromStation: String, toStation: String): Unit = {
        val url2 = s"""https://kyfw.12306.cn/otn/czxx/queryByTrainNo?train_no=${trainNo}&from_station_telecode=${fromStation}&to_station_telecode=${toStation}&depart_date=${dateStr}"""
        println("url2: " + url2)
        val res = HttpRequest.sendGet(url2)
        println("url2Res: " + res)
        if (StringUtils.isNotBlank(res) && JSONUtil.isJson(res)) {
          val stops = for {
            model <- Option(JSONUtil.toJavaBean(res, classOf[TrainSchedules]))
            data <- Option(model.getData)
            list <- Option(data.getData)
            if list.size() > 0
          } yield list
          stops.foreach { stopList =>
            val firstStop = stopList.get(0)
            val lastStop = stopList.get(stopList.size() - 1)

            // Every value comes from a remote response or the command line, so escape it
            // before embedding it in a quoted SQL literal.
            val trainCode = sqlEscape(firstStop.getStation_train_code)
            val startName = sqlEscape(firstStop.getStart_station_name)
            val endName = sqlEscape(firstStop.getEnd_station_name)
            val startTime = sqlEscape(firstStop.getStart_time)
            val arriveTime = sqlEscape(lastStop.getArrive_time)
            val dates = sqlEscape(dateStr)
            val stopsJson = sqlEscape(JSONUtil.toJsonString(stopList))

            val existsSql =
              s"""select *
                 |from train_schedules
                 |where train_code='$trainCode'
                 |and start_station_name='$startName'
                 |and end_station_name='$endName'
                 |and start_time='$startTime'
                 |and dates='$dates'
                 |"""
                .stripMargin
            if (!MysqlHandleUtil(MysqlConnect.trainDB).isHasValue(existsSql)) {
              val insertSql =
                s"""insert into
                   |train_schedules(`train_code`,`start_station_name`,`end_station_name`,`start_time`,`arrive_time`,`dates`,`data`)
                   |values('$trainCode','$startName','$endName','$startTime','$arriveTime','$dates','$stopsJson')"""
                  .stripMargin
              MysqlHandleUtil(MysqlConnect.trainDB).insertData(insertSql)
            }
          }
        }
      }

      /**
        * Escapes a value for use inside a single-quoted MySQL string literal by doubling
        * backslashes and single quotes.
        *
        * NOTE(review): assumes the server runs without NO_BACKSLASH_ESCAPES; a parameterized
        * statement would be preferable if MysqlHandleUtil offers one.
        *
        * @param value value to embed; `null` is rendered as the text "null"
        * @return the escaped text, without surrounding quotes
        */
      private def sqlEscape(value: Any): String =
        String.valueOf(value).replace("\\", "\\\\").replace("'", "''")

      /**
        * Downloads the nationwide station list and parses it into a (Chinese name -> code) map.
        *
        * The response is split on '@'; each record is split on '|' and its second field is
        * used as the key and its third field as the value. Records with fewer than three
        * fields are skipped.
        *
        * @return mutable map from station name to station code
        */
      def analysisAllStations(): collection.mutable.HashMap[String, String] = {
        val hashMap = new mutable.HashMap[String, String]()
        val url = "https://kyfw.12306.cn/otn/resources/js/framework/station_name.js?station_version=1.9002"
        val response = HttpRequest.sendGet(url)
        assert(StringUtils.isNotBlank(response), "response is null or empty")
        // Element 0 is whatever precedes the first '@' (presumably the JS assignment prefix)
        // and is not a station record, so it is dropped.
        response.split("@").drop(1).foreach { record =>
          val fields = record.split("\\|")
          if (fields.length > 2) {
            hashMap.put(fields(1), fields(2))
          }
        }
        hashMap
      }
    }
    

      

    效果:

  • 相关阅读:
    [Linux] expect命令 (自动交互脚本)
    [MAC] 终端bash_profile配置不生效问题
    [IDEA] 开发常用插件
    [MAC] 环境常用工具
    [IDEA] 快捷键输出固定代码模板
    家庭网络-多无线路由器实现无缝漫游
    家庭网络-AP组网方案(POE供电)
    家庭网络-软路由搭建方案
    队列使用
    [多线程] 线程池的使用
  • 原文地址:https://www.cnblogs.com/ityuanmanito/p/10476811.html
Copyright © 2020-2023  润新知