• java调用Linux执行Python爬虫,并将数据存储到elasticsearch中--(java后台代码)


    该篇博客主要是Java代码,如需相应脚本及Java连接Elasticsearch的工具类代码,请移步上一篇博客(https://www.cnblogs.com/chenyuanbo/p/9973685.html)。

    一、创建连接执行Linux脚本工具类

    package com.yjlc.platform.utils.Elasticsearch;
    
    import ch.ethz.ssh2.Connection;
    import ch.ethz.ssh2.StreamGobbler;
    
    import java.io.*;
    /**
     * --------------------------------------------------------------
     * CopyRights(c)2018,YJLC
     * All Rights Reserved
     * <p>
     * FileName: SingletonUtil.java
     * Description:
     * Author: cyb
     * CreateDate: 2018-11-15
     * --------------------------------------------------------------
     */
    public class SingletonUtil {
        //无参构造
        private SingletonUtil(){}
        private volatile static SingletonUtil instance;
        //字符编码默认是utf-8
        public static String  DEFAULTCHART="UTF-8";
        public static Connection conn;
        private String ip;
        private String userName;
        private String userPwd;
        public static Boolean flag=false;
    //有参构造
        public SingletonUtil(String ip, String userName, String userPwd) {
            this.ip = ip;
            this.userName = userName;
            this.userPwd = userPwd;
        }
    
        public SingletonUtil getInstance(String ip, String userName, String userPwd){
            if(instance==null){
                synchronized(SingletonUtil.class){
                    //防止多线程多次创建
                    if(instance==null){
                        instance=new SingletonUtil(ip,userName, userPwd);
                    }
                }
            }
            flag= instance.login();//调用登录方法
            return instance;
        }
        //登录
        public Boolean login(){
            boolean flg=false;
            try {
                System.out.println("进入连接");
                conn = new Connection(ip);
                try {
                    conn.connect();//连接
                } catch (IOException e) {
                    e.printStackTrace();
                }
                flg=conn.authenticateWithPassword(userName, userPwd);//认证
                if (flg){
                    System.out.println("认证成功!");
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
            return flg;
        }
    
        /**
         *@description:纯文本格式返回
         *@author:cyb
         *@date: 2018-11-15 16:56
        *@param: in
        *@param: charset
         *@return: java.lang.String
         */
        public static String processStdout(InputStream in, String charset){
            InputStream    stdout = new StreamGobbler(in);
            StringBuffer buffer = new StringBuffer();;
            try {
                BufferedReader br = new BufferedReader(new InputStreamReader(stdout,charset));
                String line=null;
                while((line=br.readLine()) != null){
                    buffer.append(line+"
    ");
                }
            } catch (UnsupportedEncodingException e) {
                e.printStackTrace();
            } catch (IOException e) {
                e.printStackTrace();
            }
            return buffer.toString();
        }
    }

    二、控制层

    /**
     * Toggles the execute flag of a crawl task, persists the change and then triggers
     * the crawler.
     *
     * @author cyb
     * @date 2018-11-14 15:59
     * @param id      id of the task to look up
     * @param execute the state sent by the caller; the task is saved with the opposite
     *                state ({@code null} is treated as {@code false})
     * @return a map holding {@code "code"} and {@code "message"}: 200 on success, 206 when
     *         the SSH connection failed, 404 when no task was found for {@code id}, and the
     *         raw service status for any other outcome
     */
    @RequestMapping("openTask")
    @ResponseBody
    public Map<String,Object> openTask(String id,Boolean execute){
        Map<String,Object> map = new HashMap<>();
        // Look up the task details by id.
        BsKnowledgeInfoDTO  knowledgeInfoDTO=  knolegeService.getDataInfoById(id);
        if (knowledgeInfoDTO == null) {
            // Guard against a null lookup result instead of failing with an NPE below.
            map.put("code",404);
            map.put("message","任务不存在!");
            return map;
        }
        // Flip the state; Boolean.TRUE.equals avoids unboxing a null request parameter.
        boolean toggled = !Boolean.TRUE.equals(execute);
        knowledgeInfoDTO.setExecute(toggled);
        knolegeService.updateDataInfo(knowledgeInfoDTO);

        // Crawl target. The original left knowledgeInfoDTO.getPath() commented out here,
        // so the target is still a fixed URL.
        String url = "https://mil.news.sina.com.cn/";
        // Crawler endpoint.
        String reptileMethod = "http://192.168.200.8:8000/news";
        // Name of the index the results are stored in.
        String themeid = "hottopic";
        // Expected shape: http://192.168.200.8:8000/news?themeid=hottopic&url=https://...
        // The parameter is spelled "themeid" to match that example (it was "themid").
        // NOTE(review): url is appended without URL-encoding — confirm what the endpoint expects.
        String path = reptileMethod + "?themeid=" + themeid + "&url=" + url;

        // NOTE(review): host and root password are hard-coded in source; move them to
        // configuration / a secret store.
        String ip="192.168.200.8";
        String userName ="root";
        String userPwd ="yjlc20148";
        int w = knolegeService.reptile(path,ip,userName,userPwd);
        if(w==200){
            map.put("code",200);
            map.put("message","爬虫成功!");
        }else if(w==206){
            map.put("code",206);
            map.put("message","连接失败!");
        }else {
            // Never return an empty body for an unexpected status.
            map.put("code",w);
            map.put("message","爬虫失败!");
        }
        return map;
    }

    三、service层(此处省略了service接口层)

    /**
     * Logs in to the remote host over SSH, runs the crawler shell script there, then
     * requests the crawl URL.
     *
     * @author cyb
     * @date 2018-11-15 20:52
     * @param path     crawler endpoint plus the index name and crawl target as query string
     * @param ip       host to connect to
     * @param userName SSH user name
     * @param userPwd  SSH password
     * @return 200 when the script was run and the crawl URL was requested; 206 when the
     *         login failed or the SSH session/command could not be started
     */
    @Override
    public int reptile(String path,String ip,String userName,String userPwd) {
        // Use the caller's host and credentials rather than a second hard-coded copy.
        // The call goes through an instance so it works whether or not getInstance is static.
        SingletonUtil singletonUtil = new SingletonUtil(ip, userName, userPwd);
        singletonUtil.getInstance(ip, userName,userPwd);
        if (!Boolean.TRUE.equals(SingletonUtil.flag)) {
            // Login failed; drop a connection that may have been opened but not authenticated.
            if (SingletonUtil.conn != null) {
                SingletonUtil.conn.close();
            }
            return 206;
        }

        System.out.println("=====第一个步骤=====");
        Session session = null;
        try {
            session = SingletonUtil.conn.openSession();
            session.execCommand("sh /opt/zc/linux_sina.sh");
            //TODO: support multiple commands
            String result = SingletonUtil.processStdout(session.getStdout(), SingletonUtil.DEFAULTCHART);
            if (StringUtils.isBlank(result)) {
                // Empty stdout is treated as a script error; report stderr instead of
                // printing it under the success message.
                result = SingletonUtil.processStdout(session.getStderr(), SingletonUtil.DEFAULTCHART);
                System.out.println("脚本出错" + result);
            } else {
                System.out.println("第一个步骤脚本运行成功"+result);
            }
            ConnectNetworkUtil connectNetworkUtil = new ConnectNetworkUtil();
            connectNetworkUtil.ConnectNetwork(path);
            System.out.println("采集成功!");
            return 200;
        } catch (IOException e) {
            // The session could not be opened or the command could not be started.
            e.printStackTrace();
            return 206;
        } finally {
            // Always release the session and the connection, including on failure.
            if (session != null) {
                session.close();
            }
            SingletonUtil.conn.close();
        }
    }

    以上代码省略了service接口层和Java连接Elasticsearch的工具类(上一篇博客中已给出),仅供参考。若代码中有不合理或者不规范的地方,请各位指出,技术在于交流!

  • 相关阅读:
    Java设计模式之单例模式
    docker常用命令2
    Failed to convert value of type 'java.lang.String' to required type 'java.time.LocalDate';
    Apache RocketMQ在linux上的常用命令
    RocketMQ的broker启动失败解决
    xshell与xftp使用注意
    Springboot项目打包成jar运行2种方式
    docker常用命令记录
    MySql常用语句总结更新
    springboot启动报错start bean 'eurekaAutoServiceRegistration' NullPointerException
  • 原文地址:https://www.cnblogs.com/chenyuanbo/p/9973769.html
Copyright © 2020-2023  润新知