• Python 爬虫的简单实现:在 Java 中调用 Python 脚本,并将抓取到的数据保存到 JSON 文件中


    # coding:utf-8
    
    import urllib2
    from bs4 import BeautifulSoup
    import json
    import sys
    
    # Python 2-only workaround: make UTF-8 the interpreter-wide default codec used
    # for implicit str/unicode conversions. reload(sys) comes first because
    # setdefaultencoding is normally not reachable on a freshly imported sys module.
    reload(sys)
    
    sys.setdefaultencoding('utf-8')
    class dataBean(object):
        """One scraped news entry: its title, its link and its date text."""

        def __init__(self, title, url, date):
            # The values are stored exactly as given; nothing is validated or converted.
            self.date, self.url, self.title = date, url, title

        def obj_2_json(obj):
            """Return the plain-dict form of ``obj`` with keys title, url and date.

            Declared without ``self`` so that ``dataBean.obj_2_json`` can be handed
            to ``json.dumps`` as its ``default=`` hook, which calls it with the
            object to serialise as the only argument.
            """
            field_names = ("title", "url", "date")
            return dict((name, getattr(obj, name)) for name in field_names)
    # Fetch the news list page, turn every <li> of the list into a dataBean,
    # then write the whole list to data.json and echo it on stdout.
    url = "http://localhost:8088/news.html"
    response3 = urllib2.urlopen(url)
    try:
        html = response3.read()
    finally:
        # Release the HTTP connection even when reading the body fails.
        response3.close()
    soup = BeautifulSoup(html, 'html.parser', from_encoding='utf-8')
    data = []
    # Every <li> below the <ul class="w_newslistpage_list"> element is one entry.
    contents = soup.find('ul', class_="w_newslistpage_list").findAll("li")

    for content in contents:
        # Look the headline link up once; it supplies both the title and the href.
        anchor = content.find("span").find("a")
        bean = dataBean(anchor.get_text(), anchor['href'],
                        content.find('span', class_="date").get_text())
        data.append(bean)

    # ensure_ascii=False keeps non-ASCII text readable in the output instead of
    # \uXXXX escapes; dataBean.obj_2_json converts each bean into a plain dict.
    jsondata = json.dumps(data, default=dataBean.obj_2_json, ensure_ascii=False, encoding='utf-8')
    # The with-block closes the file even if the write raises.
    with open('data.json', 'w') as fileObject:
        fileObject.write(jsondata)
    print(jsondata)

    在 Java 中调用:借助 jython.jar 执行上面的 Python 脚本,并将 bs4 包(整个文件夹)拷贝到当前文件夹下即可

    import org.python.core.Py;
    import org.python.core.PyString;
    import org.python.util.PythonInterpreter;
    
    
    public class Main {
    //jython安装
        public static void main(String[] args) {
            String code = "# -*- coding: utf-8 -*-
    " +
                    "import sys
    " +
                    "reload(sys)
    " + "import urllib2
    " +
                    "sys.setdefaultencoding('utf-8')
    " +
                    "import json
    ";
            new Thread(new Runnable() {
                @Override
                public void run() {
                    PythonInterpreter interpreter = new PythonInterpreter();
                    interpreter.exec("from bs4 import BeautifulSoup");
                    PyString code2 = Py.newStringUTF8(code);
                    interpreter.exec(code2);
                    interpreter.execfile("D:\java\test\src\GetNewsDataToLocal.py");
    
                }
            }
            ).start();
        }
    
    
    }

    运行后,可在当前文件夹下看到生成的 data.json 文件

  • 相关阅读:
    详细版Jmeter随机参数的接口并发测试总结
    Windows下MQTT代理服务器的搭建
    关于使用elascticsearch的两个小技巧
    解决easyswoole的swServer_start_check: onTask event callback must be set at报错
    解决使用宝塔安装的swoole扩展,运行项目出现的3个常见问题
    浅谈一下ThinkPHP5.1实现事务嵌套的特性
    资源出现多个 "Access-Control-Allow-Origin"
    Mac 制作系统启动盘
    深入剖析分布式一致性共识算法
    分布式系统限流算法分析与实现
  • 原文地址:https://www.cnblogs.com/loaderman/p/10137082.html
Copyright © 2020-2023  润新知