1. 使用 WebZIP 下载整站或指定页面
2. 使用 jsoup 在线获取网页内容
例子(对应第 2 种方式:用 jsoup 抓取页面内容并通过 Repository 保存):
/*
 * Example: scrape one page with jsoup and persist one TmCategoryOne per matched element.
 *
 * The snippet declares an @Autowired TmCategoryOneRepository field and the method
 * importTmCategoryOne(String url), which:
 *   1. Fetches and parses the page at `url` via Jsoup.connect(url).get().
 *   2. Collects four parallel lists of strings:
 *        list2 - text of every element with class "dw"          (used as name)
 *        list3 - href of every <a> selected inside those "dw" elements (used as url)
 *        list1 - text of every <h1> in the document             (used as particulars)
 *        list  - text of every <h3> in the document             (used as annotation)
 *   3. For each index i in [0, list2.size()) builds a TmCategoryOne with an id from
 *      OidMgr.requestOID("tm_category_one"), code set to i as a string, and the i-th
 *      entry of each list, then saves it through the repository.
 *
 * Parameter: url - address of the page to fetch.
 * Returns:   a new, empty HashMap on every visible path; nothing is ever put into it.
 *
 * NOTE(review): the loop is bounded only by list2.size() while list, list1 and list3
 * are read at the same index. If the page has fewer <a>/<h1>/<h3> matches than "dw"
 * elements, ArrayList.get(i) throws IndexOutOfBoundsException, which the
 * catch (IOException) below does not handle - confirm the target page always has
 * matching counts.
 * NOTE(review): an IOException from the fetch is only printed via printStackTrace();
 * the caller still receives an empty map and cannot tell that the import failed.
 * NOTE(review): the method's closing brace is not visible in this excerpt.
 */
@Autowired private TmCategoryOneRepository tmCategoryOneRepository; public HashMap<String, String> importTmCategoryOne(String url) { try { Document doc = Jsoup.connect(url).get(); Elements results = doc.getElementsByClass("dw"); Elements results1 = doc.getElementsByTag("h3"); Elements results2 = doc.getElementsByTag("h1"); Elements results3 = results.select("a"); List<String> list = new ArrayList<String>(); List<String> list1 = new ArrayList<String>(); List<String> list2 = new ArrayList<String>(); List<String> list3 = new ArrayList<String>(); for (Element element : results3) { list3.add(element.attr("href")); } for (Element element : results2) { list1.add(element.text()); } for (Element element : results1) { list.add(element.text()); } for (Element element : results) { list2.add(element.text()); } for (int i = 0; i < list2.size(); i++) { TmCategoryOne tmo = new TmCategoryOne(); tmo.setId(OidMgr.requestOID("tm_category_one").toString()); tmo.setName(list2.get(i)); tmo.setUrl(list3.get(i)); tmo.setParticulars(list1.get(i)); tmo.setCode(i + ""); tmo.setAnnotation(list.get(i)); tmCategoryOneRepository.save(tmo); } } catch (IOException e) { e.printStackTrace(); } return new HashMap<>();