/**
 * Scans the HBase table named by {@code tableName} and collects rows as
 * {@link ContentCommodityDto} objects (row key, {@code netURL} as file path, and {@code ocr}).
 *
 * <p>Server side, only rows whose {@code info:cityCode} value starts with {@code "61"} are
 * returned; rows without that column are dropped. Client side, a row is kept only if its
 * {@code shopType} is one of 0, 1, 2, 4, 5, 6, 7 and none of the skip conditions in
 * {@link #shouldSkipLoadDataRow} hold.
 *
 * <p>Any exception raised while scanning is printed and otherwise swallowed; the rows collected
 * up to that point are still returned.
 *
 * @param loadSize maximum number of rows to collect; a value {@code <= 0} imposes no limit and
 *                 the whole scan is consumed
 * @return the collected rows, never {@code null}; possibly empty
 */
public static List<ContentCommodityDto> loadData(int loadSize) {
    List<ContentCommodityDto> dataList = new ArrayList<>();
    Table table = null;
    try {
        table = HbaseUtil.getTable(tableName);
        byte[] infoFamily = Bytes.toBytes("info");
        // try-with-resources guarantees the scanner is closed on early break and on
        // exceptions; the original never closed it.
        try (ResultScanner scanner = table.getScanner(buildLoadDataScan())) {
            for (Result rs : scanner) {
                String shopType = Bytes.toString(rs.getValue(infoFamily, Bytes.toBytes("shopType")));
                if (!isLoadableShopType(shopType)) {
                    continue;
                }
                String ocr = Bytes.toString(rs.getValue(infoFamily, Bytes.toBytes("ocr")));
                String text = Bytes.toString(rs.getValue(infoFamily, Bytes.toBytes("text")));
                String filePath = Bytes.toString(rs.getValue(infoFamily, Bytes.toBytes("netURL")));
                String temp = Bytes.toString(rs.getValue(infoFamily, Bytes.toBytes("temp")));
                if (shouldSkipLoadDataRow(ocr, text, filePath, temp)) {
                    continue;
                }
                ContentCommodityDto dto = new ContentCommodityDto();
                dto.setFilePath(filePath);
                dto.setRowkey(Bytes.toString(rs.getRow()));
                dto.setOcr(ocr);
                dataList.add(dto);
                // A non-positive loadSize never stopped the loop in the original
                // (size() == loadSize could not match); that behaviour is kept explicitly.
                if (loadSize > 0 && dataList.size() >= loadSize) {
                    break;
                }
            }
        }
    } catch (Exception e) {
        // Best-effort load: report the failure and return what was collected so far.
        // NOTE(review): no logger is visible in this part of the file; consider replacing
        // printStackTrace() with the project's logging facility.
        e.printStackTrace();
    } finally {
        HbaseUtil.closeTable(table);
    }
    return dataList;
}

/** Builds the scan used by {@link #loadData(int)}: selected columns plus the cityCode prefix filter. */
private static Scan buildLoadDataScan() {
    Scan scan = new Scan();
    // NOTE(review): setMaxResultSize is a limit in bytes, not a row count; a value of 10
    // looks unintended. Left unchanged -- confirm the intent (row caching / limit?).
    scan.setMaxResultSize(10);
    // NOTE(review): columns are selected through HadoopConfig.FAMILY while the filter and the
    // reads in loadData use the literal family "info"; presumably both are the same -- confirm.
    scan.addColumn(HadoopConfig.FAMILY, Bytes.toBytes("shopType"));
    scan.addColumn(HadoopConfig.FAMILY, Bytes.toBytes("netURL"));
    scan.addColumn(HadoopConfig.FAMILY, Bytes.toBytes("ocr"));
    scan.addColumn(HadoopConfig.FAMILY, Bytes.toBytes("text"));
    scan.addColumn(HadoopConfig.FAMILY, Bytes.toBytes("temp"));
    scan.addColumn(HadoopConfig.FAMILY, Bytes.toBytes("cityCode"));

    // Keep only rows whose info:cityCode begins with "61".
    SingleColumnValueFilter cityCodeFilter = new SingleColumnValueFilter(
            Bytes.toBytes("info"),
            Bytes.toBytes("cityCode"),
            CompareFilter.CompareOp.EQUAL,
            new BinaryPrefixComparator(Bytes.toBytes("61")));
    // Without this, rows lacking the cityCode column would pass the filter.
    cityCodeFilter.setFilterIfMissing(true);

    FilterList allFilters = new FilterList(FilterList.Operator.MUST_PASS_ALL);
    allFilters.addFilter(cityCodeFilter);
    scan.setFilter(allFilters);
    return scan;
}

/** Returns true when {@code shopType} is one of "0", "1", "2", "4", "5", "6", "7"; null is rejected. */
private static boolean isLoadableShopType(String shopType) {
    return "0".equals(shopType)
            || "1".equals(shopType)
            || "2".equals(shopType)
            || "4".equals(shopType)
            || "5".equals(shopType)
            || "6".equals(shopType)
            || "7".equals(shopType);
}

/**
 * Returns true when a row must be left out of the result: ocr is "1" or "2", the file path is
 * empty, text is non-empty, temp is "1", or the formatted file path ends with ".gif".
 */
private static boolean shouldSkipLoadDataRow(String ocr, String text, String filePath, String temp) {
    // Order matters: the emptiness check on filePath must precede formatUrl(filePath).
    return "2".equals(ocr)
            || "1".equals(ocr)
            || StringUtils.isEmpty(filePath)
            || StringUtils.isNotEmpty(text)
            || "1".equals(temp)
            || FileUtils.formatUrl(filePath).endsWith(".gif");
}