package com.shinho.bigdatalake.redis; import com.amazonaws.regions.Region; import com.amazonaws.regions.Regions; import com.amazonaws.services.s3.AmazonS3; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.nio.charset.Charset; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.List; import java.util.Map; import org.apache.commons.csv.CSVFormat; import org.apache.commons.csv.CSVParser; import org.apache.commons.csv.CSVRecord; import org.apache.commons.io.IOUtils; import org.apache.log4j.Logger; import com.alibaba.fastjson.JSON; import com.amazonaws.auth.BasicAWSCredentials; import com.amazonaws.services.s3.AmazonS3Client; import com.amazonaws.services.s3.model.GetObjectRequest; import com.amazonaws.services.s3.model.ObjectListing; import com.amazonaws.services.s3.model.S3Object; import com.amazonaws.services.s3.model.S3ObjectInputStream; import com.amazonaws.services.s3.model.S3ObjectSummary; import com.shinho.test.TestApp; import redis.clients.jedis.Jedis; public class S3Utils { private static final Logger logger = Logger.getLogger(S3Utils.class); public static AmazonS3 s3 = null; public static final String AWS_ACCESS_KEY = "xxx"; public static final String AWS_SECRET_KEY = "xxx"; public static final String bucketName = "xxx"; static{ s3 = new AmazonS3Client(new BasicAWSCredentials(AWS_ACCESS_KEY, AWS_SECRET_KEY)); s3.setRegion(Region.getRegion(Regions.CN_NORTH_1)); } public static int getData() throws Exception{ SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); String json = ""; ObjectListing ol = s3.listObjects(bucketName); List<S3ObjectSummary> objects = ol.getObjectSummaries(); for (S3ObjectSummary os: objects) { System.out.println("* " + os.getKey()); } S3Object obj = s3.getObject(new GetObjectRequest(bucketName,"test/tmp_test_put1.csv")); Jedis jedis = new Jedis("xxx"); jedis.select(15); List<CSVRecord> list = null; if(obj != null){ 
InputStream input = null; input = obj.getObjectContent(); InputStreamReader reader = new InputStreamReader(input, Charset.forName("utf-8")); CSVFormat format = CSVFormat.DEFAULT; CSVParser parser = format.parse(reader); logger.info("开始读取:"+sdf.format(new Date())); list = parser.getRecords(); logger.info("完成读取:"+sdf.format(new Date())); for(CSVRecord csv:list){ Map<String, String> map = csv.toMap(); String key = "gdt_cmplya_mid_dist_goal;" + map.get(2)+";" +map.get(1)+";"; String value = JSON.toJSONString(map); jedis.set(key, value); } logger.info("完成转换:"+sdf.format(new Date())); } return list.size(); } }
pom.xml如下:
<!-- Maven dependency declarations: AWS SDK for Java 1.10.26, Apache Commons CSV 1.6 and Jedis 2.9.0. -->
<!-- NOTE(review): the Java source in this file also imports com.alibaba.fastjson, org.apache.log4j and org.apache.commons.io, which are not declared in this snippet; presumably they are declared elsewhere in the full pom.xml. Confirm. -->
<dependency> <groupId>com.amazonaws</groupId> <artifactId>aws-java-sdk</artifactId> <version>1.10.26</version> </dependency> <dependency> <groupId>org.apache.commons</groupId> <artifactId>commons-csv</artifactId> <version>1.6</version> </dependency> <dependency> <groupId>redis.clients</groupId> <artifactId>jedis</artifactId> <version>2.9.0</version> </dependency>