• Java Spark:将 List 转为 RDD,再转为 Dataset 并写入表中


    package com.example.demo;
    
    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;
    
    import org.apache.spark.api.java.JavaRDD;
    import org.apache.spark.api.java.JavaSparkContext;
    import org.apache.spark.sql.Dataset;
    import org.apache.spark.sql.Row;
    import org.apache.spark.sql.SQLContext;
    import org.apache.spark.sql.SparkSession;
    
    public class DemoApplication {
    
    	public static void main(String[] args) {
    		
    		
    //		/*-----------------------线上调用方式--------------------------*/
    		// 读入店铺id数据
    		SparkSession spark = SparkSession.builder().appName("demo_spark").enableHiveSupport().getOrCreate();
    		Dataset<Row> vender_set = spark.sql("select pop_vender_id from app.app_sjzt_payout_apply_with_order where dt = '2019-08-05' and pop_vender_id is not null");
    		System.out.println( "数据读取 OK" );
    		
    		
    		JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
    //		JavaSparkContext sc = new JavaSparkContext();
    		SQLContext sqlContext = new SQLContext(sc);
    
    		// 将数据去重,转换成 List<Row> 格式
    		vender_set =  vender_set.distinct();
    		vender_set = vender_set.na().fill(0L);
    		JavaRDD<Row> vender= vender_set.toJavaRDD();
    		List<Row> vender_list = vender.collect();
    		
    
    		// 遍历商家id,调用jsf接口,创建list 保存返回数据
    		List<String> list_temp = new ArrayList<String>();
    		for(Row row:vender_list) {
    			String id = row.getString(0);
    			String result = service.venderDownAmountList(id);
    			
    			System.out.println( "接口调用返回值 OK" );
    			
    			// 解析json串 ,按照JSONObject 和 JSONArray 一层一层解析 并过返回滤数据
    			JSONObject jsonOBJ = JSON.parseObject(result);
    			JSONArray data = jsonOBJ.getJSONArray("data");
    			if (data != null) {
    				JSONObject data_all = data.getJSONObject(0);
    				double amount = data_all.getDouble("jfDownAmount");
    				// 将商家id 和 倒挂金额存下来
    				list_temp.add("{"vender_id":"+id+","amount":"+amount+"}");
    			}
    			else {
    				continue;
    			}
    			
    			System.out.println( "解析 OK" );
    			
    		}
    		// list 转为 RDD 
    		JavaRDD<String> venderRDD = sc.parallelize(list_temp);
    		
    		// 注册成表
    		Dataset<Row> vender_table = sqlContext.read().json(venderRDD);
    		vender_table.registerTempTable("vender");
    		System.out.println( "注册表 OK" );
    		
    		// 写入数据库
    		spark.sql("insert overwrite table dev.dev_jypt_vender_dropaway_amount select vender.vender_id,vender.amount from vender");
    		System.out.println( "写入数据表 OK" );
    
    		sc.stop();		
    		System.out.println( "Hello World!" );
    		
    	}
    }
    

      

  • 相关阅读:
    Zabbix通过进程名监控进程状态配置详解
    kibana 统计field所有值百分比
    使用Logstash filter grok过滤日志文件
    python 修改文件内容
    清理elasticsearch的索引
    zabbix3.2.1安装graphtrees插件
    snmpwalk用法
    Zabbix通过SNMPv2监控DELL服务器的硬件信息
    zabbix上的宏(macro)介绍
    解决TeamViewer无法按给定网络地址联系伙伴
  • 原文地址:https://www.cnblogs.com/Allen-rg/p/11365013.html
Copyright © 2020-2023  润新知