A production submit script has to take a lot into account:
backpressure, rate limiting, JVM/GC tuning, retries on failure, and so on. The script below groups the relevant settings by concern.
#!/bin/bash

# Resource sizing
num_executors=1
executor_memory=1g
driver_memory=1g
executor_cores=1
realtime_queue=root

# backpressure
receiver_max_rate=100
receiver_initial_rate=30

my_job_name="streamingSYN"
main_class="com.df.QZ.HeartOrderChart"

# Driver JVM options: CMS GC tuning plus the log4j config. They must be passed
# together here, because --driver-java-options overrides any
# spark.driver.extraJavaOptions set via --conf (so a separate --conf entry for
# the driver's log4j config would be silently dropped).
driver_java_opts="-Dlog4j.configuration=log4j-yarn.properties \
-XX:+UseConcMarkSweepGC -XX:+CMSClassUnloadingEnabled -XX:ParallelCMSThreads=4 \
-XX:+CMSParallelRemarkEnabled -XX:+UseCMSCompactAtFullCollection \
-XX:CMSInitiatingOccupancyFraction=70 -XX:CMSFullGCsBeforeCompaction=2 \
-XX:-UseCompressedOops -XX:+PrintHeapAtGC"

spark-submit --master yarn --deploy-mode cluster \
  --name ${my_job_name} \
  --class ${main_class} \
  --driver-memory ${driver_memory} \
  --num-executors ${num_executors} \
  --executor-cores ${executor_cores} \
  --executor-memory ${executor_memory} \
  --queue ${realtime_queue} \
  --conf spark.executor.extraJavaOptions=-Dlog4j.configuration=log4j-yarn.properties \
  --conf spark.serializer=org.apache.spark.serializer.KryoSerializer \
  --conf spark.locality.wait=10 \
  --conf spark.task.maxFailures=8 \
  --conf spark.ui.killEnabled=false \
  --conf spark.logConf=true \
  --conf spark.streaming.blockInterval=200 \
  --conf spark.streaming.receiver.writeAheadLog.enable=true \
  --conf spark.streaming.backpressure.enabled=true \
  --conf spark.streaming.backpressure.pid.minRate=10 \
  --conf spark.streaming.receiver.maxRate=${receiver_max_rate} \
  --conf spark.streaming.kafka.maxRatePerPartition=${receiver_max_rate} \
  --conf spark.streaming.backpressure.initialRate=${receiver_initial_rate} \
  --conf spark.yarn.driver.memoryOverhead=512 \
  --conf spark.yarn.executor.memoryOverhead=1024 \
  --conf spark.yarn.maxAppAttempts=4 \
  --conf spark.yarn.am.attemptFailuresValidityInterval=1h \
  --conf spark.yarn.max.executor.failures=$((8 * ${num_executors})) \
  --conf spark.yarn.executor.failuresValidityInterval=1h \
  --driver-java-options "${driver_java_opts}" \
  hdfs://df1:9000/Thermodynamic-1.0-SNAPSHOT.jar \
  1 df1:9092,df2:9092,df3:9092 driverinfo cm1 df1:2181,df2:2181,df3:2181
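The main class com.df.QZ.HeartOrderChart itself is not shown here, but it helps to see how the positional arguments (batch interval, Kafka brokers, topic, group, ZooKeeper quorum) and the retry settings fit together on the driver side. Below is a minimal, hypothetical Scala sketch, not the real job: the checkpoint path, the reading of the first argument as a batch interval in seconds, and the use of the kafka-0-10 direct API are all assumptions (the receiver.maxRate and write-ahead-log settings above hint that the real job may instead use the older receiver-based API, which is where the ZooKeeper quorum argument would be consumed).

import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe
import org.apache.spark.streaming.kafka010.KafkaUtils
import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent

object HeartOrderChartSketch {
  def main(args: Array[String]): Unit = {
    // Matches the five positional arguments passed by the submit script.
    val Array(batchSec, brokers, topic, group, zkQuorum) = args

    // Hypothetical checkpoint location; not present in the original script.
    val checkpointDir = "hdfs://df1:9000/checkpoint/streamingSYN"

    def createContext(): StreamingContext = {
      val conf = new SparkConf().setAppName("streamingSYN")
      val ssc = new StreamingContext(conf, Seconds(batchSec.toInt))
      ssc.checkpoint(checkpointDir)

      val kafkaParams = Map[String, Object](
        "bootstrap.servers" -> brokers,
        "key.deserializer" -> classOf[StringDeserializer],
        "value.deserializer" -> classOf[StringDeserializer],
        "group.id" -> group,
        "auto.offset.reset" -> "latest",
        "enable.auto.commit" -> (false: java.lang.Boolean)
      )

      // zkQuorum is unused with the direct API; the receiver-based
      // KafkaUtils.createStream would take it instead.
      val stream = KafkaUtils.createDirectStream[String, String](
        ssc, PreferConsistent, Subscribe[String, String](Seq(topic), kafkaParams))

      stream.map(_.value()).print() // placeholder for the real business logic
      ssc
    }

    // getOrCreate rebuilds the context from the checkpoint after a YARN
    // re-attempt, so the spark.yarn.maxAppAttempts=4 restarts resume from
    // where the failed attempt left off instead of starting cold.
    val ssc = StreamingContext.getOrCreate(checkpointDir, createContext _)
    ssc.start()
    ssc.awaitTermination()
  }
}

The point of pairing spark.yarn.maxAppAttempts=4 with spark.yarn.am.attemptFailuresValidityInterval=1h is that the attempt counter resets once the application master has run stably for an hour, so the job can survive occasional crashes indefinitely while still failing fast on a crash loop; driver-side checkpoint recovery is what makes those restarts safe for a stateful stream.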