HDFS
org.apache.hadoop.hdfs.server.datanode.DataNode
org.apache.hadoop.hdfs.server.namenode.NameNode
## 非 HA 模式下才有 SecondaryNameNode,配置 HA 的话是两个 NameNode
org.apache.hadoop.hdfs.server.namenode.SecondaryNameNode
## 配置 HA 模式才有 JournalNode,用于两个 NameNode 间的数据同步
org.apache.hadoop.hdfs.qjournal.server.JournalNode
## 配置 HA 模式才有 DFSZKFailoverController,用于 NameNode 的故障恢复
org.apache.hadoop.hdfs.tools.DFSZKFailoverController
MapReduce
## 记录 MapReduce Job 的历史信息
org.apache.hadoop.mapreduce.v2.hs.JobHistoryServer
YARN
org.apache.hadoop.yarn.server.nodemanager.NodeManager
org.apache.hadoop.yarn.server.resourcemanager.ResourceManager
## 使用了 App Timeline Server,用于记录 YARN Job 的历史信息
org.apache.hadoop.yarn.server.applicationhistoryservice.ApplicationHistoryServer
Spark
## 用于记录 Spark Job 的历史信息
## 在 Job 运行时,可以通过 YARN UI 跳到 Spark Job UI
## 在 Job 结束后,需要通过 Spark History Server UI 查看 Job 的历史信息(默认端口是 18080)
org.apache.spark.deploy.history.HistoryServer
## 这是提交 Spark 应用的 Client
org.apache.spark.deploy.SparkSubmit
--master yarn
--deploy-mode cluster
--conf spark.driver.memory=1G
--conf spark.driver.extraClassPath=postgresql-xxx.jar
--name Test
--py-files Test.zip
--jars postgresql-xxx.jar,spark-streaming-kafka-xxx.jar
--executor-memory 1G
--num-executors 4
Test.py
## 这里运行的就是 Driver 程序
## 每一个 Spark 应用都有一个 ApplicationMaster 运行 Driver
org.apache.spark.deploy.yarn.ApplicationMaster
--class org.apache.spark.deploy.PythonRunner
--primary-py-file Test.py
--arg param
--properties-file /mnt/resource/hadoop/yarn/local/usercache/spark/appcache/application_1574480275665_115552/container_e12_1574480275665_115552_01_000001/__spark_conf__/__spark_conf__.properties
## 这里运行的就是 Executor 程序
## 每一个 Spark 应用有一个或多个 Executor 程序
## 可以看到 Executor 会和对应的 Driver 连接上
org.apache.spark.executor.CoarseGrainedExecutorBackend
--driver-url spark://CoarseGrainedScheduler@192.168.3.16:42742
--executor-id 3
--hostname hadoop-1
--cores 1
--app-id application_1574480275665_115552
--user-class-path file:/mnt/resource/hadoop/yarn/local/usercache/spark/appcache/application_1574480275665_115552/container_e12_1574480275665_115552_01_000004/__app__.jar
--user-class-path file:/mnt/resource/hadoop/yarn/local/usercache/spark/appcache/application_1574480275665_115552/container_e12_1574480275665_115552_01_000004/postgresql-xxx.jar
--user-class-path file:/mnt/resource/hadoop/yarn/local/usercache/spark/appcache/application_1574480275665_115552/container_e12_1574480275665_115552_01_000004/spark-streaming-kafka-xxx.jar
......
Ambari
org.apache.ambari.server.controller.AmbariServer
ZooKeeper
org.apache.zookeeper.server.quorum.QuorumPeerMain conf/zoo.cfg
Kafka
kafka.Kafka config/server.properties
HBase
org.apache.hadoop.hbase.master.HMaster start
org.apache.hadoop.hbase.regionserver.HRegionServer start
org.apache.phoenix.queryserver.server.Main