spark
将结果存回hdfs
时,如果使用9000
端口,会报Protocol message end-group tag did not match expected tag
,需要将端口就改为8020
异常原文如下:
Showing 4096 bytes of 7480 total. Click here for the full log.
n.QueryExecution.toRdd(QueryExecution.scala:56)
at org.apache.spark.sql.execution.datasources.ResolvedDataSource$.apply(ResolvedDataSource.scala:256)
at org.apache.spark.sql.DataFrameWriter.dataSource$lzycompute$1(DataFrameWriter.scala:181)
at org.apache.spark.sql.DataFrameWriter.org$apache$spark$sql$DataFrameWriter$$dataSource$1(DataFrameWriter.scala:181)
at org.apache.spark.sql.DataFrameWriter$$anonfun$save$1.apply$mcV$sp(DataFrameWriter.scala:188)
at org.apache.spark.sql.DataFrameWriter.executeAndCallQEListener(DataFrameWriter.scala:154)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:188)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:172)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)
Caused by: java.io.IOException: com.google.protobuf.InvalidProtocolBufferException: Protocol message end-group tag did not match expected tag.
at org.apache.hadoop.ipc.Client$Connection$1.run(Client.java:718)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1924)
at org.apache.hadoop.ipc.Client$Connection.handleSaslConnectionFailure(Client.java:681)
at org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:769)
at org.apache.hadoop.ipc.Client$Connection.access$3000(Client.java:396)
at org.apache.hadoop.ipc.Client.getConnection(Client.java:1557)
at org.apache.hadoop.ipc.Client.call(Client.java:1480)
... 45 more
Caused by: com.google.protobuf.InvalidProtocolBufferException: Protocol message end-group tag did not match expected tag.
at com.google.protobuf.InvalidProtocolBufferException.invalidEndTag(InvalidProtocolBufferException.java:94)
at com.google.protobuf.CodedInputStream.checkLastTagWas(CodedInputStream.java:124)
at com.google.protobuf.AbstractParser.parsePartialFrom(AbstractParser.java:143)
at com.google.protobuf.AbstractParser.parseFrom(AbstractParser.java:176)
at com.google.protobuf.AbstractParser.parseFrom(AbstractParser.java:188)
at com.google.protobuf.AbstractParser.parseFrom(AbstractParser.java:193)
at com.google.protobuf.AbstractParser.parseFrom(AbstractParser.java:49)
at org.apache.hadoop.ipc.protobuf.RpcHeaderProtos$RpcResponseHeaderProto.parseFrom(RpcHeaderProtos.java:3147)
at org.apache.hadoop.ipc.ProtobufRpcEngine$RpcResponseMessageWrapper.parseHeaderFrom(ProtobufRpcEngine.java:443)
at org.apache.hadoop.ipc.ProtobufRpcEngine$RpcResponseMessageWrapper.parseHeaderFrom(ProtobufRpcEngine.java:419)
at org.apache.hadoop.ipc.ProtobufRpcEngine$RpcMessageWithHeader.readFields(ProtobufRpcEngine.java:339)
at org.apache.hadoop.ipc.ProtobufRpcEngine$RpcResponseMessageWrapper.readFields(ProtobufRpcEngine.java:419)
at org.apache.hadoop.security.SaslRpcClient.saslConnect(SaslRpcClient.java:373)
at org.apache.hadoop.ipc.Client$Connection.setupSaslConnection(Client.java:594)
at org.apache.hadoop.ipc.Client$Connection.access$2000(Client.java:396)
at org.apache.hadoop.ipc.Client$Connection$2.run(Client.java:761)
at org.apache.hadoop.ipc.Client$Connection$2.run(Client.java:757)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1924)
at org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:756)
... 48 more
原代码:
read_df.write.format("json").save("hdfs://s0:9000/input/myjson.json")
修改为:
read_df.write.format("json").save("hdfs://s0:8020/input/myjson.json")