self_example/Big_data_example/log/agent.log

2022-05-12 15:39:46,456 ERROR --- [ main] org.apache.hadoop.hdfs.KeyProviderCache (line: 87) : Could not find uri with key [dfs.encryption.key.provider.uri] to create a keyProvider !!
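
This KeyProviderCache message is logged at ERROR but is benign unless HDFS transparent encryption (a KMS) is actually in use: it only means dfs.encryption.key.provider.uri is unset on this cluster. A minimal sketch to confirm that from inside a job, assuming an active SparkSession (the variable name spark is an assumption):

import org.apache.spark.sql.SparkSession

// Configuration.get returns null when no KMS provider is configured,
// which is exactly the condition the log line above reports.
def kmsUri(spark: SparkSession): String =
  spark.sparkContext.hadoopConfiguration.get("dfs.encryption.key.provider.uri")
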
2022-05-12 16:24:18,594 ERROR --- [ main] org.apache.spark.SparkContext (line: 94) : Error initializing SparkContext.
org.apache.spark.SparkException: A master URL must be set in your configuration
at org.apache.spark.SparkContext.<init>(SparkContext.scala:380)
at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:2555)
at org.apache.spark.sql.SparkSession$Builder.$anonfun$getOrCreate$1(SparkSession.scala:930)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:921)
at com.atguigu.userprofile.app.TaskSQLApp$.main(TaskSQLApp.scala:36)
at com.atguigu.userprofile.app.TaskSQLApp.main(TaskSQLApp.scala)
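
The SparkException above means the JVM received no master URL at all: neither spark-submit --master nor a .master(...) call on the builder supplied one, which is typical when TaskSQLApp is launched straight from an IDE. A minimal sketch of a builder for local debugging, assuming a local run (the local[*] value and app name are assumptions; for cluster runs, omit .master(...) and pass --master to spark-submit instead, since a hard-coded value would override it):

import org.apache.spark.sql.SparkSession

// local[*] runs the driver and executors in a single JVM using all local cores.
val spark = SparkSession.builder()
  .appName("TaskSQLApp")
  .master("local[*]") // assumption: development run; remove before spark-submit
  .enableHiveSupport()
  .getOrCreate()
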
2022-05-13 10:18:35,221 ERROR --- [ main] org.apache.spark.SparkContext (line: 94) : Error initializing SparkContext.
org.apache.spark.SparkException: A master URL must be set in your configuration
at org.apache.spark.SparkContext.<init>(SparkContext.scala:380)
at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:2555)
at org.apache.spark.sql.SparkSession$Builder.$anonfun$getOrCreate$1(SparkSession.scala:930)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:921)
at com.atguigu.userprofile.app.TaskSQLApp$.main(TaskSQLApp.scala:36)
at com.atguigu.userprofile.app.TaskSQLApp.main(TaskSQLApp.scala)
2022-05-13 14:19:25,658 ERROR --- [ main] org.apache.spark.SparkContext (line: 94) : Error initializing SparkContext.
org.apache.spark.SparkException: A master URL must be set in your configuration
at org.apache.spark.SparkContext.<init>(SparkContext.scala:380)
at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:2555)
at org.apache.spark.sql.SparkSession$Builder.$anonfun$getOrCreate$1(SparkSession.scala:930)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:921)
at com.atguigu.userprofile.app.TaskSQLApp$.main(TaskSQLApp.scala:36)
at com.atguigu.userprofile.app.TaskSQLApp.main(TaskSQLApp.scala)
2022-05-13 15:18:17,035 ERROR --- [ main] org.apache.spark.sql.execution.datasources.FileFormatWriter (line: 94) : Aborting job 85152cbf-ae0d-42f0-b113-d26bdfbaae7f.
org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
ObjectHashAggregate(keys=[uid#9], functions=[collect_list(if ((tag_code#6 <=> tg_base_persona_gender)) tag_value#10 else null, 0, 0), collect_list(if ((tag_code#6 <=> tg_base_persona_agegroup)) tag_value#10 else null, 0, 0)], output=[uid#9, tg_base_persona_gender#17, tg_base_persona_agegroup#19])
+- Exchange hashpartitioning(uid#9, 200), true, [id=#46]
+- ObjectHashAggregate(keys=[uid#9], functions=[partial_collect_list(if ((tag_code#6 <=> tg_base_persona_gender)) tag_value#10 else null, 0, 0), partial_collect_list(if ((tag_code#6 <=> tg_base_persona_agegroup)) tag_value#10 else null, 0, 0)], output=[uid#9, buf#25, buf#26])
+- Union
:- *(1) Project [uid#9, tg_base_persona_gender AS tag_code#6, tag_value#10]
: +- Scan hive user_profile0224.tg_base_persona_gender [tag_value#10, uid#9], HiveTableRelation `user_profile0224`.`tg_base_persona_gender`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [uid#9, tag_value#10], [dt#11], Statistics(sizeInBytes=8.0 EiB), Stream(CatalogPartition(
Partition Values: [dt=2020-06-14]
Location: hdfs://Ding202:8020/user_profile/user_profile0224/tg_base_persona_gender/dt=2020-06-14
Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Storage Properties: [serialization.format= , field.delim= ]
Partition Parameters: {rawDataSize=0, numFiles=1, transient_lastDdlTime=1652424775, totalSize=7092, COLUMN_STATS_ACCURATE={"BASIC_STATS":"true"}, numRows=0}
Created Time: Fri May 13 14:52:55 CST 2022
Last Access: UNKNOWN
Partition Statistics: 7092 bytes)), [isnotnull(dt#11), (dt#11 = 2020-06-14)]
+- *(2) Project [uid#12, tg_base_persona_agegroup AS tag_code#7, tag_value#13]
+- Scan hive user_profile0224.tg_base_persona_agegroup [tag_value#13, uid#12], HiveTableRelation `user_profile0224`.`tg_base_persona_agegroup`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [uid#12, tag_value#13], [dt#14], Statistics(sizeInBytes=8.0 EiB), Stream(CatalogPartition(
Partition Values: [dt=2020-06-14]
Location: hdfs://Ding202:8020/user_profile/user_profile0224/tg_base_persona_agegroup/dt=2020-06-14
Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Storage Properties: [serialization.format= , field.delim= ]
Partition Parameters: {rawDataSize=0, numFiles=1, transient_lastDdlTime=1652425049, totalSize=8487, COLUMN_STATS_ACCURATE={"BASIC_STATS":"true"}, numRows=0}
Created Time: Fri May 13 14:57:29 CST 2022
Last Access: UNKNOWN
Partition Statistics: 8487 bytes)), [isnotnull(dt#14), (dt#14 = 2020-06-14)]
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
at org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec.doExecute(ObjectHashAggregateExec.scala:102)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:175)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:213)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:210)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:171)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:172)
at org.apache.spark.sql.hive.execution.SaveAsHiveFile.saveAsHiveFile(SaveAsHiveFile.scala:97)
at org.apache.spark.sql.hive.execution.SaveAsHiveFile.saveAsHiveFile$(SaveAsHiveFile.scala:48)
at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.saveAsHiveFile(InsertIntoHiveTable.scala:68)
at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.processInsert(InsertIntoHiveTable.scala:208)
at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.run(InsertIntoHiveTable.scala:101)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:108)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:106)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:120)
at org.apache.spark.sql.Dataset.$anonfun$logicalPlan$1(Dataset.scala:229)
at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3616)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:100)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:160)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:87)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:763)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3614)
at org.apache.spark.sql.Dataset.<init>(Dataset.scala:229)
at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:100)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:763)
at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97)
at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:606)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:763)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:601)
at com.atguigu.userprofile.app.TaskMergeApp$.main(TaskMergeApp.scala:107)
at com.atguigu.userprofile.app.TaskMergeApp.main(TaskMergeApp.scala)
Caused by: org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
Exchange hashpartitioning(uid#9, 200), true, [id=#46]
+- ObjectHashAggregate(keys=[uid#9], functions=[partial_collect_list(if ((tag_code#6 <=> tg_base_persona_gender)) tag_value#10 else null, 0, 0), partial_collect_list(if ((tag_code#6 <=> tg_base_persona_agegroup)) tag_value#10 else null, 0, 0)], output=[uid#9, buf#25, buf#26])
+- Union
:- *(1) Project [uid#9, tg_base_persona_gender AS tag_code#6, tag_value#10]
: +- Scan hive user_profile0224.tg_base_persona_gender [tag_value#10, uid#9], HiveTableRelation `user_profile0224`.`tg_base_persona_gender`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [uid#9, tag_value#10], [dt#11], Statistics(sizeInBytes=8.0 EiB), Stream(CatalogPartition(
Partition Values: [dt=2020-06-14]
Location: hdfs://Ding202:8020/user_profile/user_profile0224/tg_base_persona_gender/dt=2020-06-14
Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Storage Properties: [serialization.format= , field.delim= ]
Partition Parameters: {rawDataSize=0, numFiles=1, transient_lastDdlTime=1652424775, totalSize=7092, COLUMN_STATS_ACCURATE={"BASIC_STATS":"true"}, numRows=0}
Created Time: Fri May 13 14:52:55 CST 2022
Last Access: UNKNOWN
Partition Statistics: 7092 bytes)), [isnotnull(dt#11), (dt#11 = 2020-06-14)]
+- *(2) Project [uid#12, tg_base_persona_agegroup AS tag_code#7, tag_value#13]
+- Scan hive user_profile0224.tg_base_persona_agegroup [tag_value#13, uid#12], HiveTableRelation `user_profile0224`.`tg_base_persona_agegroup`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [uid#12, tag_value#13], [dt#14], Statistics(sizeInBytes=8.0 EiB), Stream(CatalogPartition(
Partition Values: [dt=2020-06-14]
Location: hdfs://Ding202:8020/user_profile/user_profile0224/tg_base_persona_agegroup/dt=2020-06-14
Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Storage Properties: [serialization.format= , field.delim= ]
Partition Parameters: {rawDataSize=0, numFiles=1, transient_lastDdlTime=1652425049, totalSize=8487, COLUMN_STATS_ACCURATE={"BASIC_STATS":"true"}, numRows=0}
Created Time: Fri May 13 14:57:29 CST 2022
Last Access: UNKNOWN
Partition Statistics: 8487 bytes)), [isnotnull(dt#14), (dt#14 = 2020-06-14)]
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.doExecute(ShuffleExchangeExec.scala:95)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:175)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:213)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:210)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:171)
at org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec.$anonfun$doExecute$1(ObjectHashAggregateExec.scala:107)
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
... 32 more
Caused by: org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
ObjectHashAggregate(keys=[uid#9], functions=[partial_collect_list(if ((tag_code#6 <=> tg_base_persona_gender)) tag_value#10 else null, 0, 0), partial_collect_list(if ((tag_code#6 <=> tg_base_persona_agegroup)) tag_value#10 else null, 0, 0)], output=[uid#9, buf#25, buf#26])
+- Union
:- *(1) Project [uid#9, tg_base_persona_gender AS tag_code#6, tag_value#10]
: +- Scan hive user_profile0224.tg_base_persona_gender [tag_value#10, uid#9], HiveTableRelation `user_profile0224`.`tg_base_persona_gender`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [uid#9, tag_value#10], [dt#11], Statistics(sizeInBytes=8.0 EiB), Stream(CatalogPartition(
Partition Values: [dt=2020-06-14]
Location: hdfs://Ding202:8020/user_profile/user_profile0224/tg_base_persona_gender/dt=2020-06-14
Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Storage Properties: [serialization.format= , field.delim= ]
Partition Parameters: {rawDataSize=0, numFiles=1, transient_lastDdlTime=1652424775, totalSize=7092, COLUMN_STATS_ACCURATE={"BASIC_STATS":"true"}, numRows=0}
Created Time: Fri May 13 14:52:55 CST 2022
Last Access: UNKNOWN
Partition Statistics: 7092 bytes)), [isnotnull(dt#11), (dt#11 = 2020-06-14)]
+- *(2) Project [uid#12, tg_base_persona_agegroup AS tag_code#7, tag_value#13]
+- Scan hive user_profile0224.tg_base_persona_agegroup [tag_value#13, uid#12], HiveTableRelation `user_profile0224`.`tg_base_persona_agegroup`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [uid#12, tag_value#13], [dt#14], Statistics(sizeInBytes=8.0 EiB), Stream(CatalogPartition(
Partition Values: [dt=2020-06-14]
Location: hdfs://Ding202:8020/user_profile/user_profile0224/tg_base_persona_agegroup/dt=2020-06-14
Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Storage Properties: [serialization.format= , field.delim= ]
Partition Parameters: {rawDataSize=0, numFiles=1, transient_lastDdlTime=1652425049, totalSize=8487, COLUMN_STATS_ACCURATE={"BASIC_STATS":"true"}, numRows=0}
Created Time: Fri May 13 14:57:29 CST 2022
Last Access: UNKNOWN
Partition Statistics: 8487 bytes)), [isnotnull(dt#14), (dt#14 = 2020-06-14)]
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
at org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec.doExecute(ObjectHashAggregateExec.scala:102)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:175)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:213)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:210)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:171)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.inputRDD$lzycompute(ShuffleExchangeExec.scala:64)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.inputRDD(ShuffleExchangeExec.scala:64)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.shuffleDependency$lzycompute(ShuffleExchangeExec.scala:83)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.shuffleDependency(ShuffleExchangeExec.scala:81)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.$anonfun$doExecute$1(ShuffleExchangeExec.scala:98)
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
... 40 more
Caused by: java.lang.RuntimeException: Error in configuring object
at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:112)
at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:78)
at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:136)
at org.apache.spark.rdd.HadoopRDD.getInputFormat(HadoopRDD.scala:191)
at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:205)
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49)
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49)
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
at org.apache.spark.rdd.UnionRDD.$anonfun$getPartitions$1(UnionRDD.scala:85)
at org.apache.spark.rdd.UnionRDD.$anonfun$getPartitions$1$adapted(UnionRDD.scala:85)
at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:273)
at scala.collection.immutable.List.foreach(List.scala:392)
at scala.collection.TraversableLike.map(TraversableLike.scala:273)
at scala.collection.TraversableLike.map$(TraversableLike.scala:266)
at scala.collection.immutable.List.map(List.scala:298)
at org.apache.spark.rdd.UnionRDD.getPartitions(UnionRDD.scala:85)
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49)
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49)
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
at org.apache.spark.SparkContext.$anonfun$union$2(SparkContext.scala:1369)
at org.apache.spark.SparkContext.$anonfun$union$2$adapted(SparkContext.scala:1369)
at scala.collection.TraversableLike.noneIn$1(TraversableLike.scala:306)
at scala.collection.TraversableLike.filterImpl(TraversableLike.scala:372)
at scala.collection.TraversableLike.filterImpl$(TraversableLike.scala:284)
at scala.collection.AbstractTraversable.filterImpl(Traversable.scala:108)
at scala.collection.TraversableLike.filter(TraversableLike.scala:382)
at scala.collection.TraversableLike.filter$(TraversableLike.scala:382)
at scala.collection.AbstractTraversable.filter(Traversable.scala:108)
at org.apache.spark.SparkContext.$anonfun$union$1(SparkContext.scala:1369)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.SparkContext.withScope(SparkContext.scala:751)
at org.apache.spark.SparkContext.union(SparkContext.scala:1368)
at org.apache.spark.sql.execution.UnionExec.doExecute(basicPhysicalOperators.scala:644)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:175)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:213)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:210)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:171)
at org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec.$anonfun$doExecute$1(ObjectHashAggregateExec.scala:107)
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
... 52 more
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:109)
... 108 more
Caused by: java.lang.IllegalArgumentException: Compression codec com.hadoop.compression.lzo.LzoCodec not found.
at org.apache.hadoop.io.compress.CompressionCodecFactory.getCodecClasses(CompressionCodecFactory.java:139)
at org.apache.hadoop.io.compress.CompressionCodecFactory.<init>(CompressionCodecFactory.java:180)
at org.apache.hadoop.mapred.TextInputFormat.configure(TextInputFormat.java:45)
... 113 more
Caused by: java.lang.ClassNotFoundException: Class com.hadoop.compression.lzo.LzoCodec not found
at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2101)
at org.apache.hadoop.io.compress.CompressionCodecFactory.getCodecClasses(CompressionCodecFactory.java:132)
... 115 more
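
The cause chain above bottoms out in the ClassNotFoundException: the core-site.xml visible to this job lists com.hadoop.compression.lzo.LzoCodec in io.compression.codecs, but the hadoop-lzo jar is not on the driver/executor classpath, so even a plain TextInputFormat scan of the Hive partitions fails while building the CompressionCodecFactory. The usual remedies are to ship the hadoop-lzo jar with the job, or to remove LzoCodec from io.compression.codecs in the project's copy of core-site.xml. A minimal sketch of the first remedy, assuming the jar path and version shown (both are assumptions; point spark.jars at the actual hadoop-lzo install, or pass the same path to spark-submit --jars):

import org.apache.spark.sql.SparkSession

// spark.jars must be set before the session is created; Spark then distributes
// the jar to the driver and every executor, making LzoCodec loadable.
val spark = SparkSession.builder()
  .appName("TaskMergeApp")
  .config("spark.jars", "/opt/module/hadoop/share/hadoop/common/hadoop-lzo-0.4.20.jar") // assumed path
  .enableHiveSupport()
  .getOrCreate()
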
2022-05-13 15:24:35,126 ERROR --- [ main] org.apache.spark.sql.execution.datasources.FileFormatWriter (line: 94) : Aborting job 8e596ddc-afde-40a2-a502-bd69209a43c2.
org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
ObjectHashAggregate(keys=[uid#9], functions=[collect_list(if ((tag_code#6 <=> tg_base_persona_gender)) tag_value#10 else null, 0, 0), collect_list(if ((tag_code#6 <=> tg_base_persona_agegroup)) tag_value#10 else null, 0, 0)], output=[uid#9, tg_base_persona_gender#17, tg_base_persona_agegroup#19])
+- Exchange hashpartitioning(uid#9, 200), true, [id=#46]
+- ObjectHashAggregate(keys=[uid#9], functions=[partial_collect_list(if ((tag_code#6 <=> tg_base_persona_gender)) tag_value#10 else null, 0, 0), partial_collect_list(if ((tag_code#6 <=> tg_base_persona_agegroup)) tag_value#10 else null, 0, 0)], output=[uid#9, buf#25, buf#26])
+- Union
:- *(1) Project [uid#9, tg_base_persona_gender AS tag_code#6, tag_value#10]
: +- Scan hive user_profile0224.tg_base_persona_gender [tag_value#10, uid#9], HiveTableRelation `user_profile0224`.`tg_base_persona_gender`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [uid#9, tag_value#10], [dt#11], Statistics(sizeInBytes=8.0 EiB), Stream(CatalogPartition(
Partition Values: [dt=2020-06-14]
Location: hdfs://Ding202:8020/user_profile/user_profile0224/tg_base_persona_gender/dt=2020-06-14
Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Storage Properties: [serialization.format= , field.delim= ]
Partition Parameters: {rawDataSize=0, numFiles=1, transient_lastDdlTime=1652424775, totalSize=7092, COLUMN_STATS_ACCURATE={"BASIC_STATS":"true"}, numRows=0}
Created Time: Fri May 13 14:52:55 CST 2022
Last Access: UNKNOWN
Partition Statistics: 7092 bytes)), [isnotnull(dt#11), (dt#11 = 2020-06-14)]
+- *(2) Project [uid#12, tg_base_persona_agegroup AS tag_code#7, tag_value#13]
+- Scan hive user_profile0224.tg_base_persona_agegroup [tag_value#13, uid#12], HiveTableRelation `user_profile0224`.`tg_base_persona_agegroup`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [uid#12, tag_value#13], [dt#14], Statistics(sizeInBytes=8.0 EiB), Stream(CatalogPartition(
Partition Values: [dt=2020-06-14]
Location: hdfs://Ding202:8020/user_profile/user_profile0224/tg_base_persona_agegroup/dt=2020-06-14
Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Storage Properties: [serialization.format= , field.delim= ]
Partition Parameters: {rawDataSize=0, numFiles=1, transient_lastDdlTime=1652425049, totalSize=8487, COLUMN_STATS_ACCURATE={"BASIC_STATS":"true"}, numRows=0}
Created Time: Fri May 13 14:57:29 CST 2022
Last Access: UNKNOWN
Partition Statistics: 8487 bytes)), [isnotnull(dt#14), (dt#14 = 2020-06-14)]
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
at org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec.doExecute(ObjectHashAggregateExec.scala:102)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:175)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:213)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:210)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:171)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:172)
at org.apache.spark.sql.hive.execution.SaveAsHiveFile.saveAsHiveFile(SaveAsHiveFile.scala:97)
at org.apache.spark.sql.hive.execution.SaveAsHiveFile.saveAsHiveFile$(SaveAsHiveFile.scala:48)
at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.saveAsHiveFile(InsertIntoHiveTable.scala:68)
at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.processInsert(InsertIntoHiveTable.scala:208)
at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.run(InsertIntoHiveTable.scala:101)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:108)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:106)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:120)
at org.apache.spark.sql.Dataset.$anonfun$logicalPlan$1(Dataset.scala:229)
at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3616)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:100)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:160)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:87)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:763)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3614)
at org.apache.spark.sql.Dataset.<init>(Dataset.scala:229)
at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:100)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:763)
at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97)
at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:606)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:763)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:601)
at com.atguigu.userprofile.app.TaskMergeApp$.main(TaskMergeApp.scala:109)
at com.atguigu.userprofile.app.TaskMergeApp.main(TaskMergeApp.scala)
Caused by: org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
Exchange hashpartitioning(uid#9, 200), true, [id=#46]
+- ObjectHashAggregate(keys=[uid#9], functions=[partial_collect_list(if ((tag_code#6 <=> tg_base_persona_gender)) tag_value#10 else null, 0, 0), partial_collect_list(if ((tag_code#6 <=> tg_base_persona_agegroup)) tag_value#10 else null, 0, 0)], output=[uid#9, buf#25, buf#26])
+- Union
:- *(1) Project [uid#9, tg_base_persona_gender AS tag_code#6, tag_value#10]
: +- Scan hive user_profile0224.tg_base_persona_gender [tag_value#10, uid#9], HiveTableRelation `user_profile0224`.`tg_base_persona_gender`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [uid#9, tag_value#10], [dt#11], Statistics(sizeInBytes=8.0 EiB), Stream(CatalogPartition(
Partition Values: [dt=2020-06-14]
Location: hdfs://Ding202:8020/user_profile/user_profile0224/tg_base_persona_gender/dt=2020-06-14
Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Storage Properties: [serialization.format= , field.delim= ]
Partition Parameters: {rawDataSize=0, numFiles=1, transient_lastDdlTime=1652424775, totalSize=7092, COLUMN_STATS_ACCURATE={"BASIC_STATS":"true"}, numRows=0}
Created Time: Fri May 13 14:52:55 CST 2022
Last Access: UNKNOWN
Partition Statistics: 7092 bytes)), [isnotnull(dt#11), (dt#11 = 2020-06-14)]
+- *(2) Project [uid#12, tg_base_persona_agegroup AS tag_code#7, tag_value#13]
+- Scan hive user_profile0224.tg_base_persona_agegroup [tag_value#13, uid#12], HiveTableRelation `user_profile0224`.`tg_base_persona_agegroup`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [uid#12, tag_value#13], [dt#14], Statistics(sizeInBytes=8.0 EiB), Stream(CatalogPartition(
Partition Values: [dt=2020-06-14]
Location: hdfs://Ding202:8020/user_profile/user_profile0224/tg_base_persona_agegroup/dt=2020-06-14
Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Storage Properties: [serialization.format= , field.delim= ]
Partition Parameters: {rawDataSize=0, numFiles=1, transient_lastDdlTime=1652425049, totalSize=8487, COLUMN_STATS_ACCURATE={"BASIC_STATS":"true"}, numRows=0}
Created Time: Fri May 13 14:57:29 CST 2022
Last Access: UNKNOWN
Partition Statistics: 8487 bytes)), [isnotnull(dt#14), (dt#14 = 2020-06-14)]
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.doExecute(ShuffleExchangeExec.scala:95)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:175)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:213)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:210)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:171)
at org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec.$anonfun$doExecute$1(ObjectHashAggregateExec.scala:107)
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
... 32 more
Caused by: org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
ObjectHashAggregate(keys=[uid#9], functions=[partial_collect_list(if ((tag_code#6 <=> tg_base_persona_gender)) tag_value#10 else null, 0, 0), partial_collect_list(if ((tag_code#6 <=> tg_base_persona_agegroup)) tag_value#10 else null, 0, 0)], output=[uid#9, buf#25, buf#26])
+- Union
:- *(1) Project [uid#9, tg_base_persona_gender AS tag_code#6, tag_value#10]
: +- Scan hive user_profile0224.tg_base_persona_gender [tag_value#10, uid#9], HiveTableRelation `user_profile0224`.`tg_base_persona_gender`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [uid#9, tag_value#10], [dt#11], Statistics(sizeInBytes=8.0 EiB), Stream(CatalogPartition(
Partition Values: [dt=2020-06-14]
Location: hdfs://Ding202:8020/user_profile/user_profile0224/tg_base_persona_gender/dt=2020-06-14
Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Storage Properties: [serialization.format= , field.delim= ]
Partition Parameters: {rawDataSize=0, numFiles=1, transient_lastDdlTime=1652424775, totalSize=7092, COLUMN_STATS_ACCURATE={"BASIC_STATS":"true"}, numRows=0}
Created Time: Fri May 13 14:52:55 CST 2022
Last Access: UNKNOWN
Partition Statistics: 7092 bytes)), [isnotnull(dt#11), (dt#11 = 2020-06-14)]
+- *(2) Project [uid#12, tg_base_persona_agegroup AS tag_code#7, tag_value#13]
+- Scan hive user_profile0224.tg_base_persona_agegroup [tag_value#13, uid#12], HiveTableRelation `user_profile0224`.`tg_base_persona_agegroup`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [uid#12, tag_value#13], [dt#14], Statistics(sizeInBytes=8.0 EiB), Stream(CatalogPartition(
Partition Values: [dt=2020-06-14]
Location: hdfs://Ding202:8020/user_profile/user_profile0224/tg_base_persona_agegroup/dt=2020-06-14
Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Storage Properties: [serialization.format= , field.delim= ]
Partition Parameters: {rawDataSize=0, numFiles=1, transient_lastDdlTime=1652425049, totalSize=8487, COLUMN_STATS_ACCURATE={"BASIC_STATS":"true"}, numRows=0}
Created Time: Fri May 13 14:57:29 CST 2022
Last Access: UNKNOWN
Partition Statistics: 8487 bytes)), [isnotnull(dt#14), (dt#14 = 2020-06-14)]
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
at org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec.doExecute(ObjectHashAggregateExec.scala:102)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:175)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:213)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:210)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:171)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.inputRDD$lzycompute(ShuffleExchangeExec.scala:64)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.inputRDD(ShuffleExchangeExec.scala:64)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.shuffleDependency$lzycompute(ShuffleExchangeExec.scala:83)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.shuffleDependency(ShuffleExchangeExec.scala:81)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.$anonfun$doExecute$1(ShuffleExchangeExec.scala:98)
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
... 40 more
Caused by: java.lang.RuntimeException: Error in configuring object
at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:112)
at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:78)
at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:136)
at org.apache.spark.rdd.HadoopRDD.getInputFormat(HadoopRDD.scala:191)
at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:205)
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49)
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49)
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
at org.apache.spark.rdd.UnionRDD.$anonfun$getPartitions$1(UnionRDD.scala:85)
at org.apache.spark.rdd.UnionRDD.$anonfun$getPartitions$1$adapted(UnionRDD.scala:85)
at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:273)
at scala.collection.immutable.List.foreach(List.scala:392)
at scala.collection.TraversableLike.map(TraversableLike.scala:273)
at scala.collection.TraversableLike.map$(TraversableLike.scala:266)
at scala.collection.immutable.List.map(List.scala:298)
at org.apache.spark.rdd.UnionRDD.getPartitions(UnionRDD.scala:85)
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49)
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49)
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
at org.apache.spark.SparkContext.$anonfun$union$2(SparkContext.scala:1369)
at org.apache.spark.SparkContext.$anonfun$union$2$adapted(SparkContext.scala:1369)
at scala.collection.TraversableLike.noneIn$1(TraversableLike.scala:306)
at scala.collection.TraversableLike.filterImpl(TraversableLike.scala:372)
at scala.collection.TraversableLike.filterImpl$(TraversableLike.scala:284)
at scala.collection.AbstractTraversable.filterImpl(Traversable.scala:108)
at scala.collection.TraversableLike.filter(TraversableLike.scala:382)
at scala.collection.TraversableLike.filter$(TraversableLike.scala:382)
at scala.collection.AbstractTraversable.filter(Traversable.scala:108)
at org.apache.spark.SparkContext.$anonfun$union$1(SparkContext.scala:1369)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.SparkContext.withScope(SparkContext.scala:751)
at org.apache.spark.SparkContext.union(SparkContext.scala:1368)
at org.apache.spark.sql.execution.UnionExec.doExecute(basicPhysicalOperators.scala:644)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:175)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:213)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:210)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:171)
at org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec.$anonfun$doExecute$1(ObjectHashAggregateExec.scala:107)
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
... 52 more
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:109)
... 108 more
Caused by: java.lang.IllegalArgumentException: Compression codec com.hadoop.compression.lzo.LzoCodec not found.
at org.apache.hadoop.io.compress.CompressionCodecFactory.getCodecClasses(CompressionCodecFactory.java:139)
at org.apache.hadoop.io.compress.CompressionCodecFactory.<init>(CompressionCodecFactory.java:180)
at org.apache.hadoop.mapred.TextInputFormat.configure(TextInputFormat.java:45)
... 113 more
Caused by: java.lang.ClassNotFoundException: Class com.hadoop.compression.lzo.LzoCodec not found
at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2101)
at org.apache.hadoop.io.compress.CompressionCodecFactory.getCodecClasses(CompressionCodecFactory.java:132)
... 115 more
2022-05-13 15:27:07,187 ERROR --- [ main] org.apache.spark.sql.execution.datasources.FileFormatWriter (line: 94) : Aborting job e032d846-e5b1-4539-875b-e2c44428931d.
org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
ObjectHashAggregate(keys=[uid#9], functions=[collect_list(if ((tag_code#6 <=> tg_base_persona_gender)) tag_value#10 else null, 0, 0), collect_list(if ((tag_code#6 <=> tg_base_persona_agegroup)) tag_value#10 else null, 0, 0)], output=[uid#9, tg_base_persona_gender#17, tg_base_persona_agegroup#19])
+- Exchange hashpartitioning(uid#9, 200), true, [id=#46]
+- ObjectHashAggregate(keys=[uid#9], functions=[partial_collect_list(if ((tag_code#6 <=> tg_base_persona_gender)) tag_value#10 else null, 0, 0), partial_collect_list(if ((tag_code#6 <=> tg_base_persona_agegroup)) tag_value#10 else null, 0, 0)], output=[uid#9, buf#25, buf#26])
+- Union
:- *(1) Project [uid#9, tg_base_persona_gender AS tag_code#6, tag_value#10]
: +- Scan hive user_profile0224.tg_base_persona_gender [tag_value#10, uid#9], HiveTableRelation `user_profile0224`.`tg_base_persona_gender`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [uid#9, tag_value#10], [dt#11], Statistics(sizeInBytes=8.0 EiB), Stream(CatalogPartition(
Partition Values: [dt=2020-06-14]
Location: hdfs://Ding202:8020/user_profile/user_profile0224/tg_base_persona_gender/dt=2020-06-14
Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Storage Properties: [serialization.format= , field.delim= ]
Partition Parameters: {rawDataSize=0, numFiles=1, transient_lastDdlTime=1652424775, totalSize=7092, COLUMN_STATS_ACCURATE={"BASIC_STATS":"true"}, numRows=0}
Created Time: Fri May 13 14:52:55 CST 2022
Last Access: UNKNOWN
Partition Statistics: 7092 bytes)), [isnotnull(dt#11), (dt#11 = 2020-06-14)]
+- *(2) Project [uid#12, tg_base_persona_agegroup AS tag_code#7, tag_value#13]
+- Scan hive user_profile0224.tg_base_persona_agegroup [tag_value#13, uid#12], HiveTableRelation `user_profile0224`.`tg_base_persona_agegroup`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [uid#12, tag_value#13], [dt#14], Statistics(sizeInBytes=8.0 EiB), Stream(CatalogPartition(
Partition Values: [dt=2020-06-14]
Location: hdfs://Ding202:8020/user_profile/user_profile0224/tg_base_persona_agegroup/dt=2020-06-14
Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Storage Properties: [serialization.format= , field.delim= ]
Partition Parameters: {rawDataSize=0, numFiles=1, transient_lastDdlTime=1652425049, totalSize=8487, COLUMN_STATS_ACCURATE={"BASIC_STATS":"true"}, numRows=0}
Created Time: Fri May 13 14:57:29 CST 2022
Last Access: UNKNOWN
Partition Statistics: 8487 bytes)), [isnotnull(dt#14), (dt#14 = 2020-06-14)]
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
at org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec.doExecute(ObjectHashAggregateExec.scala:102)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:175)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:213)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:210)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:171)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:172)
at org.apache.spark.sql.hive.execution.SaveAsHiveFile.saveAsHiveFile(SaveAsHiveFile.scala:97)
at org.apache.spark.sql.hive.execution.SaveAsHiveFile.saveAsHiveFile$(SaveAsHiveFile.scala:48)
at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.saveAsHiveFile(InsertIntoHiveTable.scala:68)
at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.processInsert(InsertIntoHiveTable.scala:208)
at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.run(InsertIntoHiveTable.scala:101)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:108)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:106)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:120)
at org.apache.spark.sql.Dataset.$anonfun$logicalPlan$1(Dataset.scala:229)
at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3616)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:100)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:160)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:87)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:763)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3614)
at org.apache.spark.sql.Dataset.<init>(Dataset.scala:229)
at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:100)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:763)
at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97)
at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:606)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:763)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:601)
at com.atguigu.userprofile.app.TaskMergeApp$.main(TaskMergeApp.scala:109)
at com.atguigu.userprofile.app.TaskMergeApp.main(TaskMergeApp.scala)
Caused by: org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
Exchange hashpartitioning(uid#9, 200), true, [id=#46]
+- ObjectHashAggregate(keys=[uid#9], functions=[partial_collect_list(if ((tag_code#6 <=> tg_base_persona_gender)) tag_value#10 else null, 0, 0), partial_collect_list(if ((tag_code#6 <=> tg_base_persona_agegroup)) tag_value#10 else null, 0, 0)], output=[uid#9, buf#25, buf#26])
+- Union
:- *(1) Project [uid#9, tg_base_persona_gender AS tag_code#6, tag_value#10]
: +- Scan hive user_profile0224.tg_base_persona_gender [tag_value#10, uid#9], HiveTableRelation `user_profile0224`.`tg_base_persona_gender`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [uid#9, tag_value#10], [dt#11], Statistics(sizeInBytes=8.0 EiB), Stream(CatalogPartition(
Partition Values: [dt=2020-06-14]
Location: hdfs://Ding202:8020/user_profile/user_profile0224/tg_base_persona_gender/dt=2020-06-14
Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Storage Properties: [serialization.format= , field.delim= ]
Partition Parameters: {rawDataSize=0, numFiles=1, transient_lastDdlTime=1652424775, totalSize=7092, COLUMN_STATS_ACCURATE={"BASIC_STATS":"true"}, numRows=0}
Created Time: Fri May 13 14:52:55 CST 2022
Last Access: UNKNOWN
Partition Statistics: 7092 bytes)), [isnotnull(dt#11), (dt#11 = 2020-06-14)]
+- *(2) Project [uid#12, tg_base_persona_agegroup AS tag_code#7, tag_value#13]
+- Scan hive user_profile0224.tg_base_persona_agegroup [tag_value#13, uid#12], HiveTableRelation `user_profile0224`.`tg_base_persona_agegroup`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [uid#12, tag_value#13], [dt#14], Statistics(sizeInBytes=8.0 EiB), Stream(CatalogPartition(
Partition Values: [dt=2020-06-14]
Location: hdfs://Ding202:8020/user_profile/user_profile0224/tg_base_persona_agegroup/dt=2020-06-14
Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Storage Properties: [serialization.format= , field.delim= ]
Partition Parameters: {rawDataSize=0, numFiles=1, transient_lastDdlTime=1652425049, totalSize=8487, COLUMN_STATS_ACCURATE={"BASIC_STATS":"true"}, numRows=0}
Created Time: Fri May 13 14:57:29 CST 2022
Last Access: UNKNOWN
Partition Statistics: 8487 bytes)), [isnotnull(dt#14), (dt#14 = 2020-06-14)]
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.doExecute(ShuffleExchangeExec.scala:95)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:175)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:213)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:210)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:171)
at org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec.$anonfun$doExecute$1(ObjectHashAggregateExec.scala:107)
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
... 32 more
Caused by: org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
ObjectHashAggregate(keys=[uid#9], functions=[partial_collect_list(if ((tag_code#6 <=> tg_base_persona_gender)) tag_value#10 else null, 0, 0), partial_collect_list(if ((tag_code#6 <=> tg_base_persona_agegroup)) tag_value#10 else null, 0, 0)], output=[uid#9, buf#25, buf#26])
+- Union
:- *(1) Project [uid#9, tg_base_persona_gender AS tag_code#6, tag_value#10]
: +- Scan hive user_profile0224.tg_base_persona_gender [tag_value#10, uid#9], HiveTableRelation `user_profile0224`.`tg_base_persona_gender`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [uid#9, tag_value#10], [dt#11], Statistics(sizeInBytes=8.0 EiB), Stream(CatalogPartition(
Partition Values: [dt=2020-06-14]
Location: hdfs://Ding202:8020/user_profile/user_profile0224/tg_base_persona_gender/dt=2020-06-14
Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Storage Properties: [serialization.format= , field.delim= ]
Partition Parameters: {rawDataSize=0, numFiles=1, transient_lastDdlTime=1652424775, totalSize=7092, COLUMN_STATS_ACCURATE={"BASIC_STATS":"true"}, numRows=0}
Created Time: Fri May 13 14:52:55 CST 2022
Last Access: UNKNOWN
Partition Statistics: 7092 bytes)), [isnotnull(dt#11), (dt#11 = 2020-06-14)]
+- *(2) Project [uid#12, tg_base_persona_agegroup AS tag_code#7, tag_value#13]
+- Scan hive user_profile0224.tg_base_persona_agegroup [tag_value#13, uid#12], HiveTableRelation `user_profile0224`.`tg_base_persona_agegroup`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [uid#12, tag_value#13], [dt#14], Statistics(sizeInBytes=8.0 EiB), Stream(CatalogPartition(
Partition Values: [dt=2020-06-14]
Location: hdfs://Ding202:8020/user_profile/user_profile0224/tg_base_persona_agegroup/dt=2020-06-14
Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Storage Properties: [serialization.format= , field.delim= ]
Partition Parameters: {rawDataSize=0, numFiles=1, transient_lastDdlTime=1652425049, totalSize=8487, COLUMN_STATS_ACCURATE={"BASIC_STATS":"true"}, numRows=0}
Created Time: Fri May 13 14:57:29 CST 2022
Last Access: UNKNOWN
Partition Statistics: 8487 bytes)), [isnotnull(dt#14), (dt#14 = 2020-06-14)]
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
at org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec.doExecute(ObjectHashAggregateExec.scala:102)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:175)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:213)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:210)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:171)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.inputRDD$lzycompute(ShuffleExchangeExec.scala:64)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.inputRDD(ShuffleExchangeExec.scala:64)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.shuffleDependency$lzycompute(ShuffleExchangeExec.scala:83)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.shuffleDependency(ShuffleExchangeExec.scala:81)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.$anonfun$doExecute$1(ShuffleExchangeExec.scala:98)
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
... 40 more
Caused by: java.lang.RuntimeException: Error in configuring object
at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:112)
at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:78)
at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:136)
at org.apache.spark.rdd.HadoopRDD.getInputFormat(HadoopRDD.scala:191)
at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:205)
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49)
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49)
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
at org.apache.spark.rdd.UnionRDD.$anonfun$getPartitions$1(UnionRDD.scala:85)
at org.apache.spark.rdd.UnionRDD.$anonfun$getPartitions$1$adapted(UnionRDD.scala:85)
at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:273)
at scala.collection.immutable.List.foreach(List.scala:392)
at scala.collection.TraversableLike.map(TraversableLike.scala:273)
at scala.collection.TraversableLike.map$(TraversableLike.scala:266)
at scala.collection.immutable.List.map(List.scala:298)
at org.apache.spark.rdd.UnionRDD.getPartitions(UnionRDD.scala:85)
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49)
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49)
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
at org.apache.spark.SparkContext.$anonfun$union$2(SparkContext.scala:1369)
at org.apache.spark.SparkContext.$anonfun$union$2$adapted(SparkContext.scala:1369)
at scala.collection.TraversableLike.noneIn$1(TraversableLike.scala:306)
at scala.collection.TraversableLike.filterImpl(TraversableLike.scala:372)
at scala.collection.TraversableLike.filterImpl$(TraversableLike.scala:284)
at scala.collection.AbstractTraversable.filterImpl(Traversable.scala:108)
at scala.collection.TraversableLike.filter(TraversableLike.scala:382)
at scala.collection.TraversableLike.filter$(TraversableLike.scala:382)
at scala.collection.AbstractTraversable.filter(Traversable.scala:108)
at org.apache.spark.SparkContext.$anonfun$union$1(SparkContext.scala:1369)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.SparkContext.withScope(SparkContext.scala:751)
at org.apache.spark.SparkContext.union(SparkContext.scala:1368)
at org.apache.spark.sql.execution.UnionExec.doExecute(basicPhysicalOperators.scala:644)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:175)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:213)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:210)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:171)
at org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec.$anonfun$doExecute$1(ObjectHashAggregateExec.scala:107)
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
... 52 more
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:109)
... 108 more
Caused by: java.lang.IllegalArgumentException: Compression codec com.hadoop.compression.lzo.LzoCodec not found.
at org.apache.hadoop.io.compress.CompressionCodecFactory.getCodecClasses(CompressionCodecFactory.java:139)
at org.apache.hadoop.io.compress.CompressionCodecFactory.<init>(CompressionCodecFactory.java:180)
at org.apache.hadoop.mapred.TextInputFormat.configure(TextInputFormat.java:45)
... 113 more
Caused by: java.lang.ClassNotFoundException: Class com.hadoop.compression.lzo.LzoCodec not found
at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2101)
at org.apache.hadoop.io.compress.CompressionCodecFactory.getCodecClasses(CompressionCodecFactory.java:132)
... 115 more
2022-05-13 15:34:00,710 ERROR --- [ main] org.apache.hadoop.hdfs.KeyProviderCache (line: 87) : Could not find uri with key [dfs.encryption.key.provider.uri] to create a keyProvider !!