2022-05-12 15:39:46,456 ERROR --- [ main] org.apache.hadoop.hdfs.KeyProviderCache (line: 87) : Could not find uri with key [dfs.encryption.key.provider.uri] to create a keyProvider !!

2022-05-12 16:24:18,594 ERROR --- [ main] org.apache.spark.SparkContext (line: 94) : Error initializing SparkContext.
org.apache.spark.SparkException: A master URL must be set in your configuration
    at org.apache.spark.SparkContext.<init>(SparkContext.scala:380)
    at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:2555)
    at org.apache.spark.sql.SparkSession$Builder.$anonfun$getOrCreate$1(SparkSession.scala:930)
    at scala.Option.getOrElse(Option.scala:189)
    at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:921)
    at com.atguigu.userprofile.app.TaskSQLApp$.main(TaskSQLApp.scala:36)
    at com.atguigu.userprofile.app.TaskSQLApp.main(TaskSQLApp.scala)

[The identical "Error initializing SparkContext" failure, with the same stack trace, recurred at 2022-05-13 10:18:35,221 and 2022-05-13 14:19:25,658; the duplicate traces are omitted.]
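These SparkContext failures share one cause: SparkSession.Builder.getOrCreate() was called without a master URL, which must be set either in code via .master(...) or at launch time via spark-submit --master. A minimal sketch of the fix, assuming the job is being run directly from an IDE; the "local[*]" value and the body of main are illustrative, not the project's actual code:

```scala
import org.apache.spark.sql.SparkSession

object TaskSQLApp {
  def main(args: Array[String]): Unit = {
    // The missing .master(...) (or --master flag) is exactly what triggers
    // the SparkException above. "local[*]" is a placeholder for local runs;
    // Hive support is assumed because the job reads Hive tables.
    val spark = SparkSession.builder()
      .appName("TaskSQLApp")
      .master("local[*]")
      .enableHiveSupport()
      .getOrCreate()

    // ... task logic elided ...

    spark.stop()
  }
}
```

On a cluster the master is usually not hard-coded at all but supplied at submit time (for example, spark-submit --master yarn), which keeps the deployment choice out of the jar.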
2022-05-13 15:18:17,035 ERROR --- [ main] org.apache.spark.sql.execution.datasources.FileFormatWriter (line: 94) : Aborting job 85152cbf-ae0d-42f0-b113-d26bdfbaae7f.
org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
ObjectHashAggregate(keys=[uid#9], functions=[collect_list(if ((tag_code#6 <=> tg_base_persona_gender)) tag_value#10 else null, 0, 0), collect_list(if ((tag_code#6 <=> tg_base_persona_agegroup)) tag_value#10 else null, 0, 0)], output=[uid#9, tg_base_persona_gender#17, tg_base_persona_agegroup#19])
+- Exchange hashpartitioning(uid#9, 200), true, [id=#46]
   +- ObjectHashAggregate(keys=[uid#9], functions=[partial_collect_list(if ((tag_code#6 <=> tg_base_persona_gender)) tag_value#10 else null, 0, 0), partial_collect_list(if ((tag_code#6 <=> tg_base_persona_agegroup)) tag_value#10 else null, 0, 0)], output=[uid#9, buf#25, buf#26])
      +- Union
         :- *(1) Project [uid#9, tg_base_persona_gender AS tag_code#6, tag_value#10]
         :  +- Scan hive user_profile0224.tg_base_persona_gender [tag_value#10, uid#9], HiveTableRelation `user_profile0224`.`tg_base_persona_gender`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [uid#9, tag_value#10], [dt#11], Statistics(sizeInBytes=8.0 EiB), Stream(CatalogPartition( Partition Values: [dt=2020-06-14] Location: hdfs://Ding202:8020/user_profile/user_profile0224/tg_base_persona_gender/dt=2020-06-14 Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Storage Properties: [serialization.format= , field.delim= ] Partition Parameters: {rawDataSize=0, numFiles=1, transient_lastDdlTime=1652424775, totalSize=7092, COLUMN_STATS_ACCURATE={"BASIC_STATS":"true"}, numRows=0} Created Time: Fri May 13 14:52:55 CST 2022 Last Access: UNKNOWN Partition Statistics: 7092 bytes)), [isnotnull(dt#11), (dt#11 = 2020-06-14)]
         +- *(2) Project [uid#12, tg_base_persona_agegroup AS tag_code#7, tag_value#13]
            +- Scan hive user_profile0224.tg_base_persona_agegroup [tag_value#13, uid#12], HiveTableRelation `user_profile0224`.`tg_base_persona_agegroup`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [uid#12, tag_value#13], [dt#14], Statistics(sizeInBytes=8.0 EiB), Stream(CatalogPartition( Partition Values: [dt=2020-06-14] Location: hdfs://Ding202:8020/user_profile/user_profile0224/tg_base_persona_agegroup/dt=2020-06-14 Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe InputFormat: org.apache.hadoop.mapred.TextInputFormat OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Storage Properties: [serialization.format= , field.delim= ] Partition Parameters: {rawDataSize=0, numFiles=1, transient_lastDdlTime=1652425049, totalSize=8487, COLUMN_STATS_ACCURATE={"BASIC_STATS":"true"}, numRows=0} Created Time: Fri May 13 14:57:29 CST 2022 Last Access: UNKNOWN Partition Statistics: 8487 bytes)), [isnotnull(dt#14), (dt#14 = 2020-06-14)]
    at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
    at org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec.doExecute(ObjectHashAggregateExec.scala:102)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:175)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:213)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:210)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:171)
    at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:172)
    at org.apache.spark.sql.hive.execution.SaveAsHiveFile.saveAsHiveFile(SaveAsHiveFile.scala:97)
    at org.apache.spark.sql.hive.execution.SaveAsHiveFile.saveAsHiveFile$(SaveAsHiveFile.scala:48)
    at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.saveAsHiveFile(InsertIntoHiveTable.scala:68)
    at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.processInsert(InsertIntoHiveTable.scala:208)
    at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.run(InsertIntoHiveTable.scala:101)
    at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:108)
    at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:106)
    at org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:120)
    at org.apache.spark.sql.Dataset.$anonfun$logicalPlan$1(Dataset.scala:229)
    at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3616)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:100)
    at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:160)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:87)
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:763)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
    at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3614)
    at org.apache.spark.sql.Dataset.<init>(Dataset.scala:229)
    at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:100)
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:763)
    at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97)
    at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:606)
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:763)
    at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:601)
    at com.atguigu.userprofile.app.TaskMergeApp$.main(TaskMergeApp.scala:107)
    at com.atguigu.userprofile.app.TaskMergeApp.main(TaskMergeApp.scala)
Caused by: org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
[the Exchange hashpartitioning(uid#9, 200) subtree of the plan above; repeated plan dump omitted]
    at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
    at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.doExecute(ShuffleExchangeExec.scala:95)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:175)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:213)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:210)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:171)
    at org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec.$anonfun$doExecute$1(ObjectHashAggregateExec.scala:107)
    at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
    ... 32 more
Caused by: org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
[the partial ObjectHashAggregate subtree of the plan above; repeated plan dump omitted]
    at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
    at org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec.doExecute(ObjectHashAggregateExec.scala:102)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:175)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:213)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:210)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:171)
    at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.inputRDD$lzycompute(ShuffleExchangeExec.scala:64)
    at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.inputRDD(ShuffleExchangeExec.scala:64)
    at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.shuffleDependency$lzycompute(ShuffleExchangeExec.scala:83)
    at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.shuffleDependency(ShuffleExchangeExec.scala:81)
    at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.$anonfun$doExecute$1(ShuffleExchangeExec.scala:98)
    at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
    ... 40 more
Caused by: java.lang.RuntimeException: Error in configuring object
    at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:112)
    at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:78)
    at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:136)
    at org.apache.spark.rdd.HadoopRDD.getInputFormat(HadoopRDD.scala:191)
    at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:205)
    at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
    at scala.Option.getOrElse(Option.scala:189)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49)
    at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
    at scala.Option.getOrElse(Option.scala:189)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49)
    at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
    at scala.Option.getOrElse(Option.scala:189)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
    at org.apache.spark.rdd.UnionRDD.$anonfun$getPartitions$1(UnionRDD.scala:85)
    at org.apache.spark.rdd.UnionRDD.$anonfun$getPartitions$1$adapted(UnionRDD.scala:85)
    at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:273)
    at scala.collection.immutable.List.foreach(List.scala:392)
    at scala.collection.TraversableLike.map(TraversableLike.scala:273)
    at scala.collection.TraversableLike.map$(TraversableLike.scala:266)
    at scala.collection.immutable.List.map(List.scala:298)
    at org.apache.spark.rdd.UnionRDD.getPartitions(UnionRDD.scala:85)
    at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
    at scala.Option.getOrElse(Option.scala:189)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49)
    at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
    at scala.Option.getOrElse(Option.scala:189)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49)
    at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
    at scala.Option.getOrElse(Option.scala:189)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
    at org.apache.spark.SparkContext.$anonfun$union$2(SparkContext.scala:1369)
    at org.apache.spark.SparkContext.$anonfun$union$2$adapted(SparkContext.scala:1369)
    at scala.collection.TraversableLike.noneIn$1(TraversableLike.scala:306)
    at scala.collection.TraversableLike.filterImpl(TraversableLike.scala:372)
    at scala.collection.TraversableLike.filterImpl$(TraversableLike.scala:284)
    at scala.collection.AbstractTraversable.filterImpl(Traversable.scala:108)
    at scala.collection.TraversableLike.filter(TraversableLike.scala:382)
    at scala.collection.TraversableLike.filter$(TraversableLike.scala:382)
    at scala.collection.AbstractTraversable.filter(Traversable.scala:108)
    at org.apache.spark.SparkContext.$anonfun$union$1(SparkContext.scala:1369)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
    at org.apache.spark.SparkContext.withScope(SparkContext.scala:751)
    at org.apache.spark.SparkContext.union(SparkContext.scala:1368)
    at org.apache.spark.sql.execution.UnionExec.doExecute(basicPhysicalOperators.scala:644)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:175)
    at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:213)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:210)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:171)
    at org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec.$anonfun$doExecute$1(ObjectHashAggregateExec.scala:107)
    at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
    ... 52 more
Caused by: java.lang.reflect.InvocationTargetException
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:109)
    ... 108 more
Caused by: java.lang.IllegalArgumentException: Compression codec com.hadoop.compression.lzo.LzoCodec not found.
    at org.apache.hadoop.io.compress.CompressionCodecFactory.getCodecClasses(CompressionCodecFactory.java:139)
    at org.apache.hadoop.io.compress.CompressionCodecFactory.<init>(CompressionCodecFactory.java:180)
    at org.apache.hadoop.mapred.TextInputFormat.configure(TextInputFormat.java:45)
    ... 113 more
Caused by: java.lang.ClassNotFoundException: Class com.hadoop.compression.lzo.LzoCodec not found
    at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2101)
    at org.apache.hadoop.io.compress.CompressionCodecFactory.getCodecClasses(CompressionCodecFactory.java:132)
    ... 115 more
2022-05-13 15:24:35,126 ERROR --- [ main] org.apache.spark.sql.execution.datasources.FileFormatWriter (line: 94) : Aborting job 8e596ddc-afde-40a2-a502-bd69209a43c2.
[Identical TreeNodeException chain ending in ClassNotFoundException: com.hadoop.compression.lzo.LzoCodec, now submitted from TaskMergeApp.scala:109; duplicate trace omitted.]

2022-05-13 15:27:07,187 ERROR --- [ main] org.apache.spark.sql.execution.datasources.FileFormatWriter (line: 94) : Aborting job e032d846-e5b1-4539-875b-e2c44428931d.
[Identical trace, also from TaskMergeApp.scala:109; duplicate omitted.]
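All three aborted TaskMergeApp jobs bottom out in the same root cause: Hadoop's io.compression.codecs property (typically set in core-site.xml) lists com.hadoop.compression.lzo.LzoCodec, but the hadoop-lzo jar is not on the Spark driver/executor classpath, so CompressionCodecFactory fails while configuring TextInputFormat for the Hive table scans. The durable fix is to put the hadoop-lzo jar on Spark's classpath, for example by copying it into $SPARK_HOME/jars on every node or passing it with spark-submit --jars. If none of the tables actually store LZO data, a session-level workaround is to override the codec list so the missing class is never loaded. A sketch of that workaround; the codec names are the stock Hadoop built-ins, and the builder only mirrors what TaskMergeApp presumably does rather than its actual code:

```scala
import org.apache.spark.sql.SparkSession

object TaskMergeApp {
  def main(args: Array[String]): Unit = {
    // spark.hadoop.* settings are copied into the Hadoop Configuration that
    // HadoopRDD hands to CompressionCodecFactory, so listing only codecs
    // that exist on the classpath sidesteps the ClassNotFoundException.
    val spark = SparkSession.builder()
      .appName("TaskMergeApp")
      .enableHiveSupport()
      .config("spark.hadoop.io.compression.codecs",
        "org.apache.hadoop.io.compress.GzipCodec," +
          "org.apache.hadoop.io.compress.DefaultCodec," +
          "org.apache.hadoop.io.compress.BZip2Codec")
      .getOrCreate()

    // ... merge logic elided ...

    spark.stop()
  }
}
```

Note that this only avoids loading the codec class; any table whose files really are LZO-compressed still requires the hadoop-lzo jar.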
2022-05-13 15:34:00,710 ERROR --- [ main] org.apache.hadoop.hdfs.KeyProviderCache (line: 87) : Could not find uri with key [dfs.encryption.key.provider.uri] to create a keyProvider !!
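The KeyProviderCache errors that bracket this log are a known piece of HDFS client noise: when dfs.encryption.key.provider.uri is not configured, the client logs at ERROR level that no KeyProvider could be created, which is harmless unless HDFS transparent encryption is actually in use. If the noise is unwanted, the logger can be silenced from the application side; a sketch assuming the log4j 1.x API bundled with this generation of Spark and Hadoop (the helper object and its name are illustrative):

```scala
import org.apache.log4j.{Level, Logger}

object LogNoise {
  // Turn off only the single noisy logger, leaving the rest of the
  // Hadoop/Spark logging configuration untouched. Call once, early in
  // main(), before the first HDFS access.
  def silenceKeyProviderCache(): Unit =
    Logger.getLogger("org.apache.hadoop.hdfs.KeyProviderCache").setLevel(Level.OFF)
}
```

The same effect can be had without code by raising that logger's level in the cluster's log4j.properties.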