self_example/Big_data_example/log/agent.log

2022-05-12 15:39:46,456 ERROR --- [ main] org.apache.hadoop.hdfs.KeyProviderCache (line: 87) : Could not find uri with key [dfs.encryption.key.provider.uri] to create a keyProvider !!
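
This KeyProviderCache message is logged at ERROR but is benign unless HDFS transparent encryption (a KMS) is actually in use: it only means dfs.encryption.key.provider.uri is unset on this cluster. A minimal sketch to confirm that from inside a job, assuming an active SparkSession (the variable name spark is an assumption):

import org.apache.spark.sql.SparkSession

// Configuration.get returns null when no KMS provider is configured,
// which is exactly the condition the log line above reports.
def kmsUri(spark: SparkSession): String =
  spark.sparkContext.hadoopConfiguration.get("dfs.encryption.key.provider.uri")
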
2022-05-12 16:24:18,594 ERROR --- [ main] org.apache.spark.SparkContext (line: 94) : Error initializing SparkContext.
org.apache.spark.SparkException: A master URL must be set in your configuration
at org.apache.spark.SparkContext.<init>(SparkContext.scala:380)
at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:2555)
at org.apache.spark.sql.SparkSession$Builder.$anonfun$getOrCreate$1(SparkSession.scala:930)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:921)
at com.atguigu.userprofile.app.TaskSQLApp$.main(TaskSQLApp.scala:36)
at com.atguigu.userprofile.app.TaskSQLApp.main(TaskSQLApp.scala)
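
The SparkException above means the JVM received no master URL at all: neither spark-submit --master nor a .master(...) call on the builder supplied one, which is typical when TaskSQLApp is launched straight from an IDE. A minimal sketch of a builder for local debugging, assuming a local run (the local[*] value and app name are assumptions; for cluster runs, omit .master(...) and pass --master to spark-submit instead, since a hard-coded value would override it):

import org.apache.spark.sql.SparkSession

// local[*] runs the driver and executors in a single JVM using all local cores.
val spark = SparkSession.builder()
  .appName("TaskSQLApp")
  .master("local[*]") // assumption: development run; remove before spark-submit
  .enableHiveSupport()
  .getOrCreate()
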
2022-05-13 10:18:35,221 ERROR --- [ main] org.apache.spark.SparkContext (line: 94) : Error initializing SparkContext.
org.apache.spark.SparkException: A master URL must be set in your configuration
at org.apache.spark.SparkContext.<init>(SparkContext.scala:380)
at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:2555)
at org.apache.spark.sql.SparkSession$Builder.$anonfun$getOrCreate$1(SparkSession.scala:930)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:921)
at com.atguigu.userprofile.app.TaskSQLApp$.main(TaskSQLApp.scala:36)
at com.atguigu.userprofile.app.TaskSQLApp.main(TaskSQLApp.scala)
2022-05-13 14:19:25,658 ERROR --- [ main] org.apache.spark.SparkContext (line: 94) : Error initializing SparkContext.
org.apache.spark.SparkException: A master URL must be set in your configuration
at org.apache.spark.SparkContext.<init>(SparkContext.scala:380)
at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:2555)
at org.apache.spark.sql.SparkSession$Builder.$anonfun$getOrCreate$1(SparkSession.scala:930)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:921)
at com.atguigu.userprofile.app.TaskSQLApp$.main(TaskSQLApp.scala:36)
at com.atguigu.userprofile.app.TaskSQLApp.main(TaskSQLApp.scala)
2022-05-13 15:18:17,035 ERROR --- [ main] org.apache.spark.sql.execution.datasources.FileFormatWriter (line: 94) : Aborting job 85152cbf-ae0d-42f0-b113-d26bdfbaae7f.
org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
ObjectHashAggregate(keys=[uid#9], functions=[collect_list(if ((tag_code#6 <=> tg_base_persona_gender)) tag_value#10 else null, 0, 0), collect_list(if ((tag_code#6 <=> tg_base_persona_agegroup)) tag_value#10 else null, 0, 0)], output=[uid#9, tg_base_persona_gender#17, tg_base_persona_agegroup#19])
+- Exchange hashpartitioning(uid#9, 200), true, [id=#46]
+- ObjectHashAggregate(keys=[uid#9], functions=[partial_collect_list(if ((tag_code#6 <=> tg_base_persona_gender)) tag_value#10 else null, 0, 0), partial_collect_list(if ((tag_code#6 <=> tg_base_persona_agegroup)) tag_value#10 else null, 0, 0)], output=[uid#9, buf#25, buf#26])
+- Union
:- *(1) Project [uid#9, tg_base_persona_gender AS tag_code#6, tag_value#10]
: +- Scan hive user_profile0224.tg_base_persona_gender [tag_value#10, uid#9], HiveTableRelation `user_profile0224`.`tg_base_persona_gender`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [uid#9, tag_value#10], [dt#11], Statistics(sizeInBytes=8.0 EiB), Stream(CatalogPartition(
Partition Values: [dt=2020-06-14]
Location: hdfs://Ding202:8020/user_profile/user_profile0224/tg_base_persona_gender/dt=2020-06-14
Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Storage Properties: [serialization.format= , field.delim= ]
Partition Parameters: {rawDataSize=0, numFiles=1, transient_lastDdlTime=1652424775, totalSize=7092, COLUMN_STATS_ACCURATE={"BASIC_STATS":"true"}, numRows=0}
Created Time: Fri May 13 14:52:55 CST 2022
Last Access: UNKNOWN
Partition Statistics: 7092 bytes)), [isnotnull(dt#11), (dt#11 = 2020-06-14)]
+- *(2) Project [uid#12, tg_base_persona_agegroup AS tag_code#7, tag_value#13]
+- Scan hive user_profile0224.tg_base_persona_agegroup [tag_value#13, uid#12], HiveTableRelation `user_profile0224`.`tg_base_persona_agegroup`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [uid#12, tag_value#13], [dt#14], Statistics(sizeInBytes=8.0 EiB), Stream(CatalogPartition(
Partition Values: [dt=2020-06-14]
Location: hdfs://Ding202:8020/user_profile/user_profile0224/tg_base_persona_agegroup/dt=2020-06-14
Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Storage Properties: [serialization.format= , field.delim= ]
Partition Parameters: {rawDataSize=0, numFiles=1, transient_lastDdlTime=1652425049, totalSize=8487, COLUMN_STATS_ACCURATE={"BASIC_STATS":"true"}, numRows=0}
Created Time: Fri May 13 14:57:29 CST 2022
Last Access: UNKNOWN
Partition Statistics: 8487 bytes)), [isnotnull(dt#14), (dt#14 = 2020-06-14)]
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
at org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec.doExecute(ObjectHashAggregateExec.scala:102)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:175)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:213)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:210)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:171)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:172)
at org.apache.spark.sql.hive.execution.SaveAsHiveFile.saveAsHiveFile(SaveAsHiveFile.scala:97)
at org.apache.spark.sql.hive.execution.SaveAsHiveFile.saveAsHiveFile$(SaveAsHiveFile.scala:48)
at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.saveAsHiveFile(InsertIntoHiveTable.scala:68)
at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.processInsert(InsertIntoHiveTable.scala:208)
at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.run(InsertIntoHiveTable.scala:101)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:108)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:106)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:120)
at org.apache.spark.sql.Dataset.$anonfun$logicalPlan$1(Dataset.scala:229)
at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3616)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:100)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:160)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:87)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:763)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3614)
at org.apache.spark.sql.Dataset.<init>(Dataset.scala:229)
at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:100)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:763)
at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97)
at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:606)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:763)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:601)
at com.atguigu.userprofile.app.TaskMergeApp$.main(TaskMergeApp.scala:107)
at com.atguigu.userprofile.app.TaskMergeApp.main(TaskMergeApp.scala)
Caused by: org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
Exchange hashpartitioning(uid#9, 200), true, [id=#46]
+- ObjectHashAggregate(keys=[uid#9], functions=[partial_collect_list(if ((tag_code#6 <=> tg_base_persona_gender)) tag_value#10 else null, 0, 0), partial_collect_list(if ((tag_code#6 <=> tg_base_persona_agegroup)) tag_value#10 else null, 0, 0)], output=[uid#9, buf#25, buf#26])
+- Union
:- *(1) Project [uid#9, tg_base_persona_gender AS tag_code#6, tag_value#10]
: +- Scan hive user_profile0224.tg_base_persona_gender [tag_value#10, uid#9], HiveTableRelation `user_profile0224`.`tg_base_persona_gender`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [uid#9, tag_value#10], [dt#11], Statistics(sizeInBytes=8.0 EiB), Stream(CatalogPartition(
Partition Values: [dt=2020-06-14]
Location: hdfs://Ding202:8020/user_profile/user_profile0224/tg_base_persona_gender/dt=2020-06-14
Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Storage Properties: [serialization.format= , field.delim= ]
Partition Parameters: {rawDataSize=0, numFiles=1, transient_lastDdlTime=1652424775, totalSize=7092, COLUMN_STATS_ACCURATE={"BASIC_STATS":"true"}, numRows=0}
Created Time: Fri May 13 14:52:55 CST 2022
Last Access: UNKNOWN
Partition Statistics: 7092 bytes)), [isnotnull(dt#11), (dt#11 = 2020-06-14)]
+- *(2) Project [uid#12, tg_base_persona_agegroup AS tag_code#7, tag_value#13]
+- Scan hive user_profile0224.tg_base_persona_agegroup [tag_value#13, uid#12], HiveTableRelation `user_profile0224`.`tg_base_persona_agegroup`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [uid#12, tag_value#13], [dt#14], Statistics(sizeInBytes=8.0 EiB), Stream(CatalogPartition(
Partition Values: [dt=2020-06-14]
Location: hdfs://Ding202:8020/user_profile/user_profile0224/tg_base_persona_agegroup/dt=2020-06-14
Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Storage Properties: [serialization.format= , field.delim= ]
Partition Parameters: {rawDataSize=0, numFiles=1, transient_lastDdlTime=1652425049, totalSize=8487, COLUMN_STATS_ACCURATE={"BASIC_STATS":"true"}, numRows=0}
Created Time: Fri May 13 14:57:29 CST 2022
Last Access: UNKNOWN
Partition Statistics: 8487 bytes)), [isnotnull(dt#14), (dt#14 = 2020-06-14)]
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.doExecute(ShuffleExchangeExec.scala:95)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:175)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:213)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:210)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:171)
at org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec.$anonfun$doExecute$1(ObjectHashAggregateExec.scala:107)
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
... 32 more
Caused by: org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
ObjectHashAggregate(keys=[uid#9], functions=[partial_collect_list(if ((tag_code#6 <=> tg_base_persona_gender)) tag_value#10 else null, 0, 0), partial_collect_list(if ((tag_code#6 <=> tg_base_persona_agegroup)) tag_value#10 else null, 0, 0)], output=[uid#9, buf#25, buf#26])
+- Union
:- *(1) Project [uid#9, tg_base_persona_gender AS tag_code#6, tag_value#10]
: +- Scan hive user_profile0224.tg_base_persona_gender [tag_value#10, uid#9], HiveTableRelation `user_profile0224`.`tg_base_persona_gender`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [uid#9, tag_value#10], [dt#11], Statistics(sizeInBytes=8.0 EiB), Stream(CatalogPartition(
Partition Values: [dt=2020-06-14]
Location: hdfs://Ding202:8020/user_profile/user_profile0224/tg_base_persona_gender/dt=2020-06-14
Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Storage Properties: [serialization.format= , field.delim= ]
Partition Parameters: {rawDataSize=0, numFiles=1, transient_lastDdlTime=1652424775, totalSize=7092, COLUMN_STATS_ACCURATE={"BASIC_STATS":"true"}, numRows=0}
Created Time: Fri May 13 14:52:55 CST 2022
Last Access: UNKNOWN
Partition Statistics: 7092 bytes)), [isnotnull(dt#11), (dt#11 = 2020-06-14)]
+- *(2) Project [uid#12, tg_base_persona_agegroup AS tag_code#7, tag_value#13]
+- Scan hive user_profile0224.tg_base_persona_agegroup [tag_value#13, uid#12], HiveTableRelation `user_profile0224`.`tg_base_persona_agegroup`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [uid#12, tag_value#13], [dt#14], Statistics(sizeInBytes=8.0 EiB), Stream(CatalogPartition(
Partition Values: [dt=2020-06-14]
Location: hdfs://Ding202:8020/user_profile/user_profile0224/tg_base_persona_agegroup/dt=2020-06-14
Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Storage Properties: [serialization.format= , field.delim= ]
Partition Parameters: {rawDataSize=0, numFiles=1, transient_lastDdlTime=1652425049, totalSize=8487, COLUMN_STATS_ACCURATE={"BASIC_STATS":"true"}, numRows=0}
Created Time: Fri May 13 14:57:29 CST 2022
Last Access: UNKNOWN
Partition Statistics: 8487 bytes)), [isnotnull(dt#14), (dt#14 = 2020-06-14)]
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
at org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec.doExecute(ObjectHashAggregateExec.scala:102)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:175)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:213)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:210)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:171)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.inputRDD$lzycompute(ShuffleExchangeExec.scala:64)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.inputRDD(ShuffleExchangeExec.scala:64)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.shuffleDependency$lzycompute(ShuffleExchangeExec.scala:83)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.shuffleDependency(ShuffleExchangeExec.scala:81)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.$anonfun$doExecute$1(ShuffleExchangeExec.scala:98)
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
... 40 more
Caused by: java.lang.RuntimeException: Error in configuring object
at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:112)
at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:78)
at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:136)
at org.apache.spark.rdd.HadoopRDD.getInputFormat(HadoopRDD.scala:191)
at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:205)
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49)
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49)
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
at org.apache.spark.rdd.UnionRDD.$anonfun$getPartitions$1(UnionRDD.scala:85)
at org.apache.spark.rdd.UnionRDD.$anonfun$getPartitions$1$adapted(UnionRDD.scala:85)
at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:273)
at scala.collection.immutable.List.foreach(List.scala:392)
at scala.collection.TraversableLike.map(TraversableLike.scala:273)
at scala.collection.TraversableLike.map$(TraversableLike.scala:266)
at scala.collection.immutable.List.map(List.scala:298)
at org.apache.spark.rdd.UnionRDD.getPartitions(UnionRDD.scala:85)
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49)
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49)
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
at org.apache.spark.SparkContext.$anonfun$union$2(SparkContext.scala:1369)
at org.apache.spark.SparkContext.$anonfun$union$2$adapted(SparkContext.scala:1369)
at scala.collection.TraversableLike.noneIn$1(TraversableLike.scala:306)
at scala.collection.TraversableLike.filterImpl(TraversableLike.scala:372)
at scala.collection.TraversableLike.filterImpl$(TraversableLike.scala:284)
at scala.collection.AbstractTraversable.filterImpl(Traversable.scala:108)
at scala.collection.TraversableLike.filter(TraversableLike.scala:382)
at scala.collection.TraversableLike.filter$(TraversableLike.scala:382)
at scala.collection.AbstractTraversable.filter(Traversable.scala:108)
at org.apache.spark.SparkContext.$anonfun$union$1(SparkContext.scala:1369)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.SparkContext.withScope(SparkContext.scala:751)
at org.apache.spark.SparkContext.union(SparkContext.scala:1368)
at org.apache.spark.sql.execution.UnionExec.doExecute(basicPhysicalOperators.scala:644)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:175)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:213)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:210)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:171)
at org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec.$anonfun$doExecute$1(ObjectHashAggregateExec.scala:107)
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
... 52 more
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:109)
... 108 more
Caused by: java.lang.IllegalArgumentException: Compression codec com.hadoop.compression.lzo.LzoCodec not found.
at org.apache.hadoop.io.compress.CompressionCodecFactory.getCodecClasses(CompressionCodecFactory.java:139)
at org.apache.hadoop.io.compress.CompressionCodecFactory.<init>(CompressionCodecFactory.java:180)
at org.apache.hadoop.mapred.TextInputFormat.configure(TextInputFormat.java:45)
... 113 more
Caused by: java.lang.ClassNotFoundException: Class com.hadoop.compression.lzo.LzoCodec not found
at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2101)
at org.apache.hadoop.io.compress.CompressionCodecFactory.getCodecClasses(CompressionCodecFactory.java:132)
... 115 more
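
The cause chain above bottoms out in the ClassNotFoundException: the core-site.xml visible to this job lists com.hadoop.compression.lzo.LzoCodec in io.compression.codecs, but the hadoop-lzo jar is not on the driver/executor classpath, so even a plain TextInputFormat scan of the Hive partitions fails while building the CompressionCodecFactory. The usual remedies are to ship the hadoop-lzo jar with the job, or to remove LzoCodec from io.compression.codecs in the project's copy of core-site.xml. A minimal sketch of the first remedy, assuming the jar path and version shown (both are assumptions; point spark.jars at the actual hadoop-lzo install, or pass the same path to spark-submit --jars):

import org.apache.spark.sql.SparkSession

// spark.jars must be set before the session is created; Spark then distributes
// the jar to the driver and every executor, making LzoCodec loadable.
val spark = SparkSession.builder()
  .appName("TaskMergeApp")
  .config("spark.jars", "/opt/module/hadoop/share/hadoop/common/hadoop-lzo-0.4.20.jar") // assumed path
  .enableHiveSupport()
  .getOrCreate()
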
2022-05-13 15:24:35,126 ERROR --- [ main] org.apache.spark.sql.execution.datasources.FileFormatWriter (line: 94) : Aborting job 8e596ddc-afde-40a2-a502-bd69209a43c2.
org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
ObjectHashAggregate(keys=[uid#9], functions=[collect_list(if ((tag_code#6 <=> tg_base_persona_gender)) tag_value#10 else null, 0, 0), collect_list(if ((tag_code#6 <=> tg_base_persona_agegroup)) tag_value#10 else null, 0, 0)], output=[uid#9, tg_base_persona_gender#17, tg_base_persona_agegroup#19])
+- Exchange hashpartitioning(uid#9, 200), true, [id=#46]
+- ObjectHashAggregate(keys=[uid#9], functions=[partial_collect_list(if ((tag_code#6 <=> tg_base_persona_gender)) tag_value#10 else null, 0, 0), partial_collect_list(if ((tag_code#6 <=> tg_base_persona_agegroup)) tag_value#10 else null, 0, 0)], output=[uid#9, buf#25, buf#26])
+- Union
:- *(1) Project [uid#9, tg_base_persona_gender AS tag_code#6, tag_value#10]
: +- Scan hive user_profile0224.tg_base_persona_gender [tag_value#10, uid#9], HiveTableRelation `user_profile0224`.`tg_base_persona_gender`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [uid#9, tag_value#10], [dt#11], Statistics(sizeInBytes=8.0 EiB), Stream(CatalogPartition(
Partition Values: [dt=2020-06-14]
Location: hdfs://Ding202:8020/user_profile/user_profile0224/tg_base_persona_gender/dt=2020-06-14
Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Storage Properties: [serialization.format= , field.delim= ]
Partition Parameters: {rawDataSize=0, numFiles=1, transient_lastDdlTime=1652424775, totalSize=7092, COLUMN_STATS_ACCURATE={"BASIC_STATS":"true"}, numRows=0}
Created Time: Fri May 13 14:52:55 CST 2022
Last Access: UNKNOWN
Partition Statistics: 7092 bytes)), [isnotnull(dt#11), (dt#11 = 2020-06-14)]
+- *(2) Project [uid#12, tg_base_persona_agegroup AS tag_code#7, tag_value#13]
+- Scan hive user_profile0224.tg_base_persona_agegroup [tag_value#13, uid#12], HiveTableRelation `user_profile0224`.`tg_base_persona_agegroup`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [uid#12, tag_value#13], [dt#14], Statistics(sizeInBytes=8.0 EiB), Stream(CatalogPartition(
Partition Values: [dt=2020-06-14]
Location: hdfs://Ding202:8020/user_profile/user_profile0224/tg_base_persona_agegroup/dt=2020-06-14
Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Storage Properties: [serialization.format= , field.delim= ]
Partition Parameters: {rawDataSize=0, numFiles=1, transient_lastDdlTime=1652425049, totalSize=8487, COLUMN_STATS_ACCURATE={"BASIC_STATS":"true"}, numRows=0}
Created Time: Fri May 13 14:57:29 CST 2022
Last Access: UNKNOWN
Partition Statistics: 8487 bytes)), [isnotnull(dt#14), (dt#14 = 2020-06-14)]
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
at org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec.doExecute(ObjectHashAggregateExec.scala:102)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:175)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:213)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:210)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:171)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:172)
at org.apache.spark.sql.hive.execution.SaveAsHiveFile.saveAsHiveFile(SaveAsHiveFile.scala:97)
at org.apache.spark.sql.hive.execution.SaveAsHiveFile.saveAsHiveFile$(SaveAsHiveFile.scala:48)
at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.saveAsHiveFile(InsertIntoHiveTable.scala:68)
at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.processInsert(InsertIntoHiveTable.scala:208)
at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.run(InsertIntoHiveTable.scala:101)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:108)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:106)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:120)
at org.apache.spark.sql.Dataset.$anonfun$logicalPlan$1(Dataset.scala:229)
at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3616)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:100)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:160)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:87)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:763)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3614)
at org.apache.spark.sql.Dataset.<init>(Dataset.scala:229)
at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:100)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:763)
at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97)
at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:606)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:763)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:601)
at com.atguigu.userprofile.app.TaskMergeApp$.main(TaskMergeApp.scala:109)
at com.atguigu.userprofile.app.TaskMergeApp.main(TaskMergeApp.scala)
Caused by: org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
Exchange hashpartitioning(uid#9, 200), true, [id=#46]
+- ObjectHashAggregate(keys=[uid#9], functions=[partial_collect_list(if ((tag_code#6 <=> tg_base_persona_gender)) tag_value#10 else null, 0, 0), partial_collect_list(if ((tag_code#6 <=> tg_base_persona_agegroup)) tag_value#10 else null, 0, 0)], output=[uid#9, buf#25, buf#26])
+- Union
:- *(1) Project [uid#9, tg_base_persona_gender AS tag_code#6, tag_value#10]
: +- Scan hive user_profile0224.tg_base_persona_gender [tag_value#10, uid#9], HiveTableRelation `user_profile0224`.`tg_base_persona_gender`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [uid#9, tag_value#10], [dt#11], Statistics(sizeInBytes=8.0 EiB), Stream(CatalogPartition(
Partition Values: [dt=2020-06-14]
Location: hdfs://Ding202:8020/user_profile/user_profile0224/tg_base_persona_gender/dt=2020-06-14
Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Storage Properties: [serialization.format= , field.delim= ]
Partition Parameters: {rawDataSize=0, numFiles=1, transient_lastDdlTime=1652424775, totalSize=7092, COLUMN_STATS_ACCURATE={"BASIC_STATS":"true"}, numRows=0}
Created Time: Fri May 13 14:52:55 CST 2022
Last Access: UNKNOWN
Partition Statistics: 7092 bytes)), [isnotnull(dt#11), (dt#11 = 2020-06-14)]
+- *(2) Project [uid#12, tg_base_persona_agegroup AS tag_code#7, tag_value#13]
+- Scan hive user_profile0224.tg_base_persona_agegroup [tag_value#13, uid#12], HiveTableRelation `user_profile0224`.`tg_base_persona_agegroup`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [uid#12, tag_value#13], [dt#14], Statistics(sizeInBytes=8.0 EiB), Stream(CatalogPartition(
Partition Values: [dt=2020-06-14]
Location: hdfs://Ding202:8020/user_profile/user_profile0224/tg_base_persona_agegroup/dt=2020-06-14
Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Storage Properties: [serialization.format= , field.delim= ]
Partition Parameters: {rawDataSize=0, numFiles=1, transient_lastDdlTime=1652425049, totalSize=8487, COLUMN_STATS_ACCURATE={"BASIC_STATS":"true"}, numRows=0}
Created Time: Fri May 13 14:57:29 CST 2022
Last Access: UNKNOWN
Partition Statistics: 8487 bytes)), [isnotnull(dt#14), (dt#14 = 2020-06-14)]
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.doExecute(ShuffleExchangeExec.scala:95)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:175)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:213)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:210)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:171)
at org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec.$anonfun$doExecute$1(ObjectHashAggregateExec.scala:107)
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
... 32 more
Caused by: org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
ObjectHashAggregate(keys=[uid#9], functions=[partial_collect_list(if ((tag_code#6 <=> tg_base_persona_gender)) tag_value#10 else null, 0, 0), partial_collect_list(if ((tag_code#6 <=> tg_base_persona_agegroup)) tag_value#10 else null, 0, 0)], output=[uid#9, buf#25, buf#26])
+- Union
:- *(1) Project [uid#9, tg_base_persona_gender AS tag_code#6, tag_value#10]
: +- Scan hive user_profile0224.tg_base_persona_gender [tag_value#10, uid#9], HiveTableRelation `user_profile0224`.`tg_base_persona_gender`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [uid#9, tag_value#10], [dt#11], Statistics(sizeInBytes=8.0 EiB), Stream(CatalogPartition(
Partition Values: [dt=2020-06-14]
Location: hdfs://Ding202:8020/user_profile/user_profile0224/tg_base_persona_gender/dt=2020-06-14
Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Storage Properties: [serialization.format= , field.delim= ]
Partition Parameters: {rawDataSize=0, numFiles=1, transient_lastDdlTime=1652424775, totalSize=7092, COLUMN_STATS_ACCURATE={"BASIC_STATS":"true"}, numRows=0}
Created Time: Fri May 13 14:52:55 CST 2022
Last Access: UNKNOWN
Partition Statistics: 7092 bytes)), [isnotnull(dt#11), (dt#11 = 2020-06-14)]
+- *(2) Project [uid#12, tg_base_persona_agegroup AS tag_code#7, tag_value#13]
+- Scan hive user_profile0224.tg_base_persona_agegroup [tag_value#13, uid#12], HiveTableRelation `user_profile0224`.`tg_base_persona_agegroup`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [uid#12, tag_value#13], [dt#14], Statistics(sizeInBytes=8.0 EiB), Stream(CatalogPartition(
Partition Values: [dt=2020-06-14]
Location: hdfs://Ding202:8020/user_profile/user_profile0224/tg_base_persona_agegroup/dt=2020-06-14
Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Storage Properties: [serialization.format= , field.delim= ]
Partition Parameters: {rawDataSize=0, numFiles=1, transient_lastDdlTime=1652425049, totalSize=8487, COLUMN_STATS_ACCURATE={"BASIC_STATS":"true"}, numRows=0}
Created Time: Fri May 13 14:57:29 CST 2022
Last Access: UNKNOWN
Partition Statistics: 8487 bytes)), [isnotnull(dt#14), (dt#14 = 2020-06-14)]
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
at org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec.doExecute(ObjectHashAggregateExec.scala:102)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:175)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:213)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:210)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:171)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.inputRDD$lzycompute(ShuffleExchangeExec.scala:64)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.inputRDD(ShuffleExchangeExec.scala:64)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.shuffleDependency$lzycompute(ShuffleExchangeExec.scala:83)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.shuffleDependency(ShuffleExchangeExec.scala:81)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.$anonfun$doExecute$1(ShuffleExchangeExec.scala:98)
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
... 40 more
Caused by: java.lang.RuntimeException: Error in configuring object
at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:112)
at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:78)
at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:136)
at org.apache.spark.rdd.HadoopRDD.getInputFormat(HadoopRDD.scala:191)
at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:205)
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49)
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49)
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
at org.apache.spark.rdd.UnionRDD.$anonfun$getPartitions$1(UnionRDD.scala:85)
at org.apache.spark.rdd.UnionRDD.$anonfun$getPartitions$1$adapted(UnionRDD.scala:85)
at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:273)
at scala.collection.immutable.List.foreach(List.scala:392)
at scala.collection.TraversableLike.map(TraversableLike.scala:273)
at scala.collection.TraversableLike.map$(TraversableLike.scala:266)
at scala.collection.immutable.List.map(List.scala:298)
at org.apache.spark.rdd.UnionRDD.getPartitions(UnionRDD.scala:85)
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49)
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49)
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
at org.apache.spark.SparkContext.$anonfun$union$2(SparkContext.scala:1369)
at org.apache.spark.SparkContext.$anonfun$union$2$adapted(SparkContext.scala:1369)
at scala.collection.TraversableLike.noneIn$1(TraversableLike.scala:306)
at scala.collection.TraversableLike.filterImpl(TraversableLike.scala:372)
at scala.collection.TraversableLike.filterImpl$(TraversableLike.scala:284)
at scala.collection.AbstractTraversable.filterImpl(Traversable.scala:108)
at scala.collection.TraversableLike.filter(TraversableLike.scala:382)
at scala.collection.TraversableLike.filter$(TraversableLike.scala:382)
at scala.collection.AbstractTraversable.filter(Traversable.scala:108)
at org.apache.spark.SparkContext.$anonfun$union$1(SparkContext.scala:1369)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.SparkContext.withScope(SparkContext.scala:751)
at org.apache.spark.SparkContext.union(SparkContext.scala:1368)
at org.apache.spark.sql.execution.UnionExec.doExecute(basicPhysicalOperators.scala:644)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:175)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:213)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:210)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:171)
at org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec.$anonfun$doExecute$1(ObjectHashAggregateExec.scala:107)
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
... 52 more
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:109)
... 108 more
Caused by: java.lang.IllegalArgumentException: Compression codec com.hadoop.compression.lzo.LzoCodec not found.
at org.apache.hadoop.io.compress.CompressionCodecFactory.getCodecClasses(CompressionCodecFactory.java:139)
at org.apache.hadoop.io.compress.CompressionCodecFactory.<init>(CompressionCodecFactory.java:180)
at org.apache.hadoop.mapred.TextInputFormat.configure(TextInputFormat.java:45)
... 113 more
Caused by: java.lang.ClassNotFoundException: Class com.hadoop.compression.lzo.LzoCodec not found
at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2101)
at org.apache.hadoop.io.compress.CompressionCodecFactory.getCodecClasses(CompressionCodecFactory.java:132)
... 115 more
2022-05-13 15:27:07,187 ERROR --- [ main] org.apache.spark.sql.execution.datasources.FileFormatWriter (line: 94) : Aborting job e032d846-e5b1-4539-875b-e2c44428931d.
org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
ObjectHashAggregate(keys=[uid#9], functions=[collect_list(if ((tag_code#6 <=> tg_base_persona_gender)) tag_value#10 else null, 0, 0), collect_list(if ((tag_code#6 <=> tg_base_persona_agegroup)) tag_value#10 else null, 0, 0)], output=[uid#9, tg_base_persona_gender#17, tg_base_persona_agegroup#19])
+- Exchange hashpartitioning(uid#9, 200), true, [id=#46]
+- ObjectHashAggregate(keys=[uid#9], functions=[partial_collect_list(if ((tag_code#6 <=> tg_base_persona_gender)) tag_value#10 else null, 0, 0), partial_collect_list(if ((tag_code#6 <=> tg_base_persona_agegroup)) tag_value#10 else null, 0, 0)], output=[uid#9, buf#25, buf#26])
+- Union
:- *(1) Project [uid#9, tg_base_persona_gender AS tag_code#6, tag_value#10]
: +- Scan hive user_profile0224.tg_base_persona_gender [tag_value#10, uid#9], HiveTableRelation `user_profile0224`.`tg_base_persona_gender`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [uid#9, tag_value#10], [dt#11], Statistics(sizeInBytes=8.0 EiB), Stream(CatalogPartition(
Partition Values: [dt=2020-06-14]
Location: hdfs://Ding202:8020/user_profile/user_profile0224/tg_base_persona_gender/dt=2020-06-14
Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Storage Properties: [serialization.format= , field.delim= ]
Partition Parameters: {rawDataSize=0, numFiles=1, transient_lastDdlTime=1652424775, totalSize=7092, COLUMN_STATS_ACCURATE={"BASIC_STATS":"true"}, numRows=0}
Created Time: Fri May 13 14:52:55 CST 2022
Last Access: UNKNOWN
Partition Statistics: 7092 bytes)), [isnotnull(dt#11), (dt#11 = 2020-06-14)]
+- *(2) Project [uid#12, tg_base_persona_agegroup AS tag_code#7, tag_value#13]
+- Scan hive user_profile0224.tg_base_persona_agegroup [tag_value#13, uid#12], HiveTableRelation `user_profile0224`.`tg_base_persona_agegroup`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [uid#12, tag_value#13], [dt#14], Statistics(sizeInBytes=8.0 EiB), Stream(CatalogPartition(
Partition Values: [dt=2020-06-14]
Location: hdfs://Ding202:8020/user_profile/user_profile0224/tg_base_persona_agegroup/dt=2020-06-14
Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Storage Properties: [serialization.format= , field.delim= ]
Partition Parameters: {rawDataSize=0, numFiles=1, transient_lastDdlTime=1652425049, totalSize=8487, COLUMN_STATS_ACCURATE={"BASIC_STATS":"true"}, numRows=0}
Created Time: Fri May 13 14:57:29 CST 2022
Last Access: UNKNOWN
Partition Statistics: 8487 bytes)), [isnotnull(dt#14), (dt#14 = 2020-06-14)]
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
at org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec.doExecute(ObjectHashAggregateExec.scala:102)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:175)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:213)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:210)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:171)
at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:172)
at org.apache.spark.sql.hive.execution.SaveAsHiveFile.saveAsHiveFile(SaveAsHiveFile.scala:97)
at org.apache.spark.sql.hive.execution.SaveAsHiveFile.saveAsHiveFile$(SaveAsHiveFile.scala:48)
at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.saveAsHiveFile(InsertIntoHiveTable.scala:68)
at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.processInsert(InsertIntoHiveTable.scala:208)
at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.run(InsertIntoHiveTable.scala:101)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:108)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:106)
at org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:120)
at org.apache.spark.sql.Dataset.$anonfun$logicalPlan$1(Dataset.scala:229)
at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3616)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:100)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:160)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:87)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:763)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3614)
at org.apache.spark.sql.Dataset.<init>(Dataset.scala:229)
at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:100)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:763)
at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97)
at org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:606)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:763)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:601)
at com.atguigu.userprofile.app.TaskMergeApp$.main(TaskMergeApp.scala:109)
at com.atguigu.userprofile.app.TaskMergeApp.main(TaskMergeApp.scala)
Caused by: org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
Exchange hashpartitioning(uid#9, 200), true, [id=#46]
+- ObjectHashAggregate(keys=[uid#9], functions=[partial_collect_list(if ((tag_code#6 <=> tg_base_persona_gender)) tag_value#10 else null, 0, 0), partial_collect_list(if ((tag_code#6 <=> tg_base_persona_agegroup)) tag_value#10 else null, 0, 0)], output=[uid#9, buf#25, buf#26])
+- Union
:- *(1) Project [uid#9, tg_base_persona_gender AS tag_code#6, tag_value#10]
: +- Scan hive user_profile0224.tg_base_persona_gender [tag_value#10, uid#9], HiveTableRelation `user_profile0224`.`tg_base_persona_gender`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [uid#9, tag_value#10], [dt#11], Statistics(sizeInBytes=8.0 EiB), Stream(CatalogPartition(
Partition Values: [dt=2020-06-14]
Location: hdfs://Ding202:8020/user_profile/user_profile0224/tg_base_persona_gender/dt=2020-06-14
Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Storage Properties: [serialization.format= , field.delim= ]
Partition Parameters: {rawDataSize=0, numFiles=1, transient_lastDdlTime=1652424775, totalSize=7092, COLUMN_STATS_ACCURATE={"BASIC_STATS":"true"}, numRows=0}
Created Time: Fri May 13 14:52:55 CST 2022
Last Access: UNKNOWN
Partition Statistics: 7092 bytes)), [isnotnull(dt#11), (dt#11 = 2020-06-14)]
+- *(2) Project [uid#12, tg_base_persona_agegroup AS tag_code#7, tag_value#13]
+- Scan hive user_profile0224.tg_base_persona_agegroup [tag_value#13, uid#12], HiveTableRelation `user_profile0224`.`tg_base_persona_agegroup`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [uid#12, tag_value#13], [dt#14], Statistics(sizeInBytes=8.0 EiB), Stream(CatalogPartition(
Partition Values: [dt=2020-06-14]
Location: hdfs://Ding202:8020/user_profile/user_profile0224/tg_base_persona_agegroup/dt=2020-06-14
Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Storage Properties: [serialization.format= , field.delim= ]
Partition Parameters: {rawDataSize=0, numFiles=1, transient_lastDdlTime=1652425049, totalSize=8487, COLUMN_STATS_ACCURATE={"BASIC_STATS":"true"}, numRows=0}
Created Time: Fri May 13 14:57:29 CST 2022
Last Access: UNKNOWN
Partition Statistics: 8487 bytes)), [isnotnull(dt#14), (dt#14 = 2020-06-14)]
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.doExecute(ShuffleExchangeExec.scala:95)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:175)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:213)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:210)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:171)
at org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec.$anonfun$doExecute$1(ObjectHashAggregateExec.scala:107)
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
... 32 more
Caused by: org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
ObjectHashAggregate(keys=[uid#9], functions=[partial_collect_list(if ((tag_code#6 <=> tg_base_persona_gender)) tag_value#10 else null, 0, 0), partial_collect_list(if ((tag_code#6 <=> tg_base_persona_agegroup)) tag_value#10 else null, 0, 0)], output=[uid#9, buf#25, buf#26])
+- Union
:- *(1) Project [uid#9, tg_base_persona_gender AS tag_code#6, tag_value#10]
: +- Scan hive user_profile0224.tg_base_persona_gender [tag_value#10, uid#9], HiveTableRelation `user_profile0224`.`tg_base_persona_gender`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [uid#9, tag_value#10], [dt#11], Statistics(sizeInBytes=8.0 EiB), Stream(CatalogPartition(
Partition Values: [dt=2020-06-14]
Location: hdfs://Ding202:8020/user_profile/user_profile0224/tg_base_persona_gender/dt=2020-06-14
Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Storage Properties: [serialization.format= , field.delim= ]
Partition Parameters: {rawDataSize=0, numFiles=1, transient_lastDdlTime=1652424775, totalSize=7092, COLUMN_STATS_ACCURATE={"BASIC_STATS":"true"}, numRows=0}
Created Time: Fri May 13 14:52:55 CST 2022
Last Access: UNKNOWN
Partition Statistics: 7092 bytes)), [isnotnull(dt#11), (dt#11 = 2020-06-14)]
+- *(2) Project [uid#12, tg_base_persona_agegroup AS tag_code#7, tag_value#13]
+- Scan hive user_profile0224.tg_base_persona_agegroup [tag_value#13, uid#12], HiveTableRelation `user_profile0224`.`tg_base_persona_agegroup`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [uid#12, tag_value#13], [dt#14], Statistics(sizeInBytes=8.0 EiB), Stream(CatalogPartition(
Partition Values: [dt=2020-06-14]
Location: hdfs://Ding202:8020/user_profile/user_profile0224/tg_base_persona_agegroup/dt=2020-06-14
Serde Library: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
InputFormat: org.apache.hadoop.mapred.TextInputFormat
OutputFormat: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
Storage Properties: [serialization.format= , field.delim= ]
Partition Parameters: {rawDataSize=0, numFiles=1, transient_lastDdlTime=1652425049, totalSize=8487, COLUMN_STATS_ACCURATE={"BASIC_STATS":"true"}, numRows=0}
Created Time: Fri May 13 14:57:29 CST 2022
Last Access: UNKNOWN
Partition Statistics: 8487 bytes)), [isnotnull(dt#14), (dt#14 = 2020-06-14)]
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
at org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec.doExecute(ObjectHashAggregateExec.scala:102)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:175)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:213)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:210)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:171)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.inputRDD$lzycompute(ShuffleExchangeExec.scala:64)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.inputRDD(ShuffleExchangeExec.scala:64)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.shuffleDependency$lzycompute(ShuffleExchangeExec.scala:83)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.shuffleDependency(ShuffleExchangeExec.scala:81)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.$anonfun$doExecute$1(ShuffleExchangeExec.scala:98)
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
... 40 more
Caused by: java.lang.RuntimeException: Error in configuring object
at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:112)
at org.apache.hadoop.util.ReflectionUtils.setConf(ReflectionUtils.java:78)
at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:136)
at org.apache.spark.rdd.HadoopRDD.getInputFormat(HadoopRDD.scala:191)
at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:205)
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49)
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49)
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
at org.apache.spark.rdd.UnionRDD.$anonfun$getPartitions$1(UnionRDD.scala:85)
at org.apache.spark.rdd.UnionRDD.$anonfun$getPartitions$1$adapted(UnionRDD.scala:85)
at scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:273)
at scala.collection.immutable.List.foreach(List.scala:392)
at scala.collection.TraversableLike.map(TraversableLike.scala:273)
at scala.collection.TraversableLike.map$(TraversableLike.scala:266)
at scala.collection.immutable.List.map(List.scala:298)
at org.apache.spark.rdd.UnionRDD.getPartitions(UnionRDD.scala:85)
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49)
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49)
at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:276)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.rdd.RDD.partitions(RDD.scala:272)
at org.apache.spark.SparkContext.$anonfun$union$2(SparkContext.scala:1369)
at org.apache.spark.SparkContext.$anonfun$union$2$adapted(SparkContext.scala:1369)
at scala.collection.TraversableLike.noneIn$1(TraversableLike.scala:306)
at scala.collection.TraversableLike.filterImpl(TraversableLike.scala:372)
at scala.collection.TraversableLike.filterImpl$(TraversableLike.scala:284)
at scala.collection.AbstractTraversable.filterImpl(Traversable.scala:108)
at scala.collection.TraversableLike.filter(TraversableLike.scala:382)
at scala.collection.TraversableLike.filter$(TraversableLike.scala:382)
at scala.collection.AbstractTraversable.filter(Traversable.scala:108)
at org.apache.spark.SparkContext.$anonfun$union$1(SparkContext.scala:1369)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.SparkContext.withScope(SparkContext.scala:751)
at org.apache.spark.SparkContext.union(SparkContext.scala:1368)
at org.apache.spark.sql.execution.UnionExec.doExecute(basicPhysicalOperators.scala:644)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:175)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:213)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:210)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:171)
at org.apache.spark.sql.execution.aggregate.ObjectHashAggregateExec.$anonfun$doExecute$1(ObjectHashAggregateExec.scala:107)
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
... 52 more
Caused by: java.lang.reflect.InvocationTargetException
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.hadoop.util.ReflectionUtils.setJobConf(ReflectionUtils.java:109)
... 108 more
Caused by: java.lang.IllegalArgumentException: Compression codec com.hadoop.compression.lzo.LzoCodec not found.
at org.apache.hadoop.io.compress.CompressionCodecFactory.getCodecClasses(CompressionCodecFactory.java:139)
at org.apache.hadoop.io.compress.CompressionCodecFactory.<init>(CompressionCodecFactory.java:180)
at org.apache.hadoop.mapred.TextInputFormat.configure(TextInputFormat.java:45)
... 113 more
Caused by: java.lang.ClassNotFoundException: Class com.hadoop.compression.lzo.LzoCodec not found
at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:2101)
at org.apache.hadoop.io.compress.CompressionCodecFactory.getCodecClasses(CompressionCodecFactory.java:132)
... 115 more
2022-05-13 15:34:00,710 ERROR --- [ main] org.apache.hadoop.hdfs.KeyProviderCache (line: 87) : Could not find uri with key [dfs.encryption.key.provider.uri] to create a keyProvider !!