From 43e222b76bf52483c4556d0c6c2eda90235be165 Mon Sep 17 00:00:00 2001 From: markilue <745518019@qq.com> Date: Wed, 18 May 2022 19:44:33 +0800 Subject: [PATCH] =?UTF-8?q?=E7=94=A8=E6=88=B7=E7=94=BB=E5=83=8Fexample?= =?UTF-8?q?=E6=9B=B4=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Big_data_example/user_profile/pom.xml | 4 + .../user_profile/toBitmap/pom.xml | 64 ++++++++++ .../src/main/resources/config.properties | 11 ++ .../toBitmap/src/main/resources/core-site.xml | 19 +++ .../toBitmap/src/main/resources/hdfs-site.xml | 19 +++ .../toBitmap/src/main/resources/hive-site.xml | 41 ++++++ .../src/main/resources/log4j.properties | 12 ++ .../atguigu/userfile/app/ToBitmapApp.scala | 119 ++++++++++++++++++ 8 files changed, 289 insertions(+) create mode 100644 Big_data_example/user_profile/toBitmap/pom.xml create mode 100644 Big_data_example/user_profile/toBitmap/src/main/resources/config.properties create mode 100644 Big_data_example/user_profile/toBitmap/src/main/resources/core-site.xml create mode 100644 Big_data_example/user_profile/toBitmap/src/main/resources/hdfs-site.xml create mode 100644 Big_data_example/user_profile/toBitmap/src/main/resources/hive-site.xml create mode 100644 Big_data_example/user_profile/toBitmap/src/main/resources/log4j.properties create mode 100644 Big_data_example/user_profile/toBitmap/src/main/scala/com/atguigu/userfile/app/ToBitmapApp.scala diff --git a/Big_data_example/user_profile/pom.xml b/Big_data_example/user_profile/pom.xml index 50cc701..5bf8b88 100644 --- a/Big_data_example/user_profile/pom.xml +++ b/Big_data_example/user_profile/pom.xml @@ -13,6 +13,7 @@ sql merge export-clickhouse + toBitmap @@ -128,4 +129,7 @@ + + + \ No newline at end of file diff --git a/Big_data_example/user_profile/toBitmap/pom.xml b/Big_data_example/user_profile/toBitmap/pom.xml new file mode 100644 index 0000000..47ad44f --- /dev/null +++ b/Big_data_example/user_profile/toBitmap/pom.xml @@ -0,0 +1,64 @@ + + + + user_profile + com.atguigu + 1.0-SNAPSHOT + + 4.0.0 + + toBitmap + + + + common + com.atguigu + 1.0-SNAPSHOT + + + + + + + + + net.alchim31.maven + scala-maven-plugin + 3.4.6 + + + + + compile + testCompile + + + + + + + org.apache.maven.plugins + maven-assembly-plugin + 3.0.0 + + + jar-with-dependencies + + + + + make-assembly + package + + single + + + + + + + + + \ No newline at end of file diff --git a/Big_data_example/user_profile/toBitmap/src/main/resources/config.properties b/Big_data_example/user_profile/toBitmap/src/main/resources/config.properties new file mode 100644 index 0000000..d76795a --- /dev/null +++ b/Big_data_example/user_profile/toBitmap/src/main/resources/config.properties @@ -0,0 +1,11 @@ +hdfs-store.path=hdfs://Ding202:8020/user_profile +data-warehouse.dbname=gmall +user-profile.dbname=user_profile0224 + +# mysql +mysql.url=jdbc:mysql://Ding202:3306/user_profile_manager_0224?characterEncoding=utf-8&useSSL=false +mysql.username=root +mysql.password=123456 + +# clickhouse +clickhouse.url=jdbc:clickhouse://Ding202:8123/user_profile0224 diff --git a/Big_data_example/user_profile/toBitmap/src/main/resources/core-site.xml b/Big_data_example/user_profile/toBitmap/src/main/resources/core-site.xml new file mode 100644 index 0000000..e32dcfe --- /dev/null +++ b/Big_data_example/user_profile/toBitmap/src/main/resources/core-site.xml @@ -0,0 +1,19 @@ + + + + + io.compression.codecs + + org.apache.hadoop.io.compress.GzipCodec, + org.apache.hadoop.io.compress.DefaultCodec, + org.apache.hadoop.io.compress.BZip2Codec, + org.apache.hadoop.io.compress.SnappyCodec, + com.hadoop.compression.lzo.LzoCodec, + com.hadoop.compression.lzo.LzopCodec + + + + io.compression.codec.lzo.class + com.hadoop.compression.lzo.LzoCodec + + diff --git a/Big_data_example/user_profile/toBitmap/src/main/resources/hdfs-site.xml b/Big_data_example/user_profile/toBitmap/src/main/resources/hdfs-site.xml new file mode 100644 index 0000000..671652f --- /dev/null +++ b/Big_data_example/user_profile/toBitmap/src/main/resources/hdfs-site.xml @@ -0,0 +1,19 @@ + + + + + dfs.namenode.http-address + Ding202:9870 + + + + dfs.namenode.secondary.http-address + Ding204:9868 + + + dfs.client.use.datanode.hostname + true + + + + diff --git a/Big_data_example/user_profile/toBitmap/src/main/resources/hive-site.xml b/Big_data_example/user_profile/toBitmap/src/main/resources/hive-site.xml new file mode 100644 index 0000000..32ece85 --- /dev/null +++ b/Big_data_example/user_profile/toBitmap/src/main/resources/hive-site.xml @@ -0,0 +1,41 @@ + + + + + javax.jdo.option.ConnectionURL + jdbc:mysql://Ding202:3306/metastore?createDatabaseIfNotExist=true&characterEncoding=utf-8&useSSL=false + JDBC connect string for a JDBC metastore + + + + javax.jdo.option.ConnectionDriverName + com.mysql.jdbc.Driver + Driver class name for a JDBC metastore + + + + javax.jdo.option.ConnectionUserName + root + username to use against metastore database + + + + javax.jdo.option.ConnectionPassword + 123456 + password to use against metastore database + + + hive.cli.print.header + true + + + + hive.cli.print.current.db + true + + + hive.metastore.schema.verification + false + + + diff --git a/Big_data_example/user_profile/toBitmap/src/main/resources/log4j.properties b/Big_data_example/user_profile/toBitmap/src/main/resources/log4j.properties new file mode 100644 index 0000000..48f5794 --- /dev/null +++ b/Big_data_example/user_profile/toBitmap/src/main/resources/log4j.properties @@ -0,0 +1,12 @@ +log4j.rootLogger=error, stdout,R +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %5p --- [%50t] %-80c(line:%5L) : %m%n + +log4j.appender.R=org.apache.log4j.RollingFileAppender +log4j.appender.R.File=../log/agent.log +log4j.appender.R.MaxFileSize=1024KB +log4j.appender.R.MaxBackupIndex=1 + +log4j.appender.R.layout=org.apache.log4j.PatternLayout +log4j.appender.R.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %5p --- [%50t] %-80c(line:%6L) : %m%n diff --git a/Big_data_example/user_profile/toBitmap/src/main/scala/com/atguigu/userfile/app/ToBitmapApp.scala b/Big_data_example/user_profile/toBitmap/src/main/scala/com/atguigu/userfile/app/ToBitmapApp.scala new file mode 100644 index 0000000..cce74b0 --- /dev/null +++ b/Big_data_example/user_profile/toBitmap/src/main/scala/com/atguigu/userfile/app/ToBitmapApp.scala @@ -0,0 +1,119 @@ +package com.atguigu.userfile.app + +import com.atguigu.userprofile.common.bean.TagInfo +import com.atguigu.userprofile.common.constants.ConstCode +import com.atguigu.userprofile.common.dao.TagInfoDAO +import com.atguigu.userprofile.common.utils.ClickhouseUtils +import org.apache.spark.SparkConf +import org.apache.spark.sql.SparkSession + +import scala.collection.mutable.ListBuffer + +/** + * 1 建表 4个表 string decimal long date + * 2 要查询标签清单列表 查mysql + * 把标签列表 根据标签的值类型不同 ,分成四个列表 + * 每个列表组成独立的insert select 语句 ,一个四个insert + * 3 insert into select + */ +object ToBitmapApp { + + + def main(args: Array[String]): Unit = { + + //声明环境 + val sparkConf: SparkConf = new SparkConf().setAppName("bitmap_app")//.setMaster("local[*]") + val sparkSession: SparkSession = SparkSession.builder().config(sparkConf).enableHiveSupport().getOrCreate() + + val taskId: String = args(0); + val taskDate: String = args(1); + + //TODO 1 建表 4个表 string decimal long date(提前一次手工创建) + + //clickhouse 上的表 + + //TODO 2 要查询标签清单列表 查mysql + // * 把标签列表 根据标签的值类型不同 ,分成四个列表 + // * 每个列表组成独立的insert select 语句 ,一个四个insert + + val tagList: List[TagInfo] = TagInfoDAO.getTagInfoList() + val tagInfoStringList: ListBuffer[TagInfo] = new ListBuffer[TagInfo] + val tagInfoDecimalList: ListBuffer[TagInfo] = new ListBuffer[TagInfo] + val tagInfoLongList: ListBuffer[TagInfo] = new ListBuffer[TagInfo] + val tagInfoDateList: ListBuffer[TagInfo] = new ListBuffer[TagInfo] + + tagList.foreach( + tagInfo => { + if (tagInfo.tagValueType == ConstCode.TAG_VALUE_TYPE_STRING) { + tagInfoStringList.append(tagInfo) + } + if (tagInfo.tagValueType == ConstCode.TAG_VALUE_TYPE_DECIMAL) { + tagInfoDecimalList.append(tagInfo) + } + if (tagInfo.tagValueType == ConstCode.TAG_VALUE_TYPE_LONG) { + tagInfoLongList.append(tagInfo) + } + if (tagInfo.tagValueType == ConstCode.TAG_VALUE_TYPE_DATE) { + tagInfoDateList.append(tagInfo) + } + } + ) + + //TODO 3 分别根据不同的标签插入到map表 insert into select + insertTag(tagInfoStringList, "user_tag_value_string", taskDate.replace("-", "")) + insertTag(tagInfoDecimalList, "user_tag_value_decimal", taskDate.replace("-", "")) + insertTag(tagInfoLongList, "user_tag_value_long", taskDate.replace("-", "")) + insertTag(tagInfoDateList, "user_tag_value_date", taskDate.replace("-", "")) + + + } + + /** + * insert into $tableName + * select tv.1,tv.2,groupBitmapState(uid),'$taskDate' as dt + * from( + * select uid,arrayJoin([('gender',gender),('agegroup',agegroup),('favor',favor)]) tv from user_tag_merge + * ) user_tag + * group by tv.1,tv.2; + * + */ + + def insertTag(tagList: ListBuffer[TagInfo], bitmapTableName: String, taskDate: String): Unit = { + + //TODO 需要做幂等性处理,每次插入数据前,需要做分区清理 + //alter table ${bitmapTableName} delete where dt='$taskDate' + + val clearSQL=s"alter table ${bitmapTableName} delete where dt='$taskDate'" + println(clearSQL) + ClickhouseUtils.executeSql(clearSQL) + + + + if(tagList.size >0){ + //('gender',gender),('agegroup',agegroup),('favor',favor) + val tagCodeSQL: String = tagList.map( + tagInfo => s"('${tagInfo.tagCode}',${tagInfo.tagCode.toLowerCase()})" + ).mkString(",") + + val insertIntoSQL = + s""" + |insert into ${bitmapTableName} + |select tv.1,tv.2,groupBitmapState(cast (uid as UInt64) ),'${taskDate}' as dt + |from( + |select uid,arrayJoin([$tagCodeSQL]) tv from user_tag_merge_${taskDate} + |) user_tag + |where tv.2<>'' + | group by tv.1,tv.2 + |""".stripMargin + + println(insertIntoSQL) + + ClickhouseUtils.executeSql(insertIntoSQL) + + } + + + + } + +}