From 43e222b76bf52483c4556d0c6c2eda90235be165 Mon Sep 17 00:00:00 2001
From: markilue <745518019@qq.com>
Date: Wed, 18 May 2022 19:44:33 +0800
Subject: [PATCH] =?UTF-8?q?=E7=94=A8=E6=88=B7=E7=94=BB=E5=83=8Fexample?=
=?UTF-8?q?=E6=9B=B4=E6=96=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
Big_data_example/user_profile/pom.xml | 4 +
.../user_profile/toBitmap/pom.xml | 64 ++++++++++
.../src/main/resources/config.properties | 11 ++
.../toBitmap/src/main/resources/core-site.xml | 19 +++
.../toBitmap/src/main/resources/hdfs-site.xml | 19 +++
.../toBitmap/src/main/resources/hive-site.xml | 41 ++++++
.../src/main/resources/log4j.properties | 12 ++
.../atguigu/userfile/app/ToBitmapApp.scala | 119 ++++++++++++++++++
8 files changed, 289 insertions(+)
create mode 100644 Big_data_example/user_profile/toBitmap/pom.xml
create mode 100644 Big_data_example/user_profile/toBitmap/src/main/resources/config.properties
create mode 100644 Big_data_example/user_profile/toBitmap/src/main/resources/core-site.xml
create mode 100644 Big_data_example/user_profile/toBitmap/src/main/resources/hdfs-site.xml
create mode 100644 Big_data_example/user_profile/toBitmap/src/main/resources/hive-site.xml
create mode 100644 Big_data_example/user_profile/toBitmap/src/main/resources/log4j.properties
create mode 100644 Big_data_example/user_profile/toBitmap/src/main/scala/com/atguigu/userfile/app/ToBitmapApp.scala
diff --git a/Big_data_example/user_profile/pom.xml b/Big_data_example/user_profile/pom.xml
index 50cc701..5bf8b88 100644
--- a/Big_data_example/user_profile/pom.xml
+++ b/Big_data_example/user_profile/pom.xml
@@ -13,6 +13,7 @@
sql
merge
export-clickhouse
+ toBitmap
@@ -128,4 +129,7 @@
+
+
+
\ No newline at end of file
diff --git a/Big_data_example/user_profile/toBitmap/pom.xml b/Big_data_example/user_profile/toBitmap/pom.xml
new file mode 100644
index 0000000..47ad44f
--- /dev/null
+++ b/Big_data_example/user_profile/toBitmap/pom.xml
@@ -0,0 +1,64 @@
+
+
+
+ user_profile
+ com.atguigu
+ 1.0-SNAPSHOT
+
+ 4.0.0
+
+ toBitmap
+
+
+
+ common
+ com.atguigu
+ 1.0-SNAPSHOT
+
+
+
+
+
+
+
+
+ net.alchim31.maven
+ scala-maven-plugin
+ 3.4.6
+
+
+
+
+ compile
+ testCompile
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-assembly-plugin
+ 3.0.0
+
+
+ jar-with-dependencies
+
+
+
+
+ make-assembly
+ package
+
+ single
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/Big_data_example/user_profile/toBitmap/src/main/resources/config.properties b/Big_data_example/user_profile/toBitmap/src/main/resources/config.properties
new file mode 100644
index 0000000..d76795a
--- /dev/null
+++ b/Big_data_example/user_profile/toBitmap/src/main/resources/config.properties
@@ -0,0 +1,11 @@
+hdfs-store.path=hdfs://Ding202:8020/user_profile
+data-warehouse.dbname=gmall
+user-profile.dbname=user_profile0224
+
+# mysql
+mysql.url=jdbc:mysql://Ding202:3306/user_profile_manager_0224?characterEncoding=utf-8&useSSL=false
+mysql.username=root
+mysql.password=123456
+
+# clickhouse
+clickhouse.url=jdbc:clickhouse://Ding202:8123/user_profile0224
diff --git a/Big_data_example/user_profile/toBitmap/src/main/resources/core-site.xml b/Big_data_example/user_profile/toBitmap/src/main/resources/core-site.xml
new file mode 100644
index 0000000..e32dcfe
--- /dev/null
+++ b/Big_data_example/user_profile/toBitmap/src/main/resources/core-site.xml
@@ -0,0 +1,19 @@
+
+
+
+
+ io.compression.codecs
+
+ org.apache.hadoop.io.compress.GzipCodec,
+ org.apache.hadoop.io.compress.DefaultCodec,
+ org.apache.hadoop.io.compress.BZip2Codec,
+ org.apache.hadoop.io.compress.SnappyCodec,
+ com.hadoop.compression.lzo.LzoCodec,
+ com.hadoop.compression.lzo.LzopCodec
+
+
+
+ io.compression.codec.lzo.class
+ com.hadoop.compression.lzo.LzoCodec
+
+
diff --git a/Big_data_example/user_profile/toBitmap/src/main/resources/hdfs-site.xml b/Big_data_example/user_profile/toBitmap/src/main/resources/hdfs-site.xml
new file mode 100644
index 0000000..671652f
--- /dev/null
+++ b/Big_data_example/user_profile/toBitmap/src/main/resources/hdfs-site.xml
@@ -0,0 +1,19 @@
+
+
+
+
+ dfs.namenode.http-address
+ Ding202:9870
+
+
+
+ dfs.namenode.secondary.http-address
+ Ding204:9868
+
+
+ dfs.client.use.datanode.hostname
+ true
+
+
+
+
diff --git a/Big_data_example/user_profile/toBitmap/src/main/resources/hive-site.xml b/Big_data_example/user_profile/toBitmap/src/main/resources/hive-site.xml
new file mode 100644
index 0000000..32ece85
--- /dev/null
+++ b/Big_data_example/user_profile/toBitmap/src/main/resources/hive-site.xml
@@ -0,0 +1,41 @@
+
+
+
+
+ javax.jdo.option.ConnectionURL
+ jdbc:mysql://Ding202:3306/metastore?createDatabaseIfNotExist=true&characterEncoding=utf-8&useSSL=false
+ JDBC connect string for a JDBC metastore
+
+
+
+ javax.jdo.option.ConnectionDriverName
+ com.mysql.jdbc.Driver
+ Driver class name for a JDBC metastore
+
+
+
+ javax.jdo.option.ConnectionUserName
+ root
+ username to use against metastore database
+
+
+
+ javax.jdo.option.ConnectionPassword
+ 123456
+ password to use against metastore database
+
+
+ hive.cli.print.header
+ true
+
+
+
+ hive.cli.print.current.db
+ true
+
+
+ hive.metastore.schema.verification
+ false
+
+
+
diff --git a/Big_data_example/user_profile/toBitmap/src/main/resources/log4j.properties b/Big_data_example/user_profile/toBitmap/src/main/resources/log4j.properties
new file mode 100644
index 0000000..48f5794
--- /dev/null
+++ b/Big_data_example/user_profile/toBitmap/src/main/resources/log4j.properties
@@ -0,0 +1,12 @@
+log4j.rootLogger=error, stdout,R
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %5p --- [%50t] %-80c(line:%5L) : %m%n
+
+log4j.appender.R=org.apache.log4j.RollingFileAppender
+log4j.appender.R.File=../log/agent.log
+log4j.appender.R.MaxFileSize=1024KB
+log4j.appender.R.MaxBackupIndex=1
+
+log4j.appender.R.layout=org.apache.log4j.PatternLayout
+log4j.appender.R.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %5p --- [%50t] %-80c(line:%6L) : %m%n
diff --git a/Big_data_example/user_profile/toBitmap/src/main/scala/com/atguigu/userfile/app/ToBitmapApp.scala b/Big_data_example/user_profile/toBitmap/src/main/scala/com/atguigu/userfile/app/ToBitmapApp.scala
new file mode 100644
index 0000000..cce74b0
--- /dev/null
+++ b/Big_data_example/user_profile/toBitmap/src/main/scala/com/atguigu/userfile/app/ToBitmapApp.scala
@@ -0,0 +1,119 @@
+package com.atguigu.userfile.app
+
+import com.atguigu.userprofile.common.bean.TagInfo
+import com.atguigu.userprofile.common.constants.ConstCode
+import com.atguigu.userprofile.common.dao.TagInfoDAO
+import com.atguigu.userprofile.common.utils.ClickhouseUtils
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.SparkSession
+
+import scala.collection.mutable.ListBuffer
+
+/**
+ * 1. Create the 4 bitmap tables (string / decimal / long / date) — one per tag value type
+ * 2. Fetch the tag definition list from MySQL;
+ *    split the tags into four lists according to each tag's value type;
+ *    each list builds its own insert-select statement (four inserts in total)
+ * 3. Execute the insert into ... select statements
+ */
+object ToBitmapApp {
+
+
+ def main(args: Array[String]): Unit = {
+
+ // Set up the Spark environment (master is supplied externally, e.g. by spark-submit; the local[*] override is commented out)
+ val sparkConf: SparkConf = new SparkConf().setAppName("bitmap_app")//.setMaster("local[*]")
+ val sparkSession: SparkSession = SparkSession.builder().config(sparkConf).enableHiveSupport().getOrCreate() // NOTE(review): created but never used below — all work goes through ClickhouseUtils; confirm whether it can be dropped
+
+ val taskId: String = args(0); // NOTE(review): taskId is read but never used in this job — confirm it is required by the scheduler contract
+ val taskDate: String = args(1); // expected format yyyy-MM-dd (dashes are stripped below to build table suffixes)
+
+ //TODO 1 Create the 4 bitmap tables (string / decimal / long / date) — created manually once, ahead of time
+
+ // The bitmap tables live on ClickHouse
+
+ //TODO 2 Fetch the tag definition list from MySQL
+ // * split the tag list into four lists according to each tag's value type
+ // * each list forms its own independent insert-select statement, four inserts in total
+
+ val tagList: List[TagInfo] = TagInfoDAO.getTagInfoList()
+ val tagInfoStringList: ListBuffer[TagInfo] = new ListBuffer[TagInfo]
+ val tagInfoDecimalList: ListBuffer[TagInfo] = new ListBuffer[TagInfo]
+ val tagInfoLongList: ListBuffer[TagInfo] = new ListBuffer[TagInfo]
+ val tagInfoDateList: ListBuffer[TagInfo] = new ListBuffer[TagInfo]
+
+ tagList.foreach(
+ tagInfo => {
+ if (tagInfo.tagValueType == ConstCode.TAG_VALUE_TYPE_STRING) {
+ tagInfoStringList.append(tagInfo)
+ }
+ if (tagInfo.tagValueType == ConstCode.TAG_VALUE_TYPE_DECIMAL) {
+ tagInfoDecimalList.append(tagInfo)
+ }
+ if (tagInfo.tagValueType == ConstCode.TAG_VALUE_TYPE_LONG) {
+ tagInfoLongList.append(tagInfo)
+ }
+ if (tagInfo.tagValueType == ConstCode.TAG_VALUE_TYPE_DATE) {
+ tagInfoDateList.append(tagInfo)
+ }
+ }
+ )
+
+ //TODO 3 Insert each tag group into its per-type bitmap table via insert-into-select (date suffix/partition uses yyyyMMdd)
+ insertTag(tagInfoStringList, "user_tag_value_string", taskDate.replace("-", ""))
+ insertTag(tagInfoDecimalList, "user_tag_value_decimal", taskDate.replace("-", ""))
+ insertTag(tagInfoLongList, "user_tag_value_long", taskDate.replace("-", ""))
+ insertTag(tagInfoDateList, "user_tag_value_date", taskDate.replace("-", ""))
+
+
+ }
+
+ /**
+ * insert into $tableName
+ * select tv.1,tv.2,groupBitmapState(uid),'$taskDate' as dt
+ * from(
+ * select uid,arrayJoin([('gender',gender),('agegroup',agegroup),('favor',favor)]) tv from user_tag_merge
+ * ) user_tag
+ * group by tv.1,tv.2;
+ * Shape of the generated ClickHouse SQL: one bitmap of uids per (tag_code, tag_value) pair for the day.
+ */
+
+ def insertTag(tagList: ListBuffer[TagInfo], bitmapTableName: String, taskDate: String): Unit = {
+
+ //TODO Idempotency: clear this run's dt partition on ClickHouse before every insert so reruns do not duplicate rows
+ //alter table ${bitmapTableName} delete where dt='$taskDate'
+
+ val clearSQL=s"alter table ${bitmapTableName} delete where dt='$taskDate'" // table name and date come from internal config/args, not end-user input
+ println(clearSQL)
+ ClickhouseUtils.executeSql(clearSQL)
+
+
+
+ if(tagList.size >0){ // skip the insert entirely when no tags exist for this value type
+ //('gender',gender),('agegroup',agegroup),('favor',favor) — one (literal code, column) pair per tag
+ val tagCodeSQL: String = tagList.map(
+ tagInfo => s"('${tagInfo.tagCode}',${tagInfo.tagCode.toLowerCase()})" // assumes the merge-table column is the lower-cased tag code — confirm against the merge job
+ ).mkString(",")
+
+ val insertIntoSQL =
+ s"""
+ |insert into ${bitmapTableName}
+ |select tv.1,tv.2,groupBitmapState(cast (uid as UInt64) ),'${taskDate}' as dt
+ |from(
+ |select uid,arrayJoin([$tagCodeSQL]) tv from user_tag_merge_${taskDate}
+ |) user_tag
+ |where tv.2<>''
+ | group by tv.1,tv.2
+ |""".stripMargin
+
+ println(insertIntoSQL)
+
+ ClickhouseUtils.executeSql(insertIntoSQL)
+
+ }
+
+
+
+ }
+
+}