暑假更新

This commit is contained in:
dingjiawen 2022-08-31 17:08:21 +08:00
parent ce889c6057
commit bc796b7885
17 changed files with 97 additions and 50 deletions

View File

@ -10,31 +10,32 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectIn
/**
* 插件性质的开发:
* 1.实现接口或者继承类
* 2.重写相应的方法
* 3.打包
*
*
* 1.实现接口或者继承类
* 2.重写相应的方法
* 3.打包
* <p>
* <p>
* 自定义UDF函数类
* 继承Hive提供的GenericUDF类
* 继承Hive提供的GenericUDF类
*/
public class CalStringLengthUDF extends GenericUDF {
/**
* 初始化方法
* @param objectInspectors 传入到函数中的参数对应的类型的鉴别器对象
* @return 指定函数的返回值类型对象的鉴别器对象
*
* @param objectInspectors 传入到函数中的参数对应的类型的鉴别器对象
* @return 指定函数的返回值类型对象的鉴别器对象
* @throws UDFArgumentException
*/
@Override
public ObjectInspector initialize(ObjectInspector[] objectInspectors) throws UDFArgumentException {
//1.校验函数的参数个数
if(objectInspectors==null||objectInspectors.length!=1){
if (objectInspectors == null || objectInspectors.length != 1) {
throw new UDFArgumentLengthException("函数的参数个数不正确");
}
//2.校验函数的参数类型,getCategory()返回的是传入的类型,PRIMITIVE表示基本类型
if(!objectInspectors[0].getCategory().equals(ObjectInspector.Category.PRIMITIVE)){
throw new UDFArgumentTypeException(0,"磺酸钠会参数类型不正确");
if (!objectInspectors[0].getCategory().equals(ObjectInspector.Category.PRIMITIVE)) {
throw new UDFArgumentTypeException(0, "函数的参数类型不正确");
}
//3.返回函数的返回值类型对应的鉴别器类型
return PrimitiveObjectInspectorFactory.javaIntObjectInspector;
@ -42,15 +43,16 @@ public class CalStringLengthUDF extends GenericUDF {
/**
* 函数核心处理方法
* @param deferredObjects 传入到函数的参数
* @return 函数的返回值
*
* @param deferredObjects 传入到函数的参数
* @return 函数的返回值
* @throws HiveException
*/
public Object evaluate(DeferredObject[] deferredObjects) throws HiveException {
//1.获取参数
Object argument = deferredObjects[0].get();
if(argument==null){
if (argument == null) {
return 0;
}
return argument.toString().length();
@ -58,6 +60,7 @@ public class CalStringLengthUDF extends GenericUDF {
/**
* 用于以后sql函数需要显示哪些内容
*
* @param strings
* @return
*/

View File

@ -25,7 +25,10 @@ object Spark010_RDD_Operator_Transform {
//(null.null) =>null null
//(1,null) =>1
val rdd1: RDD[Int] = rdd.distinct()
val rdd2=rdd.map(x=>(x,null)).reduceByKey((x, _) => x, 7)
rdd1.collect().foreach(println(_))
println("==========================")
rdd2.collect().foreach(println(_))
sc.stop()
}

View File

@ -39,6 +39,27 @@ object Spark01_RDD_Operator_Transform_Par {
}
)
mapRDD1.collect()
/*
结果:
>>>>>>1
>>>>>>3
######1
######3
>>>>>>2
######2
>>>>>>4
######4
第二次:
>>>>>>3
>>>>>>1
######1
######3
>>>>>>2
>>>>>>4
######4
######2
TODO 即:同一分区内的1和2一定是先1后2,同一分区内的3和4一定是先3后4;但是不同分区之间(例如1和3)谁先执行不一定
*/
sc.stop()
}

View File

@ -17,7 +17,7 @@ object Spark01_RDD_Serial {
//不加Serializable特质之前
// NotSerializableException: com.atguigu.spark.core.rdd.serial.Spark01_RDD_Serial$Search
//search.getMatch1(rdd).collect().foreach(println(_))
// search.getMatch1(rdd).collect().foreach(println(_))
search.getMatch2(rdd).collect().foreach(println(_))
sc.stop()

View File

@ -113,7 +113,7 @@ object SparkStreaming11_Req1_blackList1 {
println(s"${day} ${user} ${ad} ${sum}")
if(sum >= 30){
//拉入黑名单
val conn = JDBCUtil.getConnection
// val conn = JDBCUtil.getConnection
val sql =
"""
|insert into black_list (userid) values (?)
@ -126,7 +126,7 @@ object SparkStreaming11_Req1_blackList1 {
//将当天的广告点击数量进行更新
//查询统计表数据
val conn = JDBCUtil.getConnection
// val conn = JDBCUtil.getConnection
val sql =
"""
|select *

View File

@ -17,6 +17,15 @@ object Scala04_Function_Normal_1 {
//调用时如果不想使用默认值直接传值即可
fun3("111111") //111111
/** Prints the given credentials on one line; both parameters have defaults,
  * so callers may omit either one or pass them by name in any order.
  *
  * @param username account name to print (defaults to "dingjiawen")
  * @param password password to print (defaults to "123456")
  */
def fun4(username: String = "dingjiawen", password: String = "123456"): Unit = {
  // Build the message first, then emit it in a single println call.
  val message: String = s"username:$username,password:$password"
  println(message)
}
fun4()
fun4(password = "qazwsx")
fun4(password = "qazwsx",username = "123")
}
}

View File

@ -28,6 +28,7 @@ object Scala05_Object_Field {
//而在scala中给属性提供的get,set方法不遵循bean规范,为了统一使用则有了下述写法
test.setEmail("xxx")
test.getEmail()
println(test.email)
}

View File

@ -29,7 +29,8 @@ object Scala12_Object_Trait_4 {
trait Log extends Operator{
override def operDate(): Unit ={
print("向文件中")
super.operDate() //如果想跳过DB直接访问Operator则使用super[Operator].operDate()
// super[Operator].operDate() //如果想跳过DB直接访问Operator则使用super[Operator].operDate() //向文件中操作数据...
super.operDate() //如果想跳过DB直接访问Operator则使用super[Operator].operDate() //向文件中向数据库中操作数据
}
}
class MySQL extends DB with Log {

View File

@ -1,5 +1,7 @@
package com.atguigu.scala.chapter07
import scala.+:
object Scala01_Collection_1 {
def main(args: Array[String]): Unit = {
@ -27,9 +29,13 @@ object Scala01_Collection_1 {
//val ints1: Array[Int] = array1.:+(5)
val ints1 = array1 :+ 5
val ints3 =5 +: array1
val ints2 = array1 ++ array3
//val ints3 = array1 ++: array3
// println(ints)
// println(ints1)
// println(ints2)
println(array1 eq ints) //false
println(array1 eq ints1) //false
@ -38,9 +44,9 @@ object Scala01_Collection_1 {
// 遍历
//把集合中的元素用逗号分割形成一个字符串
// println(ints.mkString(",")) //5,1,2,3,4
// println(ints1.mkString(",")) //1,2,3,4,5
// println(ints2.mkString(",")) //1,2,3,4,5,6,7,8
println(ints.mkString(",")) //5,1,2,3,4
println(ints1.mkString(",")) //1,2,3,4,5
println(ints2.mkString(",")) //1,2,3,4,5,6,7,8
//foreach方法是一个循环的方法,需要传递一个函数,这个参数的类型是函数类型
// 函数类型 Int => U

View File

@ -9,33 +9,33 @@ object Scala06_Collection_Seq {
//TODO - 集合 - Seq
//一般会采用List
val seq = Seq(1,2,3,4)
val list = List(1,2,3,4)
val list1 = List(5,6,7,8)
val seq = Seq(1, 2, 3, 4)
val list = List(1, 2, 3, 4)
val list1 = List(5, 6, 7, 8)
println(seq) //List(1, 2, 3, 4)
println(list) //List(1, 2, 3, 4)
println(seq) //List(1, 2, 3, 4)
println(list) //List(1, 2, 3, 4)
//TODO 常见数据操作
val ints: List[Int] = list :+ 5
val ints1: List[Int] = 5 +: list
//Nil 在集合中表示空集合,增加集合的方式
val ints2 = 1 :: 2 ::3 :: Nil
val ints2 = 1 :: 2 :: 3 :: Nil
//准确写法是
//Nil.::(3).::(2).::(1)
val ints3 = 1 :: 2 ::3 ::list1 :: Nil
val ints4 = 1 :: 2 ::3 ::list1 ::: Nil
val ints3 = 1 :: 2 :: 3 :: list1 :: Nil
val ints4 = 1 :: 2 :: 3 :: list1 ::: Nil
println(list eq ints) //false
println(list) //List(1, 2, 3, 4)
println(ints) //List(1, 2, 3, 4, 5)
println(ints1) //List(5, 1, 2, 3, 4)
println(Nil) //List()
println(ints2) //List(1, 2, 3)
println(ints3) //List(1, 2, 3, List(5, 6, 7, 8))
println(ints4) //List(1, 2, 3, 5, 6, 7, 8)
println(list eq ints) //false
println(list) //List(1, 2, 3, 4)
println(ints) //List(1, 2, 3, 4, 5)
println(ints1) //List(5, 1, 2, 3, 4)
println(Nil) //List()
println(ints2) //List(1, 2, 3)
println(ints3) //List(1, 2, 3, List(5, 6, 7, 8))
println(ints4) //List(1, 2, 3, 5, 6, 7, 8)
}
}

View File

@ -21,7 +21,8 @@ object Scala06_Collection_Seq_2 {
// println(list) //ListBuffer(5, 3, 4, 2, 1)
//
// list.remove(1)
// list.remove(1,2)
println(list)
println(list.remove(1,2))
//
// list.mkString()
// list.iterator

View File

@ -18,7 +18,7 @@ object Scala01_Transform {
}
val age :Int = thirdPart() //Double ? Int?
println(age)
println(age.isInstanceOf[Double])
}
def thirdPart(): Double ={

View File

@ -3,14 +3,14 @@ package com.atguigu.scala.test;
public class TestAdd {
public static void main(String[] args) {
//int i=0;
//int j=i++;
int i=0;
// int j=i++;
//赋值是 等号右边的计算结果给左边
//i++不是原子性操作中间会有临时的结果
//i=i++; //print之后是1 _tmp=0; i=1; i=_tmp=0
i=i++; //print之后是0 _tmp=0; i=1; i=_tmp=0
//System.out.println("i="+i); // 1
//System.out.println("j="+j); // 0
System.out.println("i="+i); // 0
// System.out.println("j="+j); // 0
//TODO 阶乘一个大于1的数的阶乘等于这个数乘以他减一的数的阶乘
//5!=5*4!=5*4*3!=....=5*4*3*2*1

View File

@ -2,6 +2,8 @@ package com.atguigu.scala.test;
//import com.atguigu.scala.chapter09.Scala02_Exception;
import com.atguigu.scala.chapter09.Scala02_Exception;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
@ -38,7 +40,7 @@ public class TestException {
// }
// System.out.println("xxxxxx");
// Scala02_Exception.test();
Scala02_Exception.test();
}

View File

@ -22,7 +22,7 @@ object ToBitmapApp {
def main(args: Array[String]): Unit = {
//声明环境
val sparkConf: SparkConf = new SparkConf().setAppName("bitmap_app")//.setMaster("local[*]")
val sparkConf: SparkConf = new SparkConf().setAppName("bitmap_app") //.setMaster("local[*]")
val sparkSession: SparkSession = SparkSession.builder().config(sparkConf).enableHiveSupport().getOrCreate()
val taskId: String = args(0);
@ -83,13 +83,12 @@ object ToBitmapApp {
//TODO 需要做幂等性处理,每次插入数据前需要做分区清理
//alter table ${bitmapTableName} delete where dt='$taskDate'
val clearSQL=s"alter table ${bitmapTableName} delete where dt='$taskDate'"
val clearSQL = s"alter table ${bitmapTableName} delete where dt='$taskDate'"
println(clearSQL)
ClickhouseUtils.executeSql(clearSQL)
if(tagList.size >0){
if (tagList.size > 0) {
//('gender',gender),('agegroup',agegroup),('favor',favor)
val tagCodeSQL: String = tagList.map(
tagInfo => s"('${tagInfo.tagCode}',${tagInfo.tagCode.toLowerCase()})"
@ -113,7 +112,6 @@ object ToBitmapApp {
}
}
}

View File

@ -34,6 +34,7 @@ public class ZooKeeperTest {
* @throws KeeperException
* @throws InterruptedException
*/
@Test
public void deleteAll(String path,ZooKeeper zk) throws KeeperException, InterruptedException {
//判断当前节点是否存在获取stat

View File

@ -12,6 +12,7 @@
<artifactId>spring_aop</artifactId>
<packaging>war</packaging>
<dependencies>
<dependency>
<groupId>org.springframework</groupId>