Add some Scala and Python related code
This commit is contained in:
parent
aa79b3d094
commit
a9c30c8a91
@@ -0,0 +1,20 @@
package com.atguigu.spark.core.rdd.operator.transform

import org.apache.spark.api.java.function.FilterFunction

/**
 * @ClassName aaa.java
 * @author dingjiawen@xiaomi.com
 * @version 1.0.0
 * @Description TODO
 * @createTime 2023-07-19 18:44:00
 */
class AAA(bbb: Int) extends FilterFunction[Int] {

  val cc = bbb

  override def call(t: Int): Boolean = {
    println(cc)
    true
  }
}

@@ -0,0 +1,18 @@
package com.atguigu.spark.core.rdd.operator.transform

/**
 * @ClassName aaa.java
 * @author dingjiawen@xiaomi.com
 * @version 1.0.0
 * @Description TODO
 * @createTime 2023-07-19 18:44:00
 */
class CCC(bbb: Int) extends (Int => Boolean) with Serializable {

  val cc = bbb

  override def apply(t: Int): Boolean = {
    println(cc)
    true
  }
}

@@ -0,0 +1,69 @@
package com.atguigu.spark.core.rdd.operator.transform

import com.fasterxml.jackson.databind.ObjectMapper
import net.jpountz.xxhash.{XXHash64, XXHashFactory}
import org.apache.log4j.Logger
import org.apache.spark.api.java.function.FilterFunction

import java.nio.ByteBuffer

/**
 * created by likunyi@xiaomi.com
 * at 2022-04-29 15:32:00
 * This operator samples the incoming data: part of it is kept, the rest is dropped.
 *
 * samplingRate is the sampling rate and must be an integer.
 * samplingRate = 10 means a 10% sampling rate.
 * samplingRate = 64 means a 64% sampling rate.
 */
class HdfsToHolo_MessageSampler1(samplingRate: Int) extends FilterFunction[String] {

  // lazy so these fields are re-created on the executors after deserialization
  // (a plain @transient val would come back as null/0 there)
  @transient private lazy val objectMapper: ObjectMapper = new ObjectMapper()
  // reduce samplingRate/100 to lowest terms; hard-coding 100 here made NUMERATOR 0 for any rate below 100
  @transient private lazy val greatestCommonDivisor: Int = BigInt(samplingRate).gcd(BigInt(100)).toInt
  @transient private lazy val NUMERATOR: Int = samplingRate / greatestCommonDivisor
  @transient private lazy val DENOMINATOR: Int = 100 / greatestCommonDivisor
  @transient private lazy val XXHASH_SEED: Long = 0x9747b28c
  @transient private lazy val hasher: XXHash64 = XXHashFactory.fastestInstance().hash64()
  @transient private lazy val logger: Logger = Logger.getLogger(this.getClass.getName)

  // Initialize all parameters (legacy Flink-style open(), kept commented out)
  // def open(): Tuple5[ObjectMapper, Int, Int, Long, XXHash64] = {
  //
  //   objectMapper = new ObjectMapper()
  //
  //   val numerator = samplingRate // the numerator is the sampling rate
  //   val denominator = 100 // the denominator is always 100
  //   val greatestCommonDivisor = MathUtils.getGCD(numerator, denominator)
  //
  //   NUMERATOR = numerator / greatestCommonDivisor
  //   DENOMINATOR = denominator / greatestCommonDivisor
  //   XXHASH_SEED = 0x9747b28c
  //   hasher = XXHashFactory.fastestInstance().hash64()
  //   (objectMapper, NUMERATOR, DENOMINATOR, XXHASH_SEED, hasher)
  // } // open

  override def call(input: String): Boolean = {
    logger.info(s"All parameters: [objectMapper:${objectMapper}], [NUMERATOR:${NUMERATOR}], [DENOMINATOR:${DENOMINATOR}], [hasher:${hasher}]")
    logger.info(s"${input}")

    // If no sampling is needed (the sampling rate is 100%), pass the data straight downstream untouched.
    if (samplingRate == 100) {
      return true
    }

    val currentMessage = objectMapper.readTree(input)

    // Otherwise sample on the hash of distinct_id.
    // Roughly speaking: with a 64% sampling rate, take the hash of distinct_id modulo 100,
    // keep remainders 0..63 and drop remainders 64..99.
    hash(currentMessage.get("distinct_id").asText()) % DENOMINATOR < NUMERATOR
  }

  private def hash(distinct_id: String): Long = {
    Math.abs(hasher.hash(ByteBuffer.wrap(distinct_id.getBytes("UTF-8")), XXHASH_SEED))
  } // hash

}

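For reference, the keep/drop rule above boils down to hashing distinct_id into DENOMINATOR buckets and keeping the first NUMERATOR of them. The short Python sketch below is not part of this commit; the function names are made up and MD5 from the standard library stands in for xxHash, purely to illustrate the same bucketing decision.

import hashlib
from math import gcd


def bucket_of(distinct_id: str) -> int:
    # Stand-in for the xxHash64 used in the Scala code: any roughly uniform hash
    # works for illustrating the modulo-bucket idea.
    digest = hashlib.md5(distinct_id.encode("utf-8")).hexdigest()
    return int(digest, 16)


def keep(distinct_id: str, sampling_rate: int) -> bool:
    """Keep roughly sampling_rate% of ids: hash into `denominator` buckets, keep the first `numerator`."""
    if sampling_rate == 100:
        return True
    divisor = gcd(sampling_rate, 100)        # e.g. 64% -> gcd(64, 100) = 4
    numerator = sampling_rate // divisor     # 64 / 4 = 16
    denominator = 100 // divisor             # 100 / 4 = 25
    return bucket_of(distinct_id) % denominator < numerator


if __name__ == "__main__":
    ids = [f"user-{i}" for i in range(10_000)]
    kept = sum(keep(i, 64) for i in ids)
    print(f"kept {kept / len(ids):.1%} of ids at a 64% sampling rate")
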
@@ -1,9 +1,5 @@
 package com.atguigu.spark.core.rdd.operator.transform
-
-import java.text.SimpleDateFormat
-import java.util.Date
-
 import org.apache.spark.rdd.RDD
 import org.apache.spark.{SparkConf, SparkContext}

 /**

@@ -15,14 +11,18 @@ object Spark08_RDD_Operator_Transform {

     val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator")
     // Create the context environment object
-    val sc =new SparkContext(sparkConf)
+    val sc = new SparkContext(sparkConf)

     // TODO operator - filter
     val rdd = sc.makeRDD(
-      List(1,2,3,4)
+      List(1, 2, 3, 4)
     )

-    val filterRDD = rdd.filter(_ % 2 == 0)
+    val filterRDD = rdd.filter(
+      new CCC(10)
+    )
+
+    // val filterRDD = rdd.filter(_ % 2 == 0)

     filterRDD.collect().foreach(println(_))

@@ -0,0 +1,102 @@
package com.atguigu.gmall.realtime.app.dwm

import scala.collection.mutable

/**
 * @ClassName GenerateRPN.java
 * @author dingjiawen@xiaomi.com
 * @version 1.0.0
 * @Description TODO
 * @createTime 2023-06-30 17:27:00
 */
object GenerateRPN {

  def main(args: Array[String]): Unit = {
    // Test cases
    // String str = "1+2*3-4*5-6+7*8-9"; //123*+45*-6-78*+9-
    // var str = "1 + 2 * 3 - 4 * 5 - (6 + 7 * 8 - 9)"; //123*+45*-6-78*+9-
    // var str = "5 + 2 * 3"; //123*+45*-6-78*+9-
    val str = "6 * ( 5 + ( 2 + 3 ) * 8 + 3 )" // 6523+8*+3+*
    val RPNStack: Array[String] = generateRPN(str)
    println(RPNStack.mkString(","))
    println(evalRPN(RPNStack))
  }

  def generateRPN(expression: String): Array[String] = {
    val precedence = Map("+" -> 1, "-" -> 1, "*" -> 2, "/" -> 2)
    val output = mutable.Stack[String]()
    val stack = mutable.Stack[String]()

    def isOperator(token: String): Boolean = {
      precedence.contains(token)
    }

    def hasHigherPrecedence(op1: String, op2: String): Boolean = {
      precedence(op1) >= precedence(op2)
    }

    def processOperator(op: String): Unit = {
      while (stack.nonEmpty && isOperator(stack.top) && hasHigherPrecedence(stack.top, op)) {
        output.push(stack.pop())
      }
      stack.push(op)
    }

    def processOperand(operand: String): Unit = {
      output.push(operand)
    }

    def processParenthesis(): Unit = {
      while (stack.nonEmpty && stack.top != "(") {
        output.push(stack.pop())
      }
      stack.pop() // pop the left parenthesis
    }

    for (token <- expression.split("\\s+")) {
      token match {
        case "(" => stack.push(token)
        case ")" => processParenthesis()
        case t if isOperator(t) => processOperator(t)
        case _ => processOperand(token)
      }
    }

    while (stack.nonEmpty) {
      output.push(stack.pop())
    }

    output.toArray.reverse
  }

  def evalRPN(tokens: Array[String]): Int = {
    val stack = mutable.Stack[Int]()

    for (token <- tokens) {
      if (isOperator(token)) {
        val operand2 = stack.pop()
        val operand1 = stack.pop()
        val result = performOperation(token, operand1, operand2)
        stack.push(result)
      } else {
        stack.push(token.toInt)
      }
    }

    stack.pop()
  }

  def isOperator(token: String): Boolean = {
    token == "+" || token == "-" || token == "*" || token == "/"
  }

  def performOperation(operator: String, operand1: Int, operand2: Int): Int = {
    operator match {
      case "+" => operand1 + operand2
      case "-" => operand1 - operand2
      case "*" => operand1 * operand2
      case "/" => operand1 / operand2
    }
  }

}

@@ -1,6 +1,6 @@
 package com.atguigu.scala.chapter03

-import com.atguigu.scala.test.User
+import com.atguigu.scala.test1.User

 object Scala02_Oper {

@@ -1,6 +1,6 @@
 package com.atguigu.scala.chapter06

-import com.atguigu.scala.test.ScalaUser
+import com.atguigu.scala.test1.ScalaUser

 object Scala09_Object_Instance_4 {

@@ -1,6 +1,6 @@
 package com.atguigu.scala.chapter06

-import com.atguigu.scala.test.ScalaUser
+import com.atguigu.scala.test1.ScalaUser

 object Scala09_Object_Instance_5 {

@@ -14,7 +14,7 @@ object Scala05_Transform extends Parent with MyTrait {
   // TODO 3. Trait or companion object
   // TODO 4. Declared elsewhere (package object)
   // TODO 5. Direct import
-  import com.atguigu.scala.test.TestTransform._
+  import com.atguigu.scala.test1.TestTransform._
   val user=new User()
   user.insertUser()
   user.updateUser()

@@ -1,38 +1,35 @@
 package com.markilue.leecode.test;

 import java.text.SimpleDateFormat;
 import java.util.Arrays;
 import java.util.Scanner;

 public class Fibonaqi {


     /**
      * Tests the Fibonacci recursion with O(n) time complexity
      *
      */
 //    @Test
 //    public static void testFibonaqi(){
 //
 //    }

     public static void main(String[] args) {
-        int n=5;
-        System.out.println(fibonacci(1,1,10));
+        int n = 5;
+        System.out.println(fibonacci(1, 1, 10));
+        // Arrays.copyOf() // incomplete call; commented out so the class compiles
     }

-    public static int fibonacci(int first,int second,int n){
-        if(n<=0){
+    public static int fibonacci(int first, int second, int n) {
+        if (n <= 0) {
             return 0;
         }
-        if(n <3){
+        if (n < 3) {
             return 1;
-        }else if(n==3){
-            return first+second;
-        }
-        else {
-            return fibonacci(second,first+second,n-1);
+        } else if (n == 3) {
+            return first + second;
+        } else {
+            return fibonacci(second, first + second, n - 1);
         }
     }


 }

@@ -0,0 +1,188 @@
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-


import os
import sys
import pandas as pd
from math import sqrt
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Model, Input, layers
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import LSTM, SimpleRNN, GRU
from tensorflow.keras.layers import Dense, Dropout, concatenate

from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_error as mae


# adding data_processing to the system path
sys.path.insert(0, '/aul/homes/jshi008/IvyProjects/SimDL/data_processing')
from preprocessing import ws_preprocessing, pm25_preprocessing, ele_preprocessing


'''
Reference:
[1] https://arxiv.org/pdf/2305.04876v3.pdf
[2] https://github.com/JimengShi/ParaRCNN-Time-Series-Forecasting/blob/main/models/ParaRCNN_shifting_sc.py
'''


# set gpu
os.environ["CUDA_VISIBLE_DEVICES"] = "3"


# get the preprocessed train_X, train_y, test_X, test_y from the dataset
n_hours = 72
K = 24
S = 24


### pick one of datasets
# train_X, train_y, test_X, test_y, scaler = ws_preprocessing(n_hours, K, S)
# train_X, train_y, test_X, test_y, scaler = pm25_preprocessing(n_hours, K)
train_X, train_y, test_X, test_y, scaler = ele_preprocessing(n_hours, K)


# build model
model_input = Input(shape=(train_X.shape[1], train_X.shape[2]))

### 4 cnn skip connection
x1 = layers.Conv1D(filters=256,
                   kernel_size=2,
                   activation='relu',
                   padding='same',
                   kernel_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-5))(model_input)

x1_merge1 = concatenate([model_input, x1])
x1 = layers.Conv1D(filters=128,
                   kernel_size=2,
                   activation='relu',
                   padding='same',
                   kernel_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-5))(x1_merge1)

x1_merge2 = concatenate([model_input, x1])
x1 = layers.Conv1D(filters=64,
                   kernel_size=2,
                   activation='relu',
                   padding='same',
                   kernel_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-5))(x1_merge2)

x1_merge3 = concatenate([model_input, x1])
x1 = layers.Conv1D(filters=32,
                   kernel_size=2,
                   activation='relu',
                   padding='same',
                   kernel_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-5))(x1_merge3)


### 4 rnn skip connection
x2 = layers.SimpleRNN(128,
                      activation='relu',
                      return_sequences=True,
                      recurrent_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-5),
                      kernel_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-5))(model_input)

x2_merge1 = concatenate([model_input, x2])
x2 = layers.SimpleRNN(64,
                      activation='relu',
                      return_sequences=True,
                      recurrent_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-5),
                      kernel_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-5))(x2_merge1)

x2_merge2 = concatenate([model_input, x2])
x2 = layers.SimpleRNN(32,
                      activation='relu',
                      return_sequences=True,
                      recurrent_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-5),
                      kernel_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-5))(x2_merge2)

x2_merge3 = concatenate([model_input, x2])
x2 = layers.SimpleRNN(16,
                      activation='relu',
                      return_sequences=True,
                      recurrent_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-5),
                      kernel_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-5))(x2_merge3)


merge = concatenate([model_input, x1, x2])
x = layers.Flatten()(merge)

### uncomment the 3 rows below for pm25 and energy electricity dataset, comment them for water stage dataset
x = Dense(256, activation='relu', kernel_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-5))(x)
x = Dense(128, activation='relu', kernel_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-5))(x)
x = Dense(128, activation='relu', kernel_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-5))(x)


x = Dense(train_y.shape[1])(x)

model = Model(model_input, x)
model.summary()


# training
lr = 0.00001
EPOCHS = 8000

lr_schedule = keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=5e-4,
                                                          decay_steps=10000,
                                                          decay_rate=0.99)

model.compile(
    # optimizer=Adam(learning_rate=lr, decay=lr/EPOCHS),
    optimizer=Adam(learning_rate=lr_schedule),
    loss='mse',
    metrics=['mae'])


es = EarlyStopping(monitor='val_loss', mode='min', verbose=2, patience=1500)
mc = ModelCheckpoint('saved_model/ParaRCNN_shifting_s{}_k{}_sc.h5'.format(S, K), monitor='val_mae', mode='min', verbose=2, save_best_only=True)

history = model.fit(train_X, train_y,
                    batch_size=512,
                    epochs=EPOCHS,
                    validation_data=(test_X, test_y),
                    verbose=2,
                    shuffle=True,
                    callbacks=[es, mc])

plt.rcParams["figure.figsize"] = (8, 6)
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.xlabel('Epoch', fontsize=16)
plt.ylabel('Loss', fontsize=16)
plt.legend(fontsize=14)
plt.title("Training loss vs Testing loss", fontsize=18)
# plt.savefig('graph/rnn_loss.png', dpi=300)
plt.show()


# Performance on test data
saved_model = load_model('saved_model/ParaRCNN_shifting_s{}_k{}_sc.h5'.format(S, K))
yhat = saved_model.predict(test_X)

inv_yhat = scaler.inverse_transform(yhat)
inv_y = scaler.inverse_transform(test_y)

inv_yhat = pd.DataFrame(inv_yhat)
inv_y = pd.DataFrame(inv_y)


print('MAE = {}'.format(float("{:.4f}".format(mae(inv_yhat.iloc[:, :], inv_y.iloc[:, :])))))
print('RMSE = {}'.format(float("{:.4f}".format(sqrt(mse(inv_yhat.iloc[:, :], inv_y.iloc[:, :]))))))

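The script above repeats the same skip-connection pattern eight times: it concatenates the raw model input with the running feature map before the next Conv1D or SimpleRNN. The sketch below condenses that pattern into two helpers for readability; it is not part of the committed script, and the helper names (conv_skip_block, rnn_skip_block) are made up.

from tensorflow import keras
from tensorflow.keras import layers


def conv_skip_block(model_input, x, filters):
    """Concatenate the raw input with the running feature map, then convolve."""
    merged = layers.concatenate([model_input, x])
    return layers.Conv1D(filters=filters,
                         kernel_size=2,
                         activation='relu',
                         padding='same',
                         kernel_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-5))(merged)


def rnn_skip_block(model_input, x, units):
    """Same skip pattern for the recurrent branch."""
    merged = layers.concatenate([model_input, x])
    return layers.SimpleRNN(units,
                            activation='relu',
                            return_sequences=True,
                            recurrent_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-5),
                            kernel_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-5))(merged)


# Example wiring for the CNN branch (first layer applied directly, then three skip blocks):
# x1 = layers.Conv1D(256, 2, activation='relu', padding='same')(model_input)
# for f in (128, 64, 32):
#     x1 = conv_skip_block(model_input, x1, f)
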
@@ -0,0 +1,320 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

from math import sqrt
from utils.masking import TriangularCausalMask, ProbMask

'''
Reference:
[1] https://arxiv.org/pdf/2307.00493v1.pdf
[2] https://github.com/nhatthanhtran/FWin2023/blob/main/models/model.py
'''


class FullAttention(nn.Module):
    def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
        super(FullAttention, self).__init__()
        self.scale = scale
        self.mask_flag = mask_flag
        self.output_attention = output_attention
        self.dropout = nn.Dropout(attention_dropout)

    def forward(self, queries, keys, values, attn_mask):
        B, L, H, E = queries.shape
        _, S, _, D = values.shape
        scale = self.scale or 1./sqrt(E)

        scores = torch.einsum("blhe,bshe->bhls", queries, keys)
        if self.mask_flag:
            if attn_mask is None:
                attn_mask = TriangularCausalMask(B, L, device=queries.device)

            scores.masked_fill_(attn_mask.mask, -np.inf)

        A = self.dropout(torch.softmax(scale * scores, dim=-1))
        V = torch.einsum("bhls,bshd->blhd", A, values)

        if self.output_attention:
            return (V.contiguous(), A)
        else:
            return (V.contiguous(), None)


class ProbAttention(nn.Module):
    def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
        super(ProbAttention, self).__init__()
        self.factor = factor
        self.scale = scale
        self.mask_flag = mask_flag
        self.output_attention = output_attention
        self.dropout = nn.Dropout(attention_dropout)

    def _prob_QK(self, Q, K, sample_k, n_top):  # n_top: c*ln(L_q)
        # Q [B, H, L, D]
        B, H, L_K, E = K.shape
        _, _, L_Q, _ = Q.shape

        # calculate the sampled Q_K
        K_expand = K.unsqueeze(-3).expand(B, H, L_Q, L_K, E)

        # real U = U_part(factor*ln(L_k))*L_q
        index_sample = torch.randint(L_K, (L_Q, sample_k))
        # print(index_sample.shape)
        K_sample = K_expand[:, :, torch.arange(
            L_Q).unsqueeze(1), index_sample, :]
        Q_K_sample = torch.matmul(
            Q.unsqueeze(-2), K_sample.transpose(-2, -1)).squeeze(-2)

        # find the Top_k query with sparisty measurement
        M = Q_K_sample.max(-1)[0] - torch.div(Q_K_sample.sum(-1), L_K)
        M_top = M.topk(n_top, sorted=False)[1]

        # use the reduced Q to calculate Q_K
        Q_reduce = Q[torch.arange(B)[:, None, None],
                     torch.arange(H)[None, :, None],
                     M_top, :]  # factor*ln(L_q)

        Q_K = torch.matmul(Q_reduce, K.transpose(-2, -1))  # factor*ln(L_q)*L_k

        return Q_K, M_top

    def _get_initial_context(self, V, L_Q):
        B, H, L_V, D = V.shape
        if not self.mask_flag:
            # V_sum = V.sum(dim=-2)
            V_sum = V.mean(dim=-2)
            contex = V_sum.unsqueeze(-2).expand(B, H,
                                                L_Q, V_sum.shape[-1]).clone()
        else:  # use mask
            # requires that L_Q == L_V, i.e. for self-attention only
            assert(L_Q == L_V)
            contex = V.cumsum(dim=-2)
        return contex

    def _update_context(self, context_in, V, scores, index, L_Q, attn_mask):
        B, H, L_V, D = V.shape

        if self.mask_flag:
            attn_mask = ProbMask(B, H, L_Q, index, scores, device=V.device)
            scores.masked_fill_(attn_mask.mask, -np.inf)

        attn = torch.softmax(scores, dim=-1)  # nn.Softmax(dim=-1)(scores)

        context_in[torch.arange(B)[:, None, None],
                   torch.arange(H)[None, :, None],
                   index, :] = torch.matmul(attn, V).type_as(context_in)
        if self.output_attention:
            attns = (torch.ones([B, H, L_V, L_V]) /
                     L_V).type_as(attn).to(attn.device)
            attns[torch.arange(B)[:, None, None], torch.arange(H)[
                None, :, None], index, :] = attn
            return (context_in, attns)
        else:
            return (context_in, None)

    def forward(self, queries, keys, values, attn_mask):
        B, L_Q, H, D = queries.shape
        _, L_K, _, _ = keys.shape

        queries = queries.transpose(2, 1)
        keys = keys.transpose(2, 1)
        values = values.transpose(2, 1)

        U_part = self.factor * \
            np.ceil(np.log(L_K)).astype('int').item()  # c*ln(L_k)
        u = self.factor * \
            np.ceil(np.log(L_Q)).astype('int').item()  # c*ln(L_q)

        U_part = U_part if U_part < L_K else L_K
        u = u if u < L_Q else L_Q

        scores_top, index = self._prob_QK(
            queries, keys, sample_k=U_part, n_top=u)

        # add scale factor
        scale = self.scale or 1./sqrt(D)
        if scale is not None:
            scores_top = scores_top * scale
        # get the context
        context = self._get_initial_context(values, L_Q)

        # update the context with selected top_k queries
        context, attn = self._update_context(
            context, values, scores_top, index, L_Q, attn_mask)

        return context.transpose(2, 1).contiguous(), attn


class AttentionLayer(nn.Module):
    def __init__(self, attention, d_model, n_heads,
                 d_keys=None, d_values=None, mix=False, layer_num=0):
        super(AttentionLayer, self).__init__()

        d_keys = d_keys or (d_model//n_heads)
        d_values = d_values or (d_model//n_heads)

        self.inner_attention = attention
        self.query_projection = nn.Linear(d_model, d_keys * n_heads)
        self.key_projection = nn.Linear(d_model, d_keys * n_heads)
        self.value_projection = nn.Linear(d_model, d_values * n_heads)
        self.out_projection = nn.Linear(d_values * n_heads, d_model)
        self.n_heads = n_heads
        self.mix = mix
        self.layer_num = layer_num

    def forward(self, queries, keys, values, attn_mask):
        B, L, _ = queries.shape
        _, S, _ = keys.shape
        H = self.n_heads

        queries = self.query_projection(queries).view(B, L, H, -1)
        keys = self.key_projection(keys).view(B, S, H, -1)
        values = self.value_projection(values).view(B, S, H, -1)

        out, attn = self.inner_attention(
            queries,
            keys,
            values,
            attn_mask
        )

        if self.mix:
            out = out.transpose(2, 1).contiguous()
        out = out.view(B, L, -1)

        return self.out_projection(out), attn


class AttentionLayerWin(nn.Module):
    def __init__(self, attention, d_model, n_heads,
                 d_keys=None, d_values=None, mix=False, layer_num=0,
                 window_size=8, output_attention=False):
        super(AttentionLayerWin, self).__init__()
        d_keys = d_keys or (d_model//n_heads)
        d_values = d_values or (d_model//n_heads)

        self.inner_attention = attention
        self.query_projection = nn.Linear(d_model, d_keys * n_heads)
        self.key_projection = nn.Linear(d_model, d_keys * n_heads)
        self.value_projection = nn.Linear(d_model, d_values * n_heads)
        self.out_projection = nn.Linear(d_values * n_heads, d_model)

        self.n_heads = n_heads
        self.mix = mix
        self.layer_num = layer_num
        self.window_size = window_size
        self.output_attn = output_attention

    def forward(self, queries, keys, values, attn_mask):
        B, L, _ = queries.shape
        _, S, _ = keys.shape
        H = self.n_heads

        queries = self.query_projection(queries).view(B, L, H, -1)
        keys = self.key_projection(keys).view(B, S, H, -1)
        values = self.value_projection(values).view(B, S, H, -1)

        # Partition the vectors into windows
        queries = queries.view(B*(L//self.window_size), self.window_size, H, -1)
        keys = keys.view(B*(S//self.window_size), self.window_size, H, -1)
        values = values.view(B*(S//self.window_size), self.window_size, H, -1)

        out, attn = self.inner_attention(
            queries,
            keys,
            values,
            attn_mask
        )

        if self.output_attn:
            attn = self._output_attn(L, attn)

        out = out.view(B, L, H, -1)

        if self.mix:
            out = out.transpose(2, 1).contiguous()
        out = out.view(B, L, -1)

        return self.out_projection(out), attn

    def _output_attn(self, L, attn):
        num_window = L//self.window_size

        for k in range(num_window):
            if k == 0:
                p2d = (0, ((num_window-(k+1))*self.window_size))
                attn_tmp = F.pad(attn[:self.window_size, :, :, :], p2d)
            else:
                p2d = (k*self.window_size, (num_window-(k+1))*self.window_size)
                attn_tmp = torch.cat((attn_tmp, F.pad(attn[k*self.window_size:(k+1)*self.window_size, :, :, :], p2d)), dim=2)

        return attn_tmp


class AttentionLayerCrossWin(nn.Module):
    def __init__(self, attention, d_model, n_heads,
                 d_keys=None, d_values=None, mix=False, layer_num=0,
                 num_windows=4, output_attention=False):
        super(AttentionLayerCrossWin, self).__init__()
        d_keys = d_keys or (d_model//n_heads)
        d_values = d_values or (d_model//n_heads)

        self.inner_attention = attention
        self.query_projection = nn.Linear(d_model, d_keys * n_heads)
        self.key_projection = nn.Linear(d_model, d_keys * n_heads)
        self.value_projection = nn.Linear(d_model, d_values * n_heads)
        self.out_projection = nn.Linear(d_values * n_heads, d_model)

        self.n_heads = n_heads
        self.mix = mix
        self.layer_num = layer_num
        self.num_windows = num_windows
        self.output_attn = output_attention

    def forward(self, queries, keys, values, attn_mask):
        B, L, _ = queries.shape
        _, S, _ = keys.shape
        H = self.n_heads

        queries = self.query_projection(queries).view(B, L, H, -1)
        keys = self.key_projection(keys).view(B, S, H, -1)
        values = self.value_projection(values).view(B, S, H, -1)

        # Partition the vectors into windows
        queries = queries.view(B*self.num_windows, L//self.num_windows, H, -1)
        keys = keys.view(B*self.num_windows, S//self.num_windows, H, -1)
        values = values.view(B*self.num_windows, S//self.num_windows, H, -1)

        out, attn = self.inner_attention(
            queries,
            keys,
            values,
            attn_mask
        )

        if self.output_attn:
            attn = self._output_attn(L, attn)

        out = out.view(B, L, H, -1)

        if self.mix:
            out = out.transpose(2, 1).contiguous()
        out = out.view(B, L, -1)

        return self.out_projection(out), attn

    def _output_attn(self, L, attn):
        window_size = L//self.num_windows

        for k in range(self.num_windows):
            if k == 0:
                p2d = (0, ((self.num_windows-(k+1))*window_size))
                attn_tmp = F.pad(attn[:window_size, :, :, :], p2d)
            else:
                p2d = (k*window_size, (self.num_windows-(k+1))*window_size)
                attn_tmp = torch.cat((attn_tmp, F.pad(attn[k*window_size:(k+1)*window_size, :, :, :], p2d)), dim=2)

        return attn_tmp

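For context, a minimal usage sketch for the window attention layer above (not part of the commit): it assumes this file is importable as attn.py next to the FWin repository's utils.masking, and the batch/length/width values are arbitrary. The point is the expected (batch, length, d_model) input and the requirement that the length be divisible by window_size.

import torch
from attn import FullAttention, AttentionLayerWin

B, L, d_model, n_heads, window = 2, 64, 32, 4, 8   # L must be divisible by window

layer = AttentionLayerWin(
    FullAttention(mask_flag=False, output_attention=False),
    d_model=d_model, n_heads=n_heads, window_size=window)

x = torch.randn(B, L, d_model)
out, attn = layer(x, x, x, attn_mask=None)   # self-attention within each window
print(out.shape)                             # torch.Size([2, 64, 32])
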
@@ -0,0 +1,130 @@
import tensorflow as tf
from tensorflow.keras import layers


class AttentionLayerWin(tf.keras.layers.Layer):
    def __init__(self, attention, d_model, n_heads,
                 d_keys=None, d_values=None, mix=False, layer_num=0,
                 window_size=8, output_attention=False):
        super(AttentionLayerWin, self).__init__()
        d_keys = d_keys or (d_model // n_heads)
        d_values = d_values or (d_model // n_heads)

        self.inner_attention = attention
        self.query_projection = layers.Dense(d_keys * n_heads)
        self.key_projection = layers.Dense(d_keys * n_heads)
        self.value_projection = layers.Dense(d_values * n_heads)
        self.out_projection = layers.Dense(d_model)

        self.n_heads = n_heads
        self.mix = mix
        self.layer_num = layer_num
        self.window_size = window_size
        self.output_attn = output_attention

    def call(self, queries, keys, values, attn_mask):
        B, L, _ = queries.shape
        _, S, _ = keys.shape
        H = self.n_heads

        queries = tf.reshape(self.query_projection(queries), (B, L, H, -1))
        keys = tf.reshape(self.key_projection(keys), (B, S, H, -1))
        values = tf.reshape(self.value_projection(values), (B, S, H, -1))

        # Partition the vectors into windows
        queries = tf.reshape(queries, (B * (L // self.window_size), self.window_size, H, -1))
        keys = tf.reshape(keys, (B * (S // self.window_size), self.window_size, H, -1))
        values = tf.reshape(values, (B * (S // self.window_size), self.window_size, H, -1))

        out, attn = self.inner_attention(
            queries,
            keys,
            values,
            attn_mask
        )

        if self.output_attn:
            attn = self._output_attn(L, attn)

        out = tf.reshape(out, (B, L, H, -1))

        if self.mix:
            out = tf.transpose(out, (0, 2, 1, 3))
        out = tf.reshape(out, (B, L, -1))

        return self.out_projection(out), attn

    def _output_attn(self, L, attn):
        num_window = L // self.window_size

        for k in range(num_window):
            # pad only the last axis: tf.pad expects one (before, after) pair per axis of the rank-4 tensor
            if k == 0:
                p2d = [[0, 0], [0, 0], [0, 0],
                       [0, (num_window - (k + 1)) * self.window_size]]
                attn_tmp = tf.pad(attn[:self.window_size, :, :, :], p2d)
            else:
                p2d = [[0, 0], [0, 0], [0, 0],
                       [k * self.window_size, (num_window - (k + 1)) * self.window_size]]
                attn_tmp = tf.concat((attn_tmp, tf.pad(attn[k * self.window_size:(k + 1) * self.window_size, :, :, :], p2d)), axis=2)

        return attn_tmp


class AttentionLayerCrossWin(tf.keras.layers.Layer):
    def __init__(self, attention, d_model, n_heads,
                 d_keys=None, d_values=None, mix=False, layer_num=0,
                 num_windows=4, output_attention=False):
        super(AttentionLayerCrossWin, self).__init__()
        d_keys = d_keys or (d_model // n_heads)
        d_values = d_values or (d_model // n_heads)

        self.inner_attention = attention
        self.query_projection = layers.Dense(d_keys * n_heads)
        self.key_projection = layers.Dense(d_keys * n_heads)
        self.value_projection = layers.Dense(d_values * n_heads)
        self.out_projection = layers.Dense(d_model)

        self.n_heads = n_heads
        self.mix = mix
        self.layer_num = layer_num
        self.num_windows = num_windows
        self.output_attn = output_attention

    def call(self, queries, keys, values, attn_mask):
        B, L, _ = queries.shape
        _, S, _ = keys.shape
        H = self.n_heads

        queries = tf.reshape(self.query_projection(queries), (B, L, H, -1))
        keys = tf.reshape(self.key_projection(keys), (B, S, H, -1))
        values = tf.reshape(self.value_projection(values), (B, S, H, -1))

        # Partition the vectors into windows
        queries = tf.reshape(queries, (B * self.num_windows, L // self.num_windows, H, -1))
        keys = tf.reshape(keys, (B * self.num_windows, S // self.num_windows, H, -1))
        values = tf.reshape(values, (B * self.num_windows, S // self.num_windows, H, -1))

        out, attn = self.inner_attention(
            queries,
            keys,
            values,
            attn_mask
        )

        if self.output_attn:
            attn = self._output_attn(L, attn)

        out = tf.reshape(out, (B, L, H, -1))

        if self.mix:
            out = tf.transpose(out, (0, 2, 1, 3))
        out = tf.reshape(out, (B, L, -1))

        return self.out_projection(out), attn

    def _output_attn(self, L, attn):
        window_size = L // self.num_windows

        for k in range(self.num_windows):
            # pad only the last axis, mirroring the PyTorch F.pad behaviour
            if k == 0:
                p2d = [[0, 0], [0, 0], [0, 0],
                       [0, (self.num_windows - (k + 1)) * window_size]]
                attn_tmp = tf.pad(attn[:window_size, :, :, :], p2d)
            else:
                p2d = [[0, 0], [0, 0], [0, 0],
                       [k * window_size, (self.num_windows - (k + 1)) * window_size]]
                attn_tmp = tf.concat((attn_tmp, tf.pad(attn[k * window_size:(k + 1) * window_size, :, :, :], p2d)), axis=2)

        return attn_tmp

@@ -0,0 +1,71 @@
import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np

'''
Reference:
[1] https://arxiv.org/pdf/2307.00493v1.pdf
[2] https://github.com/nhatthanhtran/FWin2023/blob/main/models/model.py
'''


class FourierMix(nn.Module):
    def __init__(self, d_model):
        super().__init__()
        self.fourier = FourierLayer(1, 2)
        self.norm = nn.LayerNorm(d_model)

    def forward(self, x, attn_mask=None):
        x = self.fourier(x)
        x = self.norm(x)
        return x, None


class FourierLayer(nn.Module):
    def __init__(self, seq_dim=1, hidden_dim=2):
        super().__init__()
        self.seq_dim = seq_dim
        self.hidden_dim = hidden_dim

    def forward(self, x):
        B, L, D = x.shape
        return torch.real(torch.fft.fft(torch.fft.fft(x, dim=self.hidden_dim), dim=self.seq_dim))


class FeedForward(nn.Module):
    def __init__(self, d_model, dropout=0.1, activation="relu") -> None:
        super().__init__()
        self.fc1 = nn.Linear(d_model, d_model)
        self.activation = F.relu if activation == "relu" else F.gelu
        self.fc2 = nn.Linear(d_model, d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        x = self.fc1(x)
        x = self.dropout(x)
        x = self.activation(x)
        x = self.dropout(self.fc2(x))

        return x


class FNetLayer(nn.Module):
    def __init__(self, d_model, dropout=0.1, activation="relu"):
        super().__init__()
        self.fourier = FourierLayer(1, 2)
        self.feedforward = FeedForward(d_model, dropout, activation)
        self.dropout = nn.Dropout(dropout)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)

    def forward(self, x, attn_mask=None):
        new_x = self.fourier(x)
        x = x + self.dropout(new_x)

        x = self.norm1(x)
        x = x + self.feedforward(x)
        return self.norm2(x), None

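A quick sanity check for the FNet-style layers above (illustrative only, not part of the commit; it assumes the module is saved as fnet.py and that d_model matches the input width):

import torch
from fnet import FNetLayer, FourierMix

x = torch.randn(2, 64, 32)          # (batch, sequence length, d_model)

layer = FNetLayer(d_model=32)
y, _ = layer(x)                     # token mixing via 2-D FFT + feed-forward, residuals and LayerNorms
print(y.shape)                      # torch.Size([2, 64, 32])

mix = FourierMix(d_model=32)
z, _ = mix(x)                       # FFT mixing + LayerNorm only
print(z.shape)                      # torch.Size([2, 64, 32])
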
@@ -0,0 +1,57 @@
import tensorflow as tf
from tensorflow.keras import layers


class FourierLayer(tf.keras.layers.Layer):
    def __init__(self, seq_dim=1, hidden_dim=2):
        super(FourierLayer, self).__init__()
        self.seq_dim = seq_dim
        self.hidden_dim = hidden_dim

    def call(self, x):
        # tf.signal.fft transforms the innermost axis and needs a complex input,
        # so cast first and transpose to also transform the sequence axis
        # (assumes a (batch, sequence, hidden) input).
        x = tf.cast(x, tf.complex64)
        x = tf.signal.fft(x)                  # FFT over the hidden dimension (last axis)
        x = tf.transpose(x, perm=[0, 2, 1])
        x = tf.signal.fft(x)                  # FFT over the sequence dimension
        x = tf.transpose(x, perm=[0, 2, 1])
        return tf.math.real(x)


class FeedForward(tf.keras.layers.Layer):
    def __init__(self, d_model, dropout=0.1, activation="relu"):
        super(FeedForward, self).__init__()
        self.fc1 = layers.Dense(d_model)
        # tf.keras has no GELU layer class, so use an Activation wrapper for the non-ReLU case
        self.activation = layers.ReLU() if activation == "relu" else layers.Activation("gelu")
        self.fc2 = layers.Dense(d_model)
        self.dropout = layers.Dropout(dropout)

    def call(self, x):
        x = self.fc1(x)
        x = self.dropout(x)
        x = self.activation(x)
        x = self.dropout(self.fc2(x))
        return x


class FNetLayer(tf.keras.layers.Layer):
    def __init__(self, d_model, dropout=0.1, activation="relu"):
        super(FNetLayer, self).__init__()
        self.fourier = FourierLayer(seq_dim=1, hidden_dim=2)
        self.feedforward = FeedForward(d_model, dropout, activation)
        self.dropout = layers.Dropout(dropout)
        self.norm1 = layers.LayerNormalization()
        self.norm2 = layers.LayerNormalization()

    def call(self, x, attn_mask=None):
        new_x = self.fourier(x)
        x = x + self.dropout(new_x)

        x = self.norm1(x)
        x = x + self.feedforward(x)
        return self.norm2(x), None


class FourierMix(tf.keras.layers.Layer):
    def __init__(self, d_model):
        super(FourierMix, self).__init__()
        self.fourier = FourierLayer(seq_dim=1, hidden_dim=2)
        self.norm = layers.LayerNormalization()

    def call(self, x, attn_mask=None):
        x = self.fourier(x)
        x = self.norm(x)
        return x, None

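An illustrative eager-mode check for the Keras version above (not part of the commit; it assumes the layers are saved as fnet_tf.py and that d_model matches the input width, mirroring the PyTorch sketch earlier):

import tensorflow as tf
from fnet_tf import FNetLayer, FourierMix

x = tf.random.normal((2, 64, 32))        # (batch, sequence length, d_model)

y, _ = FNetLayer(d_model=32)(x)          # FFT token mixing + feed-forward, residuals and LayerNorms
print(y.shape)                           # (2, 64, 32)

z, _ = FourierMix(d_model=32)(x)         # FFT mixing + LayerNorm only
print(z.shape)                           # (2, 64, 32)
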