Add some Scala- and Python-related code

This commit is contained in:
dingjiawen@xiaomi.com 2023-08-16 15:36:26 +08:00
parent aa79b3d094
commit a9c30c8a91
16 changed files with 1007 additions and 35 deletions

View File

@ -0,0 +1,20 @@
package com.atguigu.spark.core.rdd.operator.transform
import org.apache.spark.api.java.function.FilterFunction
/**
* @ClassName AAA.scala
* @author dingjiawen@xiaomi.com
* @version 1.0.0
* @Description TODO
* @createTime 2023-07-19 18:44:00
*/
class AAA(bbb: Int) extends FilterFunction[Int] {
  val cc = bbb

  // Keeps every element; printing the captured constructor argument is just a debugging side effect.
  override def call(t: Int): Boolean = {
    println(cc)
    true
  }
}
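A minimal usage sketch (not part of this commit; the object name and SparkSession setup are assumptions for illustration): RDD.filter expects a Scala T => Boolean, but the Dataset API accepts a Java-style FilterFunction such as AAA directly.
package com.atguigu.spark.core.rdd.operator.transform

import org.apache.spark.sql.SparkSession

// Hypothetical demo object, assumed for illustration only.
object AAAUsageSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("AAAUsageSketch").getOrCreate()
    import spark.implicits._

    val ds = Seq(1, 2, 3, 4).toDS() // Dataset[Int]
    // Dataset.filter has an overload that takes org.apache.spark.api.java.function.FilterFunction[T]
    ds.filter(new AAA(10)).show()
    spark.stop()
  }
}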

View File

@ -0,0 +1,18 @@
package com.atguigu.spark.core.rdd.operator.transform
/**
* @ClassName CCC.scala
* @author dingjiawen@xiaomi.com
* @version 1.0.0
* @Description TODO
* @createTime 2023-07-19 18:44:00
*/
class CCC(bbb: Int) extends (Int => Boolean) with Serializable {
  val cc = bbb

  // An ordinary Scala Int => Boolean, made Serializable so Spark can ship it to executors;
  // it keeps every element and prints the captured constructor argument as a side effect.
  override def apply(t: Int): Boolean = {
    println(cc)
    true
  }
}
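A quick local check (assumed for illustration; the object name is hypothetical): because CCC is just an Int => Boolean, it can be invoked directly or passed to ordinary Scala collections, as well as to RDD.filter in the Spark08_RDD_Operator_Transform hunk further below.
package com.atguigu.spark.core.rdd.operator.transform

// Hypothetical demo object, not part of the commit.
object CCCQuickCheck {
  def main(args: Array[String]): Unit = {
    val pred = new CCC(10)
    println(pred(4))                       // prints 10 (the captured argument), then true
    println(List(1, 2, 3, 4).filter(pred)) // List(1, 2, 3, 4): the predicate keeps everything
  }
}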

View File

@ -0,0 +1,69 @@
package com.atguigu.spark.core.rdd.operator.transform
import com.fasterxml.jackson.databind.ObjectMapper
import net.jpountz.xxhash.{XXHash64, XXHashFactory}
import org.apache.log4j.Logger
import org.apache.spark.api.java.function.FilterFunction
import java.nio.ByteBuffer
/**
* created by likunyi@xiaomi.com
* at 2022-04-29 15:32:00
* This operator samples the incoming data, keeping part of it and discarding the rest.
*
* samplingRate is the sampling rate and must be an integer:
* samplingRate = 10 means a 10% sampling rate,
* samplingRate = 64 means a 64% sampling rate.
*/
class HdfsToHolo_MessageSampler1(samplingRate: Int) extends FilterFunction[String] {
// @transient lazy val: re-created on each executor after deserialization instead of being
// serialized with the closure (a plain @transient val would be null/0 on the executors).
@transient private lazy val objectMapper: ObjectMapper = new ObjectMapper()
// reduce samplingRate/100 by their greatest common divisor, as in the commented-out open() below
@transient private lazy val greatestCommonDivisor: Int = BigInt(samplingRate).gcd(BigInt(100)).toInt
@transient private lazy val NUMERATOR: Int = samplingRate / greatestCommonDivisor
@transient private lazy val DENOMINATOR: Int = 100 / greatestCommonDivisor
@transient private lazy val XXHASH_SEED: Long = 0x9747b28c
@transient private lazy val hasher: XXHash64 = XXHashFactory.fastestInstance().hash64()
@transient private lazy val logger: Logger = Logger.getLogger(this.getClass.getName)
// initialize the parameters (kept commented out for reference)
// def open(): Tuple5[ObjectMapper, Int, Int, Long, XXHash64] = {
//
// objectMapper = new ObjectMapper()
//
// val numerator = samplingRate // the numerator is the sampling rate
// val denominator = 100 // the denominator is always 100
// val greatestCommonDivisor = MathUtils.getGCD(numerator, denominator)
//
// NUMERATOR = numerator / greatestCommonDivisor
// DENOMINATOR = denominator / greatestCommonDivisor
// XXHASH_SEED = 0x9747b28c
// hasher = XXHashFactory.fastestInstance().hash64()
// (objectMapper,NUMERATOR,DENOMINATOR,XXHASH_SEED,hasher)
// } // open
override def call(input: String): Boolean = {
  logger.info(s"parameters: objectMapper=${objectMapper}, NUMERATOR=${NUMERATOR}, DENOMINATOR=${DENOMINATOR}, hasher=${hasher}")
  logger.info(s"${input}")
  // If no sampling is needed (i.e. the sampling rate is 100%), pass the data straight downstream.
  if (samplingRate == 100) {
    return true
  }
  val currentMessage = objectMapper.readTree(input)
  // Otherwise sample on the hash of distinct_id. Roughly: at a 64% sampling rate, take the hash of
  // distinct_id modulo 100 and keep remainders 0-63 while dropping 64-99 (reduced by the GCD to modulo 25 < 16).
  hash(currentMessage.get("distinct_id").asText()) % DENOMINATOR < NUMERATOR
}
private def hash(distinct_id: String): Long = {
Math.abs(hasher.hash(ByteBuffer.wrap(distinct_id.getBytes("UTF-8")), XXHASH_SEED))
} // hash
}
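A usage sketch (assumed, not part of the commit; the object name and sample JSON are illustrative only): applying the sampler to a Dataset[String] of JSON messages carrying a distinct_id field. Because the decision hashes distinct_id deterministically, all messages of a given user are either kept or dropped together.
package com.atguigu.spark.core.rdd.operator.transform

import org.apache.spark.sql.{Dataset, SparkSession}

// Hypothetical demo object, assumed for illustration only.
object SamplerUsageSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("SamplerUsageSketch").getOrCreate()
    import spark.implicits._

    val messages: Dataset[String] = Seq(
      """{"distinct_id":"user-1","event":"click"}""",
      """{"distinct_id":"user-2","event":"view"}"""
    ).toDS()

    // Keep roughly 10% of messages, bucketed by the hash of distinct_id.
    val sampled = messages.filter(new HdfsToHolo_MessageSampler1(10))
    sampled.show(truncate = false)
    spark.stop()
  }
}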

View File

@ -1,9 +1,5 @@
package com.atguigu.spark.core.rdd.operator.transform
-import java.text.SimpleDateFormat
-import java.util.Date
import org.apache.spark.rdd.RDD
import org.apache.spark.{SparkConf, SparkContext}
/**
@ -15,14 +11,18 @@ object Spark08_RDD_Operator_Transform {
val sparkConf = new SparkConf().setMaster("local[*]").setAppName("Operator")
// create the SparkContext object
-val sc =new SparkContext(sparkConf)
+val sc = new SparkContext(sparkConf)
//TODO operator - filter
val rdd = sc.makeRDD(
-List(1,2,3,4)
+List(1, 2, 3, 4)
)
-val filterRDD = rdd.filter(_ % 2 == 0)
+val filterRDD = rdd.filter(
+new CCC(10)
+)
+// val filterRDD = rdd.filter(_ % 2 == 0)
filterRDD.collect().foreach(println(_))

View File

@ -0,0 +1,102 @@
package com.atguigu.gmall.realtime.app.dwm
import scala.collection.mutable
/**
* @ClassName GenerateRPN.scala
* @author dingjiawen@xiaomi.com
* @version 1.0.0
* @Description TODO
* @createTime 2023-06-30 17:27:00
*/
object GenerateRPN {
def main(args: Array[String]): Unit = {
// test cases
//String str = "1+2*3-4*5-6+7*8-9"; //123*+45*-6-78*+9-  (no spaces: would need a different tokenizer than split("\\s+"))
// val str = "1 + 2 * 3 - 4 * 5 - ( 6 + 7 * 8 - 9 )" //123*+45*-678*+9--
// val str = "5 + 2 * 3" //523*+
val str = "6 * ( 5 + ( 2 + 3 ) * 8 + 3 )" //6523+8*+3+*
val rpn: Array[String] = generateRPN(str)
println(rpn.mkString(","))
println(evalRPN(rpn))
}
def generateRPN(expression: String): Array[String] = {
val precedence = Map("+" -> 1, "-" -> 1, "*" -> 2, "/" -> 2)
val output = mutable.Stack[String]()
val stack = mutable.Stack[String]()
def isOperator(token: String): Boolean = precedence.contains(token)
def hasHigherPrecedence(op1: String, op2: String): Boolean = {
precedence(op1) >= precedence(op2)
}
def processOperator(op: String): Unit = {
while (stack.nonEmpty && isOperator(stack.top) && hasHigherPrecedence(stack.top, op)) {
output.push(stack.pop())
}
stack.push(op)
}
def processOperand(operand: String): Unit = {
output.push(operand)
}
def processParenthesis(): Unit = {
while (stack.nonEmpty && stack.top != "(") {
output.push(stack.pop())
}
stack.pop() // pop the left parenthesis
}
for (token <- expression.split("\\s+")) {
token match {
case "(" => stack.push(token)
case ")" => processParenthesis()
case t if isOperator(t) => processOperator(t)
case _ => processOperand(token)
}
}
while (stack.nonEmpty) {
output.push(stack.pop())
}
output.toArray.reverse
}
def evalRPN(tokens: Array[String]): Int = {
val stack = mutable.Stack[Int]()
for (token <- tokens) {
if (isOperator(token)) {
val operand2 = stack.pop()
val operand1 = stack.pop()
val result = performOperation(token, operand1, operand2)
stack.push(result)
} else {
stack.push(token.toInt)
}
}
stack.pop()
}
def isOperator(token: String): Boolean = {
token == "+" || token == "-" || token == "*" || token == "/"
}
def performOperation(operator: String, operand1: Int, operand2: Int): Int = {
operator match {
case "+" => operand1 + operand2
case "-" => operand1 - operand2
case "*" => operand1 * operand2
case "/" => operand1 / operand2
}
}
}
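A small worked trace (assumed for illustration; the object name is hypothetical) showing how the functions above turn the commented-out test case "5 + 2 * 3" into RPN and evaluate it:
package com.atguigu.gmall.realtime.app.dwm

// Hypothetical demo object, not part of the commit.
object GenerateRPNTrace {
  def main(args: Array[String]): Unit = {
    // "*" binds tighter than "+", so it is emitted to the output before "+"
    val rpn = GenerateRPN.generateRPN("5 + 2 * 3")
    println(rpn.mkString(" "))        // 5 2 3 * +
    println(GenerateRPN.evalRPN(rpn)) // 11
  }
}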

View File

@ -1,6 +1,6 @@
package com.atguigu.scala.chapter03
-import com.atguigu.scala.test.User
+import com.atguigu.scala.test1.User
object Scala02_Oper {

View File

@ -1,6 +1,6 @@
package com.atguigu.scala.chapter06
-import com.atguigu.scala.test.ScalaUser
+import com.atguigu.scala.test1.ScalaUser
object Scala09_Object_Instance_4 {

View File

@ -1,6 +1,6 @@
package com.atguigu.scala.chapter06
-import com.atguigu.scala.test.ScalaUser
+import com.atguigu.scala.test1.ScalaUser
object Scala09_Object_Instance_5 {

View File

@ -14,7 +14,7 @@ object Scala05_Transform extends Parent with MyTrait {
//TODO 3. trait or companion object
//TODO 4. declared elsewhere (package object)
//TODO 5. direct import
-import com.atguigu.scala.test.TestTransform._
+import com.atguigu.scala.test1.TestTransform._
val user=new User()
user.insertUser()
user.updateUser()

View File

@ -1,38 +1,35 @@
package com.markilue.leecode.test;
-import java.text.SimpleDateFormat;
import java.util.Arrays;
-import java.util.Scanner;
public class Fibonaqi {
/**
* Test the recursive Fibonacci implementation with O(n) time complexity
*
*/
// @Test
// public static void testFibonaqi(){
//
// }
public static void main(String[] args) {
-int n=5;
-System.out.println(fibonacci(1,1,10));
+int n = 5;
+System.out.println(fibonacci(1, 1, 10));
+Arrays.copyOf()
}
-public static int fibonacci(int first,int second,int n){
-if(n<=0){
+public static int fibonacci(int first, int second, int n) {
+if (n <= 0) {
return 0;
}
-if(n <3){
+if (n < 3) {
return 1;
-}else if(n==3){
-return first+second;
-}
-else {
-return fibonacci(second,first+second,n-1);
+} else if (n == 3) {
+return first + second;
+} else {
+return fibonacci(second, first + second, n - 1);
}
}
}

View File

@ -0,0 +1,188 @@
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
import os
import sys
import pandas as pd
from math import sqrt
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Model, Input, layers
from tensorflow.keras.models import load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import LSTM, SimpleRNN, GRU
from tensorflow.keras.layers import Dense, Dropout, concatenate
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_error as mae
# adding data_processing to the system path
sys.path.insert(0, '/aul/homes/jshi008/IvyProjects/SimDL/data_processing')
from preprocessing import ws_preprocessing, pm25_preprocessing, ele_preprocessing
'''
References
[1] https://arxiv.org/pdf/2305.04876v3.pdf
[2] https://github.com/JimengShi/ParaRCNN-Time-Series-Forecasting/blob/main/models/ParaRCNN_shifting_sc.py
'''
# set gpu
os.environ["CUDA_VISIBLE_DEVICES"]="3"
# get the preprocessed train_X, train_y, test_X, test_y from the dataset
n_hours = 72
K = 24
S = 24
### pick one of datasets
# train_X, train_y, test_X, test_y, scaler = ws_preprocessing(n_hours, K, S)
# train_X, train_y, test_X, test_y, scaler = pm25_preprocessing(n_hours, K)
train_X, train_y, test_X, test_y, scaler = ele_preprocessing(n_hours, K)
# build model
model_input = Input(shape=(train_X.shape[1], train_X.shape[2]))
### 4 cnn skip connection
x1 = layers.Conv1D(filters=256,
kernel_size=2,
activation='relu',
padding='same',
kernel_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-5))(model_input)
x1_merge1 = concatenate([model_input, x1])
x1 = layers.Conv1D(filters=128,
kernel_size=2,
activation='relu',
padding='same',
kernel_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-5))(x1_merge1)
x1_merge2 = concatenate([model_input, x1])
x1 = layers.Conv1D(filters=64,
kernel_size=2,
activation='relu',
padding='same',
kernel_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-5))(x1_merge2)
x1_merge3 = concatenate([model_input, x1])
x1 = layers.Conv1D(filters=32,
kernel_size=2,
activation='relu',
padding='same',
kernel_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-5))(x1_merge3)
### 4 rnn skip connection
x2 = layers.SimpleRNN(128,
activation='relu',
return_sequences=True,
recurrent_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-5),
kernel_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-5))(model_input)
x2_merge1 = concatenate([model_input, x2])
x2 = layers.SimpleRNN(64,
activation='relu',
return_sequences=True,
recurrent_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-5),
kernel_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-5))(x2_merge1)
x2_merge2 = concatenate([model_input, x2])
x2 = layers.SimpleRNN(32,
activation='relu',
return_sequences=True,
recurrent_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-5),
kernel_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-5))(x2_merge2)
x2_merge3 = concatenate([model_input, x2])
x2 = layers.SimpleRNN(16,
activation='relu',
return_sequences=True,
recurrent_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-5),
kernel_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-5))(x2_merge3)
merge = concatenate([model_input, x1, x2])
x = layers.Flatten()(merge)
### uncomment the 3 rows below for the pm25 and electricity datasets; comment them out for the water stage dataset
x = Dense(256, activation='relu', kernel_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-5))(x)
x = Dense(128, activation='relu', kernel_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-5))(x)
x = Dense(128, activation='relu', kernel_regularizer=keras.regularizers.L1L2(l1=1e-5, l2=1e-5))(x)
x = Dense(train_y.shape[1])(x)
model = Model(model_input, x)
model.summary()
# training
lr = 0.00001
EPOCHS = 8000
lr_schedule = keras.optimizers.schedules.ExponentialDecay(initial_learning_rate=5e-4,
decay_steps=10000,
decay_rate=0.99)
model.compile(
# optimizer=Adam(learning_rate=lr, decay=lr/EPOCHS),
optimizer=Adam(learning_rate=lr_schedule),
loss='mse',
metrics=['mae'])
es = EarlyStopping(monitor='val_loss', mode='min', verbose=2, patience=1500)
mc = ModelCheckpoint('saved_model/ParaRCNN_shifting_s{}_k{}_sc.h5'.format(S, K), monitor='val_mae', mode='min', verbose=2, save_best_only=True)
history = model.fit(train_X, train_y,
batch_size=512,
epochs=EPOCHS,
validation_data=(test_X, test_y),
verbose=2,
shuffle=True,
callbacks=[es, mc])
plt.rcParams["figure.figsize"] = (8, 6)
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.xlabel('Epoch', fontsize=16)
plt.ylabel('Loss', fontsize=16)
plt.legend(fontsize=14)
plt.title("Training loss vs Testing loss", fontsize=18)
# plt.savefig('graph/rnn_loss.png', dpi=300)
plt.show()
# Performance on test data
saved_model = load_model('saved_model/ParaRCNN_shifting_s{}_k{}_sc.h5'.format(S, K))
yhat = saved_model.predict(test_X)
inv_yhat = scaler.inverse_transform(yhat)
inv_y = scaler.inverse_transform(test_y)
inv_yhat = pd.DataFrame(inv_yhat)
inv_y = pd.DataFrame(inv_y)
print('MAE = {}'.format(float("{:.4f}".format(mae(inv_yhat.iloc[:, :], inv_y.iloc[:, :])))))
print('RMSE = {}'.format(float("{:.4f}".format(sqrt(mse(inv_yhat.iloc[:, :], inv_y.iloc[:, :]))))))

View File

@ -0,0 +1,320 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from math import sqrt
from utils.masking import TriangularCausalMask, ProbMask
'''
References
[1]https://arxiv.org/pdf/2307.00493v1.pdf
[2]https://github.com/nhatthanhtran/FWin2023/blob/main/models/model.py
'''
class FullAttention(nn.Module):
def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
super(FullAttention, self).__init__()
self.scale = scale
self.mask_flag = mask_flag
self.output_attention = output_attention
self.dropout = nn.Dropout(attention_dropout)
def forward(self, queries, keys, values, attn_mask):
B, L, H, E = queries.shape
_, S, _, D = values.shape
scale = self.scale or 1./sqrt(E)
scores = torch.einsum("blhe,bshe->bhls", queries, keys)
if self.mask_flag:
if attn_mask is None:
attn_mask = TriangularCausalMask(B, L, device=queries.device)
scores.masked_fill_(attn_mask.mask, -np.inf)
A = self.dropout(torch.softmax(scale * scores, dim=-1))
V = torch.einsum("bhls,bshd->blhd", A, values)
if self.output_attention:
return (V.contiguous(), A)
else:
return (V.contiguous(), None)
class ProbAttention(nn.Module):
def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
super(ProbAttention, self).__init__()
self.factor = factor
self.scale = scale
self.mask_flag = mask_flag
self.output_attention = output_attention
self.dropout = nn.Dropout(attention_dropout)
def _prob_QK(self, Q, K, sample_k, n_top): # n_top: c*ln(L_q)
# Q [B, H, L, D]
B, H, L_K, E = K.shape
_, _, L_Q, _ = Q.shape
# calculate the sampled Q_K
K_expand = K.unsqueeze(-3).expand(B, H, L_Q, L_K, E)
# real U = U_part(factor*ln(L_k))*L_q
index_sample = torch.randint(L_K, (L_Q, sample_k))
# print(index_sample.shape)
K_sample = K_expand[:, :, torch.arange(
L_Q).unsqueeze(1), index_sample, :]
Q_K_sample = torch.matmul(
Q.unsqueeze(-2), K_sample.transpose(-2, -1)).squeeze(-2)
# find the Top_k query with sparisty measurement
M = Q_K_sample.max(-1)[0] - torch.div(Q_K_sample.sum(-1), L_K)
M_top = M.topk(n_top, sorted=False)[1]
# use the reduced Q to calculate Q_K
Q_reduce = Q[torch.arange(B)[:, None, None],
torch.arange(H)[None, :, None],
M_top, :] # factor*ln(L_q)
Q_K = torch.matmul(Q_reduce, K.transpose(-2, -1)) # factor*ln(L_q)*L_k
return Q_K, M_top
def _get_initial_context(self, V, L_Q):
B, H, L_V, D = V.shape
if not self.mask_flag:
# V_sum = V.sum(dim=-2)
V_sum = V.mean(dim=-2)
contex = V_sum.unsqueeze(-2).expand(B, H,
L_Q, V_sum.shape[-1]).clone()
else: # use mask
# requires that L_Q == L_V, i.e. for self-attention only
assert(L_Q == L_V)
contex = V.cumsum(dim=-2)
return contex
def _update_context(self, context_in, V, scores, index, L_Q, attn_mask):
B, H, L_V, D = V.shape
if self.mask_flag:
attn_mask = ProbMask(B, H, L_Q, index, scores, device=V.device)
scores.masked_fill_(attn_mask.mask, -np.inf)
attn = torch.softmax(scores, dim=-1) # nn.Softmax(dim=-1)(scores)
context_in[torch.arange(B)[:, None, None],
torch.arange(H)[None, :, None],
index, :] = torch.matmul(attn, V).type_as(context_in)
if self.output_attention:
attns = (torch.ones([B, H, L_V, L_V]) /
L_V).type_as(attn).to(attn.device)
attns[torch.arange(B)[:, None, None], torch.arange(H)[
None, :, None], index, :] = attn
return (context_in, attns)
else:
return (context_in, None)
def forward(self, queries, keys, values, attn_mask):
B, L_Q, H, D = queries.shape
_, L_K, _, _ = keys.shape
queries = queries.transpose(2, 1)
keys = keys.transpose(2, 1)
values = values.transpose(2, 1)
U_part = self.factor * \
np.ceil(np.log(L_K)).astype('int').item() # c*ln(L_k)
u = self.factor * \
np.ceil(np.log(L_Q)).astype('int').item() # c*ln(L_q)
U_part = U_part if U_part < L_K else L_K
u = u if u < L_Q else L_Q
scores_top, index = self._prob_QK(
queries, keys, sample_k=U_part, n_top=u)
# add scale factor
scale = self.scale or 1./sqrt(D)
if scale is not None:
scores_top = scores_top * scale
# get the context
context = self._get_initial_context(values, L_Q)
# update the context with selected top_k queries
context, attn = self._update_context(
context, values, scores_top, index, L_Q, attn_mask)
return context.transpose(2, 1).contiguous(), attn
class AttentionLayer(nn.Module):
def __init__(self, attention, d_model, n_heads,
d_keys=None, d_values=None, mix=False, layer_num=0):
super(AttentionLayer, self).__init__()
d_keys = d_keys or (d_model//n_heads)
d_values = d_values or (d_model//n_heads)
self.inner_attention = attention
self.query_projection = nn.Linear(d_model, d_keys * n_heads)
self.key_projection = nn.Linear(d_model, d_keys * n_heads)
self.value_projection = nn.Linear(d_model, d_values * n_heads)
self.out_projection = nn.Linear(d_values * n_heads, d_model)
self.n_heads = n_heads
self.mix = mix
self.layer_num = layer_num
def forward(self, queries, keys, values, attn_mask):
B, L, _ = queries.shape
_, S, _ = keys.shape
H = self.n_heads
queries = self.query_projection(queries).view(B, L, H, -1)
keys = self.key_projection(keys).view(B, S, H, -1)
values = self.value_projection(values).view(B, S, H, -1)
out, attn = self.inner_attention(
queries,
keys,
values,
attn_mask
)
if self.mix:
out = out.transpose(2, 1).contiguous()
out = out.view(B, L, -1)
return self.out_projection(out), attn
class AttentionLayerWin(nn.Module):
def __init__(self, attention, d_model, n_heads,
d_keys=None, d_values=None, mix=False, layer_num=0,
window_size=8, output_attention=False):
super(AttentionLayerWin, self).__init__()
d_keys = d_keys or (d_model//n_heads)
d_values = d_values or (d_model//n_heads)
self.inner_attention = attention
self.query_projection = nn.Linear(d_model, d_keys * n_heads)
self.key_projection = nn.Linear(d_model, d_keys * n_heads)
self.value_projection = nn.Linear(d_model, d_values * n_heads)
self.out_projection = nn.Linear(d_values * n_heads, d_model)
self.n_heads = n_heads
self.mix = mix
self.layer_num = layer_num
self.window_size = window_size
self.output_attn = output_attention
def forward(self, queries, keys, values, attn_mask):
B, L, _ = queries.shape
_, S, _ = keys.shape
H = self.n_heads
queries = self.query_projection(queries).view(B, L, H, -1)
keys = self.key_projection(keys).view(B, S, H, -1)
values = self.value_projection(values).view(B, S, H, -1)
#Partition the vectors into windows
queries = queries.view(B*(L//self.window_size), self.window_size, H, -1)
keys = keys.view(B*(S//self.window_size), self.window_size, H, -1)
values = values.view(B*(S//self.window_size), self.window_size, H, -1)
out, attn = self.inner_attention(
queries,
keys,
values,
attn_mask
)
if self.output_attn:
attn = self._output_attn(L, attn)
out = out.view(B, L, H, -1)
if self.mix:
out = out.transpose(2, 1).contiguous()
out = out.view(B, L, -1)
return self.out_projection(out), attn
def _output_attn(self,L, attn):
num_window = L//self.window_size
for k in range(num_window):
if k==0:
p2d = (0,((num_window-(k+1))*self.window_size))
attn_tmp = F.pad(attn[:self.window_size,:,:,:],p2d)
else:
p2d = (k*self.window_size, (num_window-(k+1))*self.window_size)
attn_tmp = torch.cat((attn_tmp, F.pad(attn[k*self.window_size:(k+1)*self.window_size,:,:,:],p2d)),dim=2)
return attn_tmp
class AttentionLayerCrossWin(nn.Module):
def __init__(self, attention, d_model, n_heads,
d_keys=None, d_values=None, mix=False, layer_num=0,
num_windows=4, output_attention=False):
super(AttentionLayerCrossWin, self).__init__()
d_keys = d_keys or (d_model//n_heads)
d_values = d_values or (d_model//n_heads)
self.inner_attention = attention
self.query_projection = nn.Linear(d_model, d_keys * n_heads)
self.key_projection = nn.Linear(d_model, d_keys * n_heads)
self.value_projection = nn.Linear(d_model, d_values * n_heads)
self.out_projection = nn.Linear(d_values * n_heads, d_model)
self.n_heads = n_heads
self.mix = mix
self.layer_num = layer_num
self.num_windows = num_windows
self.output_attn = output_attention
def forward(self, queries, keys, values, attn_mask):
B, L, _ = queries.shape
_, S, _ = keys.shape
H = self.n_heads
queries = self.query_projection(queries).view(B, L, H, -1)
keys = self.key_projection(keys).view(B, S, H, -1)
values = self.value_projection(values).view(B, S, H, -1)
#Partition the vectors into windows
queries = queries.view(B*self.num_windows, L//self.num_windows, H, -1)
keys = keys.view(B*self.num_windows, S//self.num_windows, H, -1)
values = values.view(B*self.num_windows, S//self.num_windows, H, -1)
out, attn = self.inner_attention(
queries,
keys,
values,
attn_mask
)
if self.output_attn:
attn = self._output_attn(L, attn)
out = out.view(B, L, H, -1)
if self.mix:
out = out.transpose(2, 1).contiguous()
out = out.view(B, L, -1)
return self.out_projection(out), attn
def _output_attn(self,L, attn):
window_size = L//self.num_windows
for k in range(self.num_windows):
if k==0:
p2d = (0,((self.num_windows-(k+1))*window_size))
attn_tmp = F.pad(attn[:window_size,:,:,:],p2d)
else:
p2d = (k*window_size, (self.num_windows-(k+1))*window_size)
attn_tmp = torch.cat((attn_tmp, F.pad(attn[k*window_size:(k+1)*window_size,:,:,:],p2d)),dim=2)
return attn_tmp

View File

@ -0,0 +1,130 @@
import tensorflow as tf
from tensorflow.keras import layers
class AttentionLayerWin(tf.keras.layers.Layer):
def __init__(self, attention, d_model, n_heads,
d_keys=None, d_values=None, mix=False, layer_num=0,
window_size=8, output_attention=False):
super(AttentionLayerWin, self).__init__()
d_keys = d_keys or (d_model // n_heads)
d_values = d_values or (d_model // n_heads)
self.inner_attention = attention
self.query_projection = layers.Dense(d_keys * n_heads)
self.key_projection = layers.Dense(d_keys * n_heads)
self.value_projection = layers.Dense(d_values * n_heads)
self.out_projection = layers.Dense(d_model)
self.n_heads = n_heads
self.mix = mix
self.layer_num = layer_num
self.window_size = window_size
self.output_attn = output_attention
def call(self, queries, keys, values, attn_mask):
B, L, _ = queries.shape
_, S, _ = keys.shape
H = self.n_heads
queries = tf.reshape(self.query_projection(queries), (B, L, H, -1))
keys = tf.reshape(self.key_projection(keys), (B, S, H, -1))
values = tf.reshape(self.value_projection(values), (B, S, H, -1))
queries = tf.reshape(queries, (B * (L // self.window_size), self.window_size, H, -1))
keys = tf.reshape(keys, (B * (S // self.window_size), self.window_size, H, -1))
values = tf.reshape(values, (B * (S // self.window_size), self.window_size, H, -1))
out, attn = self.inner_attention(
queries,
keys,
values,
attn_mask
)
if self.output_attn:
attn = self._output_attn(L, attn)
out = tf.reshape(out, (B, L, H, -1))
if self.mix:
out = tf.transpose(out, (0, 2, 1, 3))
out = tf.reshape(out, (B, L, -1))
return self.out_projection(out), attn
def _output_attn(self, L, attn):
num_window = L // self.window_size
for k in range(num_window):
if k == 0:
p2d = [[0, 0], [0, 0], [0, 0], [0, (num_window - (k + 1)) * self.window_size]]  # one (before, after) pair per dim of the 4-D attn tensor; pad only the last dim, as F.pad does in the PyTorch version
attn_tmp = tf.pad(attn[:self.window_size, :, :, :], p2d)
else:
p2d = [[0, 0], [0, 0], [0, 0], [k * self.window_size, (num_window - (k + 1)) * self.window_size]]
attn_tmp = tf.concat((attn_tmp, tf.pad(attn[k * self.window_size:(k + 1) * self.window_size, :, :, :], p2d)), axis=2)
return attn_tmp
class AttentionLayerCrossWin(tf.keras.layers.Layer):
def __init__(self, attention, d_model, n_heads,
d_keys=None, d_values=None, mix=False, layer_num=0,
num_windows=4, output_attention=False):
super(AttentionLayerCrossWin, self).__init__()
d_keys = d_keys or (d_model // n_heads)
d_values = d_values or (d_model // n_heads)
self.inner_attention = attention
self.query_projection = layers.Dense(d_keys * n_heads)
self.key_projection = layers.Dense(d_keys * n_heads)
self.value_projection = layers.Dense(d_values * n_heads)
self.out_projection = layers.Dense(d_model)
self.n_heads = n_heads
self.mix = mix
self.layer_num = layer_num
self.num_windows = num_windows
self.output_attn = output_attention
def call(self, queries, keys, values, attn_mask):
B, L, _ = queries.shape
_, S, _ = keys.shape
H = self.n_heads
queries = tf.reshape(self.query_projection(queries), (B, L, H, -1))
keys = tf.reshape(self.key_projection(keys), (B, S, H, -1))
values = tf.reshape(self.value_projection(values), (B, S, H, -1))
queries = tf.reshape(queries, (B * self.num_windows, L // self.num_windows, H, -1))
keys = tf.reshape(keys, (B * self.num_windows, S // self.num_windows, H, -1))
values = tf.reshape(values, (B * self.num_windows, S // self.num_windows, H, -1))
out, attn = self.inner_attention(
queries,
keys,
values,
attn_mask
)
if self.output_attn:
attn = self._output_attn(L, attn)
out = tf.reshape(out, (B, L, H, -1))
if self.mix:
out = tf.transpose(out, (0, 2, 1, 3))
out = tf.reshape(out, (B, L, -1))
return self.out_projection(out), attn
def _output_attn(self, L, attn):
window_size = L // self.num_windows
for k in range(self.num_windows):
if k == 0:
p2d = [[0, 0], [0, 0], [0, 0], [0, (self.num_windows - (k + 1)) * window_size]]  # pad only the last dim of the 4-D attn tensor
attn_tmp = tf.pad(attn[:window_size, :, :, :], p2d)
else:
p2d = [[0, 0], [0, 0], [0, 0], [k * window_size, (self.num_windows - (k + 1)) * window_size]]
attn_tmp = tf.concat((attn_tmp, tf.pad(attn[k * window_size:(k + 1) * window_size, :, :, :], p2d)), axis=2)
return attn_tmp

View File

@ -0,0 +1,71 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
'''
References
[1]https://arxiv.org/pdf/2307.00493v1.pdf
[2]https://github.com/nhatthanhtran/FWin2023/blob/main/models/model.py
'''
class FourierMix(nn.Module):
def __init__(self, d_model):
super().__init__()
self.fourier = FourierLayer(1, 2)
self.norm = nn.LayerNorm(d_model)
def forward(self, x, attn_mask=None):
x = self.fourier(x)
x = self.norm(x)
return x, None
class FourierLayer(nn.Module):
def __init__(self, seq_dim=1, hidden_dim=2):
super().__init__()
self.seq_dim = seq_dim
self.hidden_dim = hidden_dim
def forward(self, x):
B, L, D = x.shape
return torch.real(torch.fft.fft(torch.fft.fft(x, dim=self.hidden_dim), dim=self.seq_dim))
class FeedForward(nn.Module):
def __init__(self, d_model, dropout=0.1, activation="relu") -> None:
super().__init__()
self.fc1 = nn.Linear(d_model, d_model)
self.activation = F.relu if activation == "relu" else F.gelu
self.fc2 = nn.Linear(d_model, d_model)
self.dropout = nn.Dropout(dropout)
def forward(self, x):
x = self.fc1(x)
x = self.dropout(x)
x = self.activation(x)
x = self.dropout(self.fc2(x))
return x
class FNetLayer(nn.Module):
def __init__(self, d_model, dropout=0.1, activation="relu"):
super().__init__()
self.fourier = FourierLayer(1, 2)
self.feedforward = FeedForward(d_model, dropout, activation)
self.dropout = nn.Dropout(dropout)
self.norm1 = nn.LayerNorm(d_model)
self.norm2 = nn.LayerNorm(d_model)
def forward(self, x, attn_mask=None):
new_x = self.fourier(x)
x = x + self.dropout(new_x)
x = self.norm1(x)
x = x + self.feedforward(x)
return self.norm2(x), None

View File

@ -0,0 +1,57 @@
import tensorflow as tf
from tensorflow.keras import layers
class FourierLayer(tf.keras.layers.Layer):
def __init__(self, seq_dim=1, hidden_dim=2):
super(FourierLayer, self).__init__()
self.seq_dim = seq_dim
self.hidden_dim = hidden_dim
def call(self, x):
    # tf.signal.fft has no axis argument and requires a complex input: FFT over the innermost
    # (hidden) axis, then swap axes to FFT over the sequence axis, mirroring
    # torch.fft.fft(torch.fft.fft(x, dim=self.hidden_dim), dim=self.seq_dim) for (B, L, D) inputs.
    x_c = tf.cast(x, tf.complex64)
    x_c = tf.signal.fft(x_c)
    x_c = tf.transpose(x_c, perm=[0, 2, 1])
    x_c = tf.signal.fft(x_c)
    x_c = tf.transpose(x_c, perm=[0, 2, 1])
    return tf.math.real(x_c)
class FeedForward(tf.keras.layers.Layer):
def __init__(self, d_model, dropout=0.1, activation="relu"):
super(FeedForward, self).__init__()
self.fc1 = layers.Dense(d_model)
self.activation = layers.ReLU() if activation == "relu" else layers.Activation("gelu")  # Keras has no layers.GELU class
self.fc2 = layers.Dense(d_model)
self.dropout = layers.Dropout(dropout)
def call(self, x):
x = self.fc1(x)
x = self.dropout(x)
x = self.activation(x)
x = self.dropout(self.fc2(x))
return x
class FNetLayer(tf.keras.layers.Layer):
def __init__(self, d_model, dropout=0.1, activation="relu"):
super(FNetLayer, self).__init__()
self.fourier = FourierLayer(seq_dim=1, hidden_dim=2)
self.feedforward = FeedForward(d_model, dropout, activation)
self.dropout = layers.Dropout(dropout)
self.norm1 = layers.LayerNormalization()
self.norm2 = layers.LayerNormalization()
def call(self, x, attn_mask=None):
new_x = self.fourier(x)
x = x + self.dropout(new_x)
x = self.norm1(x)
x = x + self.feedforward(x)
return self.norm2(x), None
class FourierMix(tf.keras.layers.Layer):
def __init__(self, d_model):
super(FourierMix, self).__init__()
self.fourier = FourierLayer(seq_dim=1, hidden_dim=2)
self.norm = layers.LayerNormalization()
def call(self, x, attn_mask=None):
x = self.fourier(x)
x = self.norm(x)
return x, None