Update AdaRNN
This commit is contained in:
parent
14b7cb72a6
commit
1d7bfec0f8
@ -0,0 +1,361 @@
|
|||
import torch
|
||||
import torch.nn as nn
|
||||
from loss_transfer import TransferLoss
|
||||
import torch.nn.functional as F
|
||||
import tensorflow as tf
|
||||
|
||||
|
||||
'''
References:
[1] https://arxiv.org/pdf/2108.04443.pdf
[2] https://github.com/kevinding1125/transferlearning/blob/master/code/deep/adarnn/base/AdaRNN.py
'''
|
||||
|
||||
|
||||
class AdaRNN(nn.Module):
|
||||
"""
|
||||
model_type: 'Boosting', 'AdaRNN'
|
||||
"""
|
||||
|
||||
def __init__(self, use_bottleneck=False, bottleneck_width=256, n_input=128, n_hiddens=[64, 64], n_output=6, dropout=0.0, len_seq=9, model_type='AdaRNN', trans_loss='mmd'):
|
||||
super(AdaRNN, self).__init__()
|
||||
self.use_bottleneck = use_bottleneck
|
||||
self.n_input = n_input
|
||||
self.num_layers = len(n_hiddens)
|
||||
self.hiddens = n_hiddens
|
||||
self.n_output = n_output
|
||||
self.model_type = model_type
|
||||
self.trans_loss = trans_loss
|
||||
self.len_seq = len_seq
|
||||
in_size = self.n_input
|
||||
|
||||
features = nn.ModuleList()
|
||||
for hidden in n_hiddens:
|
||||
rnn = nn.GRU(
|
||||
input_size=in_size,
|
||||
num_layers=1,
|
||||
hidden_size=hidden,
|
||||
batch_first=True,
|
||||
dropout=dropout
|
||||
)
|
||||
features.append(rnn)
|
||||
in_size = hidden
|
||||
self.features = nn.Sequential(*features)
|
||||
|
||||
if use_bottleneck == True: # finance
|
||||
self.bottleneck = nn.Sequential(
|
||||
nn.Linear(n_hiddens[-1], bottleneck_width),
|
||||
nn.Linear(bottleneck_width, bottleneck_width),
|
||||
nn.BatchNorm1d(bottleneck_width),
|
||||
nn.ReLU(),
|
||||
nn.Dropout(),
|
||||
)
|
||||
self.bottleneck[0].weight.data.normal_(0, 0.005)
|
||||
self.bottleneck[0].bias.data.fill_(0.1)
|
||||
self.bottleneck[1].weight.data.normal_(0, 0.005)
|
||||
self.bottleneck[1].bias.data.fill_(0.1)
|
||||
self.fc = nn.Linear(bottleneck_width, n_output)
|
||||
torch.nn.init.xavier_normal_(self.fc.weight)
|
||||
else:
|
||||
self.fc_out = nn.Linear(n_hiddens[-1], self.n_output)
|
||||
|
||||
if self.model_type == 'AdaRNN':
|
||||
gate = nn.ModuleList()
|
||||
for i in range(len(n_hiddens)):
|
||||
gate_weight = nn.Linear(
|
||||
len_seq * self.hiddens[i]*2, len_seq)
|
||||
gate.append(gate_weight)
|
||||
self.gate = gate
|
||||
|
||||
bnlst = nn.ModuleList()
|
||||
for i in range(len(n_hiddens)):
|
||||
bnlst.append(nn.BatchNorm1d(len_seq))
|
||||
self.bn_lst = bnlst
|
||||
self.softmax = torch.nn.Softmax(dim=0)
|
||||
self.init_layers()
|
||||
|
||||
def init_layers(self):
|
||||
for i in range(len(self.hiddens)):
|
||||
self.gate[i].weight.data.normal_(0, 0.05)
|
||||
self.gate[i].bias.data.fill_(0.0)
|
||||
|
||||
def forward_pre_train(self, x, len_win=0):
|
||||
out = self.gru_features(x)
|
||||
fea = out[0]
|
||||
if self.use_bottleneck == True:
|
||||
fea_bottleneck = self.bottleneck(fea[:, -1, :])
|
||||
fc_out = self.fc(fea_bottleneck).squeeze()
|
||||
else:
|
||||
fc_out = self.fc_out(fea[:, -1, :]).squeeze()
|
||||
|
||||
out_list_all, out_weight_list = out[1], out[2]
|
||||
out_list_s, out_list_t = self.get_features(out_list_all)
|
||||
loss_transfer = torch.zeros((1,)).cuda()
|
||||
for i in range(len(out_list_s)):
|
||||
criterion_transder = TransferLoss(
|
||||
loss_type=self.trans_loss, input_dim=out_list_s[i].shape[2])
|
||||
h_start = 0
|
||||
for j in range(h_start, self.len_seq, 1):
|
||||
i_start = j - len_win if j - len_win >= 0 else 0
|
||||
i_end = j + len_win if j + len_win < self.len_seq else self.len_seq - 1
|
||||
for k in range(i_start, i_end + 1):
|
||||
weight = out_weight_list[i][j] if self.model_type == 'AdaRNN' else 1 / (
|
||||
self.len_seq - h_start) * (2 * len_win + 1)
|
||||
loss_transfer = loss_transfer + weight * criterion_transder.compute(
|
||||
out_list_s[i][:, j, :], out_list_t[i][:, k, :])
|
||||
return fc_out, loss_transfer, out_weight_list
|
||||
|
||||
def gru_features(self, x, predict=False):
|
||||
x_input = x
|
||||
out = None
|
||||
out_lis = []
|
||||
out_weight_list = [] if (
|
||||
self.model_type == 'AdaRNN') else None
|
||||
for i in range(self.num_layers):
|
||||
out, _ = self.features[i](x_input.float())
|
||||
x_input = out
|
||||
out_lis.append(out)
|
||||
if self.model_type == 'AdaRNN' and predict == False:
|
||||
out_gate = self.process_gate_weight(x_input, i)
|
||||
out_weight_list.append(out_gate)
|
||||
return out, out_lis, out_weight_list
|
||||
|
||||
def process_gate_weight(self, out, index):
|
||||
x_s = out[0: int(out.shape[0]//2)]
|
||||
x_t = out[out.shape[0]//2: out.shape[0]]
|
||||
x_all = torch.cat((x_s, x_t), 2)
|
||||
x_all = x_all.view(x_all.shape[0], -1)
|
||||
weight = torch.sigmoid(self.bn_lst[index](
|
||||
self.gate[index](x_all.float())))
|
||||
weight = torch.mean(weight, dim=0)
|
||||
res = self.softmax(weight).squeeze()
|
||||
return res
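    # Shape note for process_gate_weight (descriptive comment, not original code): `out` is the GRU
    # output of a batch stacked as [source; target] along dim 0, shape (2B, len_seq, hidden). The two
    # halves are concatenated on the feature dim -> (B, len_seq, 2*hidden), flattened, passed through
    # the gate Linear + BatchNorm1d + sigmoid, averaged over the batch and softmax-normalized,
    # yielding one importance weight per time step with shape (len_seq,).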
|
||||
|
||||
def get_features(self, output_list):
|
||||
fea_list_src, fea_list_tar = [], []
|
||||
for fea in output_list:
|
||||
fea_list_src.append(fea[0: fea.size(0) // 2])
|
||||
fea_list_tar.append(fea[fea.size(0) // 2:])
|
||||
return fea_list_src, fea_list_tar
|
||||
|
||||
# For Boosting-based
|
||||
def forward_Boosting(self, x, weight_mat=None):
|
||||
out = self.gru_features(x)
|
||||
fea = out[0]
|
||||
if self.use_bottleneck:
|
||||
fea_bottleneck = self.bottleneck(fea[:, -1, :])
|
||||
fc_out = self.fc(fea_bottleneck).squeeze()
|
||||
else:
|
||||
fc_out = self.fc_out(fea[:, -1, :]).squeeze()
|
||||
|
||||
out_list_all = out[1]
|
||||
out_list_s, out_list_t = self.get_features(out_list_all)
|
||||
loss_transfer = torch.zeros((1,)).cuda()
|
||||
if weight_mat is None:
|
||||
weight = (1.0 / self.len_seq *
|
||||
torch.ones(self.num_layers, self.len_seq)).cuda()
|
||||
else:
|
||||
weight = weight_mat
|
||||
dist_mat = torch.zeros(self.num_layers, self.len_seq).cuda()
|
||||
for i in range(len(out_list_s)):
|
||||
criterion_transder = TransferLoss(
|
||||
loss_type=self.trans_loss, input_dim=out_list_s[i].shape[2])
|
||||
for j in range(self.len_seq):
|
||||
loss_trans = criterion_transder.compute(
|
||||
out_list_s[i][:, j, :], out_list_t[i][:, j, :])
|
||||
loss_transfer = loss_transfer + weight[i, j] * loss_trans
|
||||
dist_mat[i, j] = loss_trans
|
||||
return fc_out, loss_transfer, dist_mat, weight
|
||||
|
||||
# For Boosting-based
|
||||
def update_weight_Boosting(self, weight_mat, dist_old, dist_new):
|
||||
epsilon = 1e-12
|
||||
dist_old = dist_old.detach()
|
||||
dist_new = dist_new.detach()
|
||||
ind = dist_new > dist_old + epsilon
|
||||
weight_mat[ind] = weight_mat[ind] * \
|
||||
(1 + torch.sigmoid(dist_new[ind] - dist_old[ind]))
|
||||
weight_norm = torch.norm(weight_mat, dim=1, p=1)
|
||||
weight_mat = weight_mat / weight_norm.t().unsqueeze(1).repeat(1, self.len_seq)
|
||||
return weight_mat
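    # Reweighting rule used above (descriptive comment): wherever the distribution distance grew
    # between epochs (dist_new > dist_old + epsilon), the corresponding weight is scaled by
    # 1 + sigmoid(dist_new - dist_old); each layer's weights are then L1-normalized over the len_seq steps.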
|
||||
|
||||
def predict(self, x):
|
||||
out = self.gru_features(x, predict=True)
|
||||
fea = out[0]
|
||||
if self.use_bottleneck == True:
|
||||
fea_bottleneck = self.bottleneck(fea[:, -1, :])
|
||||
fc_out = self.fc(fea_bottleneck).squeeze()
|
||||
else:
|
||||
fc_out = self.fc_out(fea[:, -1, :]).squeeze()
|
||||
return fc_out
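# A minimal usage sketch for the PyTorch AdaRNN above (illustrative only; the tensor sizes and the
# source/target batch layout are assumptions, not part of this file):
#
#   model = AdaRNN(use_bottleneck=True, bottleneck_width=64, n_input=6,
#                  n_hiddens=[64, 64], n_output=1, len_seq=24, trans_loss='mmd').cuda()
#   x_src = torch.randn(36, 24, 6).cuda()      # (batch, len_seq, n_input), source domain
#   x_tgt = torch.randn(36, 24, 6).cuda()      # same shape, target domain
#   x_all = torch.cat((x_src, x_tgt), dim=0)   # forward_* methods expect [source; target] on dim 0
#   pred, loss_transfer, gate_weights = model.forward_pre_train(x_all, len_win=0)
#   y_hat = model.predict(x_tgt)               # inference path skips the gate computation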
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class AdaRNN_tensorflow(tf.keras.Model):
|
||||
def __init__(self, use_bottleneck=False, bottleneck_width=256, n_input=128, n_hiddens=[64, 64], n_output=6, dropout=0.0, len_seq=9, model_type='AdaRNN', trans_loss='mmd'):
|
||||
        super(AdaRNN_tensorflow, self).__init__()
|
||||
self.use_bottleneck = use_bottleneck
|
||||
self.n_input = n_input
|
||||
self.num_layers = len(n_hiddens)
|
||||
self.hiddens = n_hiddens
|
||||
self.n_output = n_output
|
||||
self.model_type = model_type
|
||||
self.trans_loss = trans_loss
|
||||
self.len_seq = len_seq
|
||||
|
||||
self.features = tf.keras.Sequential()
|
||||
for hidden in n_hiddens:
|
||||
rnn = tf.keras.layers.GRU(
|
||||
units=hidden,
|
||||
return_sequences=True,
|
||||
dropout=dropout
|
||||
)
|
||||
self.features.add(rnn)
|
||||
|
||||
if use_bottleneck == True:
|
||||
self.bottleneck = tf.keras.Sequential([
|
||||
tf.keras.layers.Dense(bottleneck_width),
|
||||
tf.keras.layers.Dense(bottleneck_width),
|
||||
tf.keras.layers.BatchNormalization(),
|
||||
tf.keras.layers.ReLU(),
|
||||
tf.keras.layers.Dropout(dropout)
|
||||
])
|
||||
self.fc = tf.keras.layers.Dense(n_output, activation=None)
|
||||
|
||||
else:
|
||||
self.fc_out = tf.keras.layers.Dense(n_output, activation=None)
|
||||
|
||||
if self.model_type == 'AdaRNN':
|
||||
self.gate = []
|
||||
for _ in range(len(n_hiddens)):
|
||||
gate_weight = tf.keras.layers.Dense(len_seq, activation=None)
|
||||
self.gate.append(gate_weight)
|
||||
self.bn_lst = [tf.keras.layers.BatchNormalization() for _ in range(len(n_hiddens))]
|
||||
self.softmax = tf.keras.layers.Softmax(axis=0)
|
||||
        # self.init_layers()  # weight initialization from the PyTorch version is omitted in this port
|
||||
|
||||
# def init_layers(self):
|
||||
# for gate_layer in self.gate:
|
||||
# gate_layer.build((None, self.len_seq * self.hiddens[i] * 2))
|
||||
|
||||
def forward_pre_train(self, x, len_win=0):
|
||||
        # output after the final GRU layer, the per-layer GRU outputs, and the per-layer normalized gate weights
|
||||
out, out_list_all, out_weight_list = self.gru_features(x)
|
||||
        fea = out
|
||||
if self.use_bottleneck == True:
|
||||
fea_bottleneck = self.bottleneck(fea[:, -1, :])
|
||||
fc_out = self.fc(fea_bottleneck).squeeze()
|
||||
else:
|
||||
fc_out = self.fc_out(fea[:, -1, :]).squeeze()
|
||||
|
||||
|
||||
out_list_s, out_list_t = self.get_features(out_list_all)
|
||||
|
||||
loss_transfer = tf.zeros((1,))
|
||||
for i in range(len(out_list_s)):
|
||||
criterion_transder = TransferLoss(
|
||||
loss_type=self.trans_loss, input_dim=out_list_s[i].shape[2])
|
||||
h_start = 0
|
||||
for j in range(h_start, self.len_seq, 1):
|
||||
i_start = j - len_win if j - len_win >= 0 else 0
|
||||
i_end = j + len_win if j + len_win < self.len_seq else self.len_seq - 1
|
||||
for k in range(i_start, i_end + 1):
|
||||
weight = out_weight_list[i][j] if self.model_type == 'AdaRNN' else 1 / (
|
||||
self.len_seq - h_start) * (2 * len_win + 1)
|
||||
loss_transfer = loss_transfer + weight * criterion_transder.compute(
|
||||
out_list_s[i][:, j, :], out_list_t[i][:, k, :])
|
||||
return fc_out, loss_transfer, out_weight_list
|
||||
|
||||
def call(self, x, len_win=0, training=False):
|
||||
        # output after the final GRU layer, the per-layer GRU outputs, and the per-layer normalized gate weights
|
||||
out, out_list_all, out_weight_list = self.gru_features(x, training=training)
|
||||
fea = out
|
||||
if self.use_bottleneck == True:
|
||||
fea_bottleneck = self.bottleneck(fea[:, -1, :])
|
||||
fc_out = self.fc(fea_bottleneck)
|
||||
else:
|
||||
fc_out = self.fc_out(fea[:, -1, :])
|
||||
|
||||
loss_transfer = tf.zeros((1,))
|
||||
for i in range(len(out_list_all)):
|
||||
criterion_transder = TransferLoss(
|
||||
loss_type=self.trans_loss, input_dim=out_list_all[i].shape[2])
|
||||
h_start = 0
|
||||
for j in range(h_start, self.len_seq, 1):
|
||||
i_start = j - len_win if j - len_win >= 0 else 0
|
||||
i_end = j + len_win if j + len_win < self.len_seq else self.len_seq - 1
|
||||
for k in range(i_start, i_end + 1):
|
||||
weight = out_weight_list[i][j] if self.model_type == 'AdaRNN' else 1 / (
|
||||
self.len_seq - h_start) * (2 * len_win + 1)
|
||||
loss_transfer = loss_transfer + weight * criterion_transder.compute(
|
||||
out_list_all[i][:, j, :], out_list_all[i][:, k, :])
|
||||
return fc_out, loss_transfer, out_weight_list
|
||||
|
||||
def gru_features(self, x, training=False):
|
||||
x_input = x
|
||||
out = None
|
||||
out_lis = []
|
||||
out_weight_list = [] if (
|
||||
self.model_type == 'AdaRNN') else None
|
||||
for i in range(self.num_layers):
|
||||
out = self.features[i](x_input, training=training)
|
||||
x_input = out
|
||||
out_lis.append(out)
|
||||
if self.model_type == 'AdaRNN':
|
||||
out_gate = self.process_gate_weight(x_input, i, training=training)
|
||||
out_weight_list.append(out_gate)
|
||||
return out, out_lis, out_weight_list
|
||||
|
||||
def process_gate_weight(self, out, index, training=False):
|
||||
        x_s = out[:, :out.shape[1] // 2]  # can be read as the first half of the RNN output sequence
|
||||
        x_t = out[:, out.shape[1] // 2:]  # can be read as the second half of the RNN output sequence
|
||||
x_all = tf.concat((x_s, x_t), 2)
|
||||
x_all = tf.reshape(x_all, (x_all.shape[0], -1))
|
||||
        weight = tf.sigmoid(self.bn_lst[index](self.gate[index](x_all), training=training))
|
||||
weight = tf.reduce_mean(weight, axis=0)
|
||||
res = self.softmax(weight)
|
||||
return res
|
||||
|
||||
def get_features(self, output_list):
|
||||
fea_list_src, fea_list_tar = [], []
|
||||
for fea in output_list:
|
||||
fea_list_src.append(fea[:, :fea.shape[1] // 2])
|
||||
fea_list_tar.append(fea[:, fea.shape[1] // 2:])
|
||||
return fea_list_src, fea_list_tar
|
||||
|
||||
def forward_Boosting(self, x, weight_mat=None):
|
||||
out, out_list_all, _ = self.gru_features(x, training=False)
|
||||
fea = out
|
||||
if self.use_bottleneck:
|
||||
fea_bottleneck = self.bottleneck(fea[:, -1, :])
|
||||
fc_out = self.fc(fea_bottleneck)
|
||||
else:
|
||||
fc_out = self.fc_out(fea[:, -1, :])
|
||||
|
||||
out_list_all = out_list_all
|
||||
out_list_s, out_list_t = self.get_features(out_list_all)
|
||||
loss_transfer = tf.zeros((1,))
|
||||
if weight_mat is None:
|
||||
weight = (1.0 / self.len_seq *
|
||||
tf.ones((self.num_layers, self.len_seq), dtype=tf.float32))
|
||||
else:
|
||||
weight = weight_mat
|
||||
        dist_mat = tf.Variable(tf.zeros((self.num_layers, self.len_seq), dtype=tf.float32))  # Variable so per-entry assignment below works
|
||||
for i in range(len(out_list_s)):
|
||||
criterion_transder = TransferLoss(
|
||||
loss_type=self.trans_loss, input_dim=out_list_s[i].shape[2])
|
||||
for j in range(self.len_seq):
|
||||
loss_trans = criterion_transder.compute(
|
||||
out_list_s[i][:, j, :], out_list_t[i][:, j, :])
|
||||
loss_transfer = loss_transfer + weight[i, j] * loss_trans
|
||||
                dist_mat[i, j].assign(loss_trans)
|
||||
return fc_out, loss_transfer, dist_mat, weight
|
||||
|
||||
def update_weight_Boosting(self, weight_mat, dist_old, dist_new):
|
||||
epsilon = 1e-12
|
||||
dist_old = tf.stop_gradient(dist_old)
|
||||
dist_new = tf.stop_gradient(dist_new)
|
||||
ind = dist_new
|
||||
|
|
@ -0,0 +1,108 @@
|
|||
# encoding=utf-8
|
||||
|
||||
import numpy as np
|
||||
from torch.utils.data import Dataset, DataLoader
|
||||
from torchvision import transforms
|
||||
import os
|
||||
# from config import config_info
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
|
||||
|
||||
# This is for parsing the X data, you can ignore it if you do not need preprocessing
|
||||
def format_data_x(datafile):
|
||||
x_data = None
|
||||
for item in datafile:
|
||||
        item_data = np.loadtxt(item, dtype=float)  # np.float was removed from recent NumPy versions
|
||||
if x_data is None:
|
||||
x_data = np.zeros((len(item_data), 1))
|
||||
x_data = np.hstack((x_data, item_data))
|
||||
x_data = x_data[:, 1:]
|
||||
print(x_data.shape)
|
||||
X = None
|
||||
for i in range(len(x_data)):
|
||||
row = np.asarray(x_data[i, :])
|
||||
row = row.reshape(9, 128).T
|
||||
if X is None:
|
||||
X = np.zeros((len(x_data), 128, 9))
|
||||
X[i] = row
|
||||
print(X.shape)
|
||||
return X
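# Note on the layout above: each row of the stacked matrix holds 9 signal files x 128 time steps;
# reshape(9, 128).T turns it into a (128 time steps, 9 channels) sample, so X has shape (N, 128, 9).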
|
||||
|
||||
|
||||
# This is for parsing the Y data, you can ignore it if you do not need preprocessing
|
||||
def format_data_y(datafile):
|
||||
    data = np.loadtxt(datafile, dtype=int) - 1  # np.int was removed from recent NumPy versions
|
||||
YY = np.eye(6)[data]
|
||||
return YY
|
||||
|
||||
|
||||
# Load data function, if there exists parsed data file, then use it
|
||||
# If not, parse the original dataset from scratch
|
||||
def load_data(data_folder, domain):
|
||||
import os
|
||||
domain = '1_20' if domain == 'A' else '21_30'
|
||||
data_file = os.path.join(data_folder, 'data_har_' + domain + '.npz')
|
||||
if os.path.exists(data_file):
|
||||
data = np.load(data_file)
|
||||
X_train = data['X_train']
|
||||
Y_train = data['Y_train']
|
||||
X_test = data['X_test']
|
||||
Y_test = data['Y_test']
|
||||
else:
|
||||
# This for processing the dataset from scratch
|
||||
# After downloading the dataset, put it to somewhere that str_folder can find
|
||||
        str_folder = config_info['data_folder_raw'] + 'UCI HAR Dataset/'  # requires the `from config import config_info` import commented out above
|
||||
INPUT_SIGNAL_TYPES = [
|
||||
"body_acc_x_",
|
||||
"body_acc_y_",
|
||||
"body_acc_z_",
|
||||
"body_gyro_x_",
|
||||
"body_gyro_y_",
|
||||
"body_gyro_z_",
|
||||
"total_acc_x_",
|
||||
"total_acc_y_",
|
||||
"total_acc_z_"
|
||||
]
|
||||
|
||||
str_train_files = [str_folder + 'train/' + 'Inertial Signals/' + item + 'train.txt' for item in
|
||||
INPUT_SIGNAL_TYPES]
|
||||
str_test_files = [str_folder + 'test/' + 'Inertial Signals/' +
|
||||
item + 'test.txt' for item in INPUT_SIGNAL_TYPES]
|
||||
str_train_y = str_folder + 'train/y_train.txt'
|
||||
str_test_y = str_folder + 'test/y_test.txt'
|
||||
|
||||
X_train = format_data_x(str_train_files)
|
||||
X_test = format_data_x(str_test_files)
|
||||
Y_train = format_data_y(str_train_y)
|
||||
Y_test = format_data_y(str_test_y)
|
||||
|
||||
return X_train, onehot_to_label(Y_train), X_test, onehot_to_label(Y_test)
|
||||
|
||||
|
||||
def onehot_to_label(y_onehot):
|
||||
a = np.argwhere(y_onehot == 1)
|
||||
return a[:, -1]
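# Example (illustrative): onehot_to_label(np.eye(6)[[2, 0]]) returns array([2, 0]).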
|
||||
|
||||
|
||||
class data_loader(Dataset):
|
||||
def __init__(self, samples, labels, t):
|
||||
self.samples = samples
|
||||
self.labels = labels
|
||||
self.T = t
|
||||
|
||||
def __getitem__(self, index):
|
||||
sample, target = self.samples[index], self.labels[index]
|
||||
if self.T:
|
||||
return self.T(sample), target
|
||||
else:
|
||||
return sample, target
|
||||
|
||||
def __len__(self):
|
||||
return len(self.samples)
|
||||
|
||||
|
||||
def normalize(x):
|
||||
x_min = x.min(axis=(0, 2, 3), keepdims=True)
|
||||
x_max = x.max(axis=(0, 2, 3), keepdims=True)
|
||||
x_norm = (x - x_min) / (x_max - x_min)
|
||||
return x_norm
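# normalize() assumes a 4-D (N, C, H, W) array and min-max scales each channel to [0, 1]
# using statistics taken over all samples and spatial positions.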
|
||||
|
|
@ -0,0 +1,139 @@
|
|||
# encoding=utf-8
|
||||
import os
|
||||
import data_act as data_act
|
||||
import pandas as pd
|
||||
import data_weather as data_weather
|
||||
import datetime
|
||||
from ..loss_transfer import TransferLoss
|
||||
import torch
|
||||
import math
|
||||
import data_process
|
||||
|
||||
|
||||
def load_act_data(data_folder, batch_size=64, domain="1_20"):
|
||||
x_train, y_train, x_test, y_test = data_act.load_data(data_folder, domain)
|
||||
x_train, x_test = x_train.reshape(
|
||||
(-1, x_train.shape[2], 1, x_train.shape[1])), x_test.reshape((-1, x_train.shape[2], 1, x_train.shape[1]))
|
||||
transform = None
|
||||
train_set = data_act.data_loader(x_train, y_train, transform)
|
||||
test_set = data_act.data_loader(x_test, y_test, transform)
|
||||
train_loader = data_act.DataLoader(
|
||||
train_set, batch_size=batch_size, shuffle=True, drop_last=True)
|
||||
test_loader = data_act.DataLoader(
|
||||
test_set, batch_size=batch_size, shuffle=False)
|
||||
return train_loader, train_loader, test_loader
|
||||
|
||||
|
||||
def load_weather_data(file_path, batch_size=6, station='Changping'):
|
||||
data_file = os.path.join(file_path, "PRSA_Data_1.pkl")
|
||||
mean_train, std_train = data_weather.get_weather_data_statistic(data_file, station=station,
|
||||
start_time='2013-3-1 0:0',
|
||||
end_time='2016-10-30 23:0')
|
||||
train_loader = data_weather.get_weather_data(data_file, station=station, start_time='2013-3-6 0:0',
|
||||
end_time='2015-5-31 23:0', batch_size=batch_size, mean=mean_train,
|
||||
std=std_train)
|
||||
valid_train_loader = data_weather.get_weather_data(data_file, station=station, start_time='2015-6-2 0:0',
|
||||
end_time='2016-6-30 23:0', batch_size=batch_size,
|
||||
mean=mean_train, std=std_train)
|
||||
valid_vld_loader = data_weather.get_weather_data(data_file, station=station, start_time='2016-7-2 0:0',
|
||||
end_time='2016-10-30 23:0', batch_size=batch_size, mean=mean_train,
|
||||
std=std_train)
|
||||
test_loader = data_weather.get_weather_data(data_file, station=station, start_time='2016-11-2 0:0',
|
||||
end_time='2017-2-28 23:0', batch_size=batch_size, mean=mean_train,
|
||||
std=std_train)
|
||||
return train_loader, valid_train_loader, valid_vld_loader, test_loader
|
||||
|
||||
|
||||
def get_split_time(num_domain=2, mode='pre_process', data_file=None, station=None, dis_type='coral'):
|
||||
    split_time = {
|
||||
'2': [('2013-3-6 0:0', '2015-5-31 23:0'), ('2015-6-2 0:0', '2016-6-30 23:0')]
|
||||
}
|
||||
if mode == 'pre_process':
|
||||
        return split_time[str(num_domain)]
|
||||
if mode == 'tdc':
|
||||
return TDC(num_domain, data_file, station, dis_type=dis_type)
|
||||
else:
|
||||
print("error in mode")
|
||||
|
||||
|
||||
def TDC(num_domain, data_file, station, dis_type='coral'):
|
||||
start_time = datetime.datetime.strptime(
|
||||
'2013-03-01 00:00:00', '%Y-%m-%d %H:%M:%S')
|
||||
end_time = datetime.datetime.strptime(
|
||||
'2016-06-30 23:00:00', '%Y-%m-%d %H:%M:%S')
|
||||
num_day = (end_time - start_time).days
|
||||
split_N = 10
|
||||
data = pd.read_pickle(data_file)[station]
|
||||
feat = data[0][0:num_day]
|
||||
feat = torch.tensor(feat, dtype=torch.float32)
|
||||
feat_shape_1 = feat.shape[1]
|
||||
feat = feat.reshape(-1, feat.shape[2])
|
||||
feat = feat.cuda()
|
||||
# num_day_new = feat.shape[0]
|
||||
|
||||
selected = [0, 10]
|
||||
candidate = [1, 2, 3, 4, 5, 6, 7, 8, 9]
|
||||
start = 0
|
||||
|
||||
if num_domain in [2, 3, 5, 7, 10]:
|
||||
while len(selected) - 2 < num_domain - 1:
|
||||
distance_list = []
|
||||
for can in candidate:
|
||||
selected.append(can)
|
||||
selected.sort()
|
||||
dis_temp = 0
|
||||
for i in range(1, len(selected) - 1):
|
||||
for j in range(i, len(selected) - 1):
|
||||
index_part1_start = start + math.floor(selected[i - 1] / split_N * num_day) * feat_shape_1
|
||||
index_part1_end = start + math.floor(selected[i] / split_N * num_day) * feat_shape_1
|
||||
feat_part1 = feat[index_part1_start: index_part1_end]
|
||||
index_part2_start = start + math.floor(selected[j] / split_N * num_day) * feat_shape_1
|
||||
index_part2_end = start + math.floor(selected[j + 1] / split_N * num_day) * feat_shape_1
|
||||
feat_part2 = feat[index_part2_start:index_part2_end]
|
||||
criterion_transder = TransferLoss(loss_type=dis_type, input_dim=feat_part1.shape[1])
|
||||
dis_temp += criterion_transder.compute(feat_part1, feat_part2)
|
||||
distance_list.append(dis_temp)
|
||||
selected.remove(can)
|
||||
can_index = distance_list.index(max(distance_list))
|
||||
selected.append(candidate[can_index])
|
||||
candidate.remove(candidate[can_index])
|
||||
selected.sort()
|
||||
res = []
|
||||
for i in range(1, len(selected)):
|
||||
if i == 1:
|
||||
sel_start_time = start_time + datetime.timedelta(days=int(num_day / split_N * selected[i - 1]), hours=0)
|
||||
else:
|
||||
sel_start_time = start_time + datetime.timedelta(days=int(num_day / split_N * selected[i - 1]) + 1,
|
||||
hours=0)
|
||||
sel_end_time = start_time + datetime.timedelta(days=int(num_day / split_N * selected[i]), hours=23)
|
||||
sel_start_time = datetime.datetime.strftime(sel_start_time, '%Y-%m-%d %H:%M')
|
||||
sel_end_time = datetime.datetime.strftime(sel_end_time, '%Y-%m-%d %H:%M')
|
||||
res.append((sel_start_time, sel_end_time))
|
||||
return res
|
||||
else:
|
||||
print("error in number of domain")
|
||||
|
||||
|
||||
def load_weather_data_multi_domain(file_path, batch_size=6, station='Changping', number_domain=2, mode='pre_process',
|
||||
dis_type='coral'):
|
||||
# mode: 'tdc', 'pre_process'
|
||||
data_file = os.path.join(file_path, "PRSA_Data_1.pkl")
|
||||
mean_train, std_train = data_weather.get_weather_data_statistic(data_file, station=station,
|
||||
start_time='2013-3-1 0:0',
|
||||
end_time='2016-10-30 23:0')
|
||||
split_time_list = get_split_time(number_domain, mode=mode, data_file=data_file, station=station, dis_type=dis_type)
|
||||
train_list = []
|
||||
for i in range(len(split_time_list)):
|
||||
time_temp = split_time_list[i]
|
||||
train_loader = data_weather.get_weather_data(data_file, station=station, start_time=time_temp[0],
|
||||
end_time=time_temp[1], batch_size=batch_size, mean=mean_train,
|
||||
std=std_train)
|
||||
train_list.append(train_loader)
|
||||
|
||||
valid_vld_loader = data_weather.get_weather_data(data_file, station=station, start_time='2016-7-2 0:0',
|
||||
end_time='2016-10-30 23:0', batch_size=batch_size, mean=mean_train,
|
||||
std=std_train)
|
||||
test_loader = data_weather.get_weather_data(data_file, station=station, start_time='2016-11-2 0:0',
|
||||
end_time='2017-2-28 23:0', batch_size=batch_size, mean=mean_train,
|
||||
std=std_train, shuffle=False)
|
||||
return train_list, valid_vld_loader, test_loader
|
||||
|
|
@ -0,0 +1,132 @@
|
|||
import math
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import os
|
||||
from pandas.core.frame import DataFrame
|
||||
from torch.utils.data import Dataset, DataLoader
|
||||
import torch
|
||||
import pickle
|
||||
import datetime
|
||||
|
||||
|
||||
class data_loader(Dataset):
|
||||
def __init__(self, df_feature, df_label, df_label_reg, t=None):
|
||||
|
||||
assert len(df_feature) == len(df_label)
|
||||
assert len(df_feature) == len(df_label_reg)
|
||||
|
||||
# df_feature = df_feature.reshape(df_feature.shape[0], df_feature.shape[1] // 6, df_feature.shape[2] * 6)
|
||||
self.df_feature = df_feature
|
||||
self.df_label = df_label
|
||||
self.df_label_reg = df_label_reg
|
||||
|
||||
self.T = t
|
||||
self.df_feature = torch.tensor(
|
||||
self.df_feature, dtype=torch.float32)
|
||||
self.df_label = torch.tensor(
|
||||
self.df_label, dtype=torch.float32)
|
||||
self.df_label_reg = torch.tensor(
|
||||
self.df_label_reg, dtype=torch.float32)
|
||||
|
||||
def __getitem__(self, index):
|
||||
sample, target, label_reg = self.df_feature[index], self.df_label[index], self.df_label_reg[index]
|
||||
if self.T:
|
||||
            return self.T(sample), target, label_reg  # return the regression label in both branches
|
||||
else:
|
||||
return sample, target, label_reg
|
||||
|
||||
def __len__(self):
|
||||
return len(self.df_feature)
|
||||
|
||||
|
||||
def create_dataset(df, station, start_date, end_date, mean=None, std=None):
|
||||
data = df[station]
|
||||
feat, label, label_reg = data[0], data[1], data[2]
|
||||
referece_start_time = datetime.datetime(2013, 3, 1, 0, 0)
|
||||
referece_end_time = datetime.datetime(2017, 2, 28, 0, 0)
|
||||
|
||||
assert (pd.to_datetime(start_date) - referece_start_time).days >= 0
|
||||
assert (pd.to_datetime(end_date) - referece_end_time).days <= 0
|
||||
assert (pd.to_datetime(end_date) - pd.to_datetime(start_date)).days >= 0
|
||||
index_start = (pd.to_datetime(start_date) - referece_start_time).days
|
||||
index_end = (pd.to_datetime(end_date) - referece_start_time).days
|
||||
feat = feat[index_start: index_end + 1]
|
||||
label = label[index_start: index_end + 1]
|
||||
label_reg = label_reg[index_start: index_end + 1]
|
||||
|
||||
# ori_shape_1, ori_shape_2=feat.shape[1], feat.shape[2]
|
||||
# feat=feat.reshape(-1, feat.shape[2])
|
||||
# feat=(feat - mean) / std
|
||||
# feat=feat.reshape(-1, ori_shape_1, ori_shape_2)
|
||||
|
||||
return data_loader(feat, label, label_reg)
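# create_dataset() slices the per-day tensors between start_date and end_date (inclusive, counted
# in days from the 2013-03-01 reference) and wraps them in the Dataset above; the commented lines
# show where mean/std normalization would be applied if enabled.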
|
||||
|
||||
|
||||
def create_dataset_shallow(df, station, start_date, end_date, mean=None, std=None):
|
||||
data = df[station]
|
||||
feat, label, label_reg = data[0], data[1], data[2]
|
||||
referece_start_time = datetime.datetime(2013, 3, 1, 0, 0)
|
||||
referece_end_time = datetime.datetime(2017, 2, 28, 0, 0)
|
||||
|
||||
assert (pd.to_datetime(start_date) - referece_start_time).days >= 0
|
||||
assert (pd.to_datetime(end_date) - referece_end_time).days <= 0
|
||||
assert (pd.to_datetime(end_date) - pd.to_datetime(start_date)).days >= 0
|
||||
index_start = (pd.to_datetime(start_date) - referece_start_time).days
|
||||
index_end = (pd.to_datetime(end_date) - referece_start_time).days
|
||||
feat = feat[index_start: index_end + 1]
|
||||
label = label[index_start: index_end + 1]
|
||||
label_reg = label_reg[index_start: index_end + 1]
|
||||
|
||||
# ori_shape_1, ori_shape_2=feat.shape[1], feat.shape[2]
|
||||
# feat=feat.reshape(-1, feat.shape[2])
|
||||
# feat=(feat - mean) / std
|
||||
# feat=feat.reshape(-1, ori_shape_1, ori_shape_2)
|
||||
|
||||
return feat, label_reg
|
||||
|
||||
|
||||
def get_dataset_statistic(df, station, start_date, end_date):
|
||||
data = df[station]
|
||||
feat, label = data[0], data[1]
|
||||
referece_start_time = datetime.datetime(2013, 3, 1, 0, 0)
|
||||
referece_end_time = datetime.datetime(2017, 2, 28, 0, 0)
|
||||
|
||||
assert (pd.to_datetime(start_date) - referece_start_time).days >= 0
|
||||
assert (pd.to_datetime(end_date) - referece_end_time).days <= 0
|
||||
assert (pd.to_datetime(end_date) - pd.to_datetime(start_date)).days >= 0
|
||||
index_start = (pd.to_datetime(start_date) - referece_start_time).days
|
||||
index_end = (pd.to_datetime(end_date) - referece_start_time).days
|
||||
feat = feat[index_start: index_end + 1]
|
||||
label = label[index_start: index_end + 1]
|
||||
feat = feat.reshape(-1, feat.shape[2])
|
||||
mu_train = np.mean(feat, axis=0)
|
||||
sigma_train = np.std(feat, axis=0)
|
||||
|
||||
return mu_train, sigma_train
|
||||
|
||||
|
||||
def get_weather_data(data_file, station, start_time, end_time, batch_size, shuffle=True, mean=None, std=None):
|
||||
df = pd.read_pickle(data_file)
|
||||
|
||||
dataset = create_dataset(df, station, start_time,
|
||||
end_time, mean=mean, std=std)
|
||||
train_loader = DataLoader(
|
||||
dataset, batch_size=batch_size, shuffle=shuffle)
|
||||
return train_loader
|
||||
|
||||
|
||||
def get_weather_data_shallow(data_file, station, start_time, end_time, batch_size, shuffle=True, mean=None, std=None):
|
||||
df = pd.read_pickle(data_file)
|
||||
|
||||
feat, label_reg = create_dataset_shallow(df, station, start_time,
|
||||
end_time, mean=mean, std=std)
|
||||
|
||||
return feat, label_reg
|
||||
|
||||
|
||||
def get_weather_data_statistic(data_file, station, start_time, end_time):
|
||||
df = pd.read_pickle(data_file)
|
||||
mean_train, std_train = get_dataset_statistic(
|
||||
df, station, start_time, end_time)
|
||||
return mean_train, std_train
|
||||
|
|
@ -0,0 +1,115 @@
|
|||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
class MMD_loss(nn.Module):
|
||||
def __init__(self, kernel_type='linear', kernel_mul=2.0, kernel_num=5):
|
||||
super(MMD_loss, self).__init__()
|
||||
self.kernel_num = kernel_num
|
||||
self.kernel_mul = kernel_mul
|
||||
self.fix_sigma = None
|
||||
self.kernel_type = kernel_type
|
||||
|
||||
def guassian_kernel(self, source, target, kernel_mul=2.0, kernel_num=5, fix_sigma=None):
|
||||
n_samples = int(source.size()[0]) + int(target.size()[0])
|
||||
total = torch.cat([source, target], dim=0)
|
||||
total0 = total.unsqueeze(0).expand(
|
||||
int(total.size(0)), int(total.size(0)), int(total.size(1)))
|
||||
total1 = total.unsqueeze(1).expand(
|
||||
int(total.size(0)), int(total.size(0)), int(total.size(1)))
|
||||
L2_distance = ((total0-total1)**2).sum(2)
|
||||
if fix_sigma:
|
||||
bandwidth = fix_sigma
|
||||
else:
|
||||
bandwidth = torch.sum(L2_distance.data) / (n_samples**2-n_samples)
|
||||
bandwidth /= kernel_mul ** (kernel_num // 2)
|
||||
bandwidth_list = [bandwidth * (kernel_mul**i)
|
||||
for i in range(kernel_num)]
|
||||
kernel_val = [torch.exp(-L2_distance / bandwidth_temp)
|
||||
for bandwidth_temp in bandwidth_list]
|
||||
return sum(kernel_val)
|
||||
|
||||
def linear_mmd(self, X, Y):
|
||||
delta = X.mean(axis=0) - Y.mean(axis=0)
|
||||
loss = delta.dot(delta.T)
|
||||
return loss
|
||||
|
||||
def forward(self, source, target):
|
||||
if self.kernel_type == 'linear':
|
||||
return self.linear_mmd(source, target)
|
||||
elif self.kernel_type == 'rbf':
|
||||
batch_size = int(source.size()[0])
|
||||
kernels = self.guassian_kernel(
|
||||
source, target, kernel_mul=self.kernel_mul, kernel_num=self.kernel_num, fix_sigma=self.fix_sigma)
|
||||
with torch.no_grad():
|
||||
XX = torch.mean(kernels[:batch_size, :batch_size])
|
||||
YY = torch.mean(kernels[batch_size:, batch_size:])
|
||||
XY = torch.mean(kernels[:batch_size, batch_size:])
|
||||
YX = torch.mean(kernels[batch_size:, :batch_size])
|
||||
loss = torch.mean(XX + YY - XY - YX)
|
||||
return loss
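# Minimal usage sketch for MMD_loss (illustrative; the shapes below are assumptions):
#   mmd = MMD_loss(kernel_type='linear')
#   x, y = torch.randn(32, 64), torch.randn(32, 64)
#   print(mmd(x, y))   # scalar linear-MMD estimate between the two batches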
|
||||
|
||||
|
||||
import tensorflow as tf
|
||||
import numpy as np
|
||||
|
||||
|
||||
class MMDLoss(tf.keras.losses.Loss):
|
||||
def __init__(self, kernel_type='linear', kernel_mul=2.0, kernel_num=5):
|
||||
super(MMDLoss, self).__init__()
|
||||
self.kernel_type = kernel_type
|
||||
self.kernel_mul = kernel_mul
|
||||
self.kernel_num = kernel_num
|
||||
|
||||
def guassian_kernel(self, source, target, kernel_mul=2.0, kernel_num=5, fix_sigma=None):
|
||||
n_samples = int(source.shape[0]) + int(target.shape[0])
|
||||
total = tf.concat([source, target], axis=0)
|
||||
total0 = tf.expand_dims(total, 0)
|
||||
total0 = tf.tile(total0, [total.shape[0], 1, 1])
|
||||
total1 = tf.expand_dims(total, 1)
|
||||
total1 = tf.tile(total1, [1, total.shape[0], 1])
|
||||
L2_distance = tf.reduce_sum((total0 - total1) ** 2, axis=2)
|
||||
|
||||
if fix_sigma:
|
||||
bandwidth = fix_sigma
|
||||
else:
|
||||
bandwidth = tf.reduce_sum(L2_distance) / (n_samples ** 2 - n_samples)
|
||||
bandwidth /= kernel_mul ** (kernel_num // 2)
|
||||
bandwidth_list = [bandwidth * (kernel_mul ** i)
|
||||
for i in range(kernel_num)]
|
||||
kernel_val = [tf.exp(-L2_distance / bandwidth_temp)
|
||||
for bandwidth_temp in bandwidth_list]
|
||||
return sum(kernel_val)
|
||||
|
||||
def linear_mmd(self, X, Y):
|
||||
delta = tf.reduce_mean(X, axis=0) - tf.reduce_mean(Y, axis=0)
|
||||
        loss = tf.tensordot(delta, delta, axes=1)  # inner product of the 1-D mean difference; tf.linalg.matmul would fail on rank-1 tensors
|
||||
return loss
|
||||
|
||||
def call(self, source, target):
|
||||
if self.kernel_type == 'linear':
|
||||
return self.linear_mmd(source, target)
|
||||
elif self.kernel_type == 'rbf':
|
||||
batch_size = int(source.shape[0])
|
||||
kernels = self.guassian_kernel(
|
||||
source, target, kernel_mul=self.kernel_mul, kernel_num=self.kernel_num, fix_sigma=None)
|
||||
            # mirror torch.no_grad() in the PyTorch version: keep the kernel statistics out of the gradient path
            kernels = tf.stop_gradient(kernels)
            XX = tf.reduce_mean(kernels[:batch_size, :batch_size])
            YY = tf.reduce_mean(kernels[batch_size:, batch_size:])
            XY = tf.reduce_mean(kernels[:batch_size, batch_size:])
            YX = tf.reduce_mean(kernels[batch_size:, :batch_size])
            loss = XX + YY - XY - YX
|
||||
return loss
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
    # Example usage
|
||||
source = np.random.randn(100, 128)
|
||||
target = np.random.randn(100, 128)
|
||||
source_tf = tf.convert_to_tensor(source, dtype=tf.float32)
|
||||
target_tf = tf.convert_to_tensor(target, dtype=tf.float32)
|
||||
|
||||
mmd_loss = MMDLoss(kernel_type='rbf', kernel_mul=2.0, kernel_num=5)
|
||||
loss = mmd_loss(source_tf, target_tf)
|
||||
print("MMD Loss:", loss.numpy())
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
from loss import adv_loss, CORAL, kl_js, mmd, mutual_info, cosine, pairwise_dist
|
||||
|
||||
|
||||
class TransferLoss(object):
|
||||
def __init__(self, loss_type='cosine', input_dim=512):
|
||||
"""
|
||||
Supported loss_type: mmd(mmd_lin), mmd_rbf, coral, cosine, kl, js, mine, adv
|
||||
"""
|
||||
self.loss_type = loss_type
|
||||
self.input_dim = input_dim
|
||||
|
||||
def compute(self, X, Y):
|
||||
"""Compute adaptation loss
|
||||
|
||||
Arguments:
|
||||
X {tensor} -- source matrix
|
||||
Y {tensor} -- target matrix
|
||||
|
||||
Returns:
|
||||
[tensor] -- transfer loss
|
||||
"""
|
||||
        if self.loss_type == 'mmd_lin' or self.loss_type == 'mmd':
|
||||
mmdloss = mmd.MMD_loss(kernel_type='linear')
|
||||
loss = mmdloss(X, Y)
|
||||
elif self.loss_type == 'coral':
|
||||
loss = CORAL(X, Y)
|
||||
elif self.loss_type == 'cosine' or self.loss_type == 'cos':
|
||||
loss = 1 - cosine(X, Y)
|
||||
elif self.loss_type == 'kl':
|
||||
loss = kl_js.kl_div(X, Y)
|
||||
elif self.loss_type == 'js':
|
||||
loss = kl_js.js(X, Y)
|
||||
elif self.loss_type == 'mine':
|
||||
mine_model = mutual_info.Mine_estimator(
|
||||
input_dim=self.input_dim, hidden_dim=60).cuda()
|
||||
loss = mine_model(X, Y)
|
||||
elif self.loss_type == 'adv':
|
||||
loss = adv_loss.adv(X, Y, input_dim=self.input_dim, hidden_dim=32)
|
||||
elif self.loss_type == 'mmd_rbf':
|
||||
mmdloss = mmd.MMD_loss(kernel_type='rbf')
|
||||
loss = mmdloss(X, Y)
|
||||
elif self.loss_type == 'pairwise':
|
||||
pair_mat = pairwise_dist(X, Y)
|
||||
import torch
|
||||
loss = torch.norm(pair_mat)
|
||||
|
||||
return loss
|
||||
|
||||
if __name__ == "__main__":
|
||||
import torch
|
||||
trans_loss = TransferLoss('adv')
|
||||
a = (torch.randn(5,512) * 10).cuda()
|
||||
b = (torch.randn(5,512) * 10).cuda()
|
||||
print(trans_loss.compute(a, b))
|
||||
|
|
@ -0,0 +1,463 @@
|
|||
import torch.nn as nn
|
||||
import torch
|
||||
import torch.optim as optim
|
||||
|
||||
import os
|
||||
import argparse
|
||||
import datetime
|
||||
import numpy as np
|
||||
|
||||
from tqdm import tqdm
|
||||
from utils import utils
|
||||
from base.AdaRNN import AdaRNN
|
||||
|
||||
import pretty_errors
|
||||
import dataset.data_process as data_process
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
|
||||
def pprint(*text):
|
||||
# print with UTC+8 time
|
||||
time = '[' + str(datetime.datetime.utcnow() +
|
||||
datetime.timedelta(hours=8))[:19] + '] -'
|
||||
print(time, *text, flush=True)
|
||||
if args.log_file is None:
|
||||
return
|
||||
with open(args.log_file, 'a') as f:
|
||||
print(time, *text, flush=True, file=f)
|
||||
|
||||
|
||||
def get_model(name='AdaRNN'):
|
||||
n_hiddens = [args.hidden_size for i in range(args.num_layers)]
|
||||
return AdaRNN(use_bottleneck=True, bottleneck_width=64, n_input=args.d_feat, n_hiddens=n_hiddens,
|
||||
n_output=args.class_num, dropout=args.dropout, model_type=name, len_seq=args.len_seq,
|
||||
trans_loss=args.loss_type).cuda()
|
||||
|
||||
|
||||
def train_AdaRNN(args, model, optimizer, train_loader_list, epoch, dist_old=None, weight_mat=None):
|
||||
model.train()
|
||||
criterion = nn.MSELoss()
|
||||
criterion_1 = nn.L1Loss()
|
||||
loss_all = []
|
||||
loss_1_all = []
|
||||
dist_mat = torch.zeros(args.num_layers, args.len_seq).cuda()
|
||||
len_loader = np.inf
|
||||
for loader in train_loader_list:
|
||||
if len(loader) < len_loader:
|
||||
len_loader = len(loader)
|
||||
for data_all in tqdm(zip(*train_loader_list), total=len_loader):
|
||||
optimizer.zero_grad()
|
||||
list_feat = []
|
||||
list_label = []
|
||||
for data in data_all:
|
||||
feature, label, label_reg = data[0].cuda().float(
|
||||
), data[1].cuda().long(), data[2].cuda().float()
|
||||
list_feat.append(feature)
|
||||
list_label.append(label_reg)
|
||||
flag = False
|
||||
index = get_index(len(data_all) - 1)
|
||||
for temp_index in index:
|
||||
s1 = temp_index[0]
|
||||
s2 = temp_index[1]
|
||||
if list_feat[s1].shape[0] != list_feat[s2].shape[0]:
|
||||
flag = True
|
||||
break
|
||||
if flag:
|
||||
continue
|
||||
|
||||
total_loss = torch.zeros(1).cuda()
|
||||
for i in range(len(index)):
|
||||
feature_s = list_feat[index[i][0]]
|
||||
feature_t = list_feat[index[i][1]]
|
||||
label_reg_s = list_label[index[i][0]]
|
||||
label_reg_t = list_label[index[i][1]]
|
||||
feature_all = torch.cat((feature_s, feature_t), 0)
|
||||
|
||||
if epoch < args.pre_epoch:
|
||||
pred_all, loss_transfer, out_weight_list = model.forward_pre_train(
|
||||
feature_all, len_win=args.len_win)
|
||||
else:
|
||||
pred_all, loss_transfer, dist, weight_mat = model.forward_Boosting(
|
||||
feature_all, weight_mat)
|
||||
dist_mat = dist_mat + dist
|
||||
pred_s = pred_all[0:feature_s.size(0)]
|
||||
pred_t = pred_all[feature_s.size(0):]
|
||||
|
||||
loss_s = criterion(pred_s, label_reg_s)
|
||||
loss_t = criterion(pred_t, label_reg_t)
|
||||
loss_l1 = criterion_1(pred_s, label_reg_s)
|
||||
|
||||
total_loss = total_loss + loss_s + loss_t + args.dw * loss_transfer
|
||||
loss_all.append(
|
||||
[total_loss.item(), (loss_s + loss_t).item(), loss_transfer.item()])
|
||||
loss_1_all.append(loss_l1.item())
|
||||
optimizer.zero_grad()
|
||||
total_loss.backward()
|
||||
torch.nn.utils.clip_grad_value_(model.parameters(), 3.)
|
||||
optimizer.step()
|
||||
loss = np.array(loss_all).mean(axis=0)
|
||||
loss_l1 = np.array(loss_1_all).mean()
|
||||
if epoch >= args.pre_epoch:
|
||||
if epoch > args.pre_epoch:
|
||||
weight_mat = model.update_weight_Boosting(
|
||||
weight_mat, dist_old, dist_mat)
|
||||
return loss, loss_l1, weight_mat, dist_mat
|
||||
else:
|
||||
weight_mat = transform_type(out_weight_list)
|
||||
return loss, loss_l1, weight_mat, None
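# Training schedule note: for the first args.pre_epoch epochs the transfer loss is weighted by the
# learned gate outputs (forward_pre_train); afterwards the model switches to forward_Boosting and
# the per-step weights are re-estimated from the epoch-to-epoch change in distribution distance.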
|
||||
|
||||
|
||||
def train_epoch_transfer_Boosting(model, optimizer, train_loader_list, epoch, dist_old=None, weight_mat=None):
|
||||
model.train()
|
||||
criterion = nn.MSELoss()
|
||||
criterion_1 = nn.L1Loss()
|
||||
loss_all = []
|
||||
loss_1_all = []
|
||||
dist_mat = torch.zeros(args.num_layers, args.len_seq).cuda()
|
||||
len_loader = np.inf
|
||||
for loader in train_loader_list:
|
||||
if len(loader) < len_loader:
|
||||
len_loader = len(loader)
|
||||
for data_all in tqdm(zip(*train_loader_list), total=len_loader):
|
||||
optimizer.zero_grad()
|
||||
list_feat = []
|
||||
list_label = []
|
||||
for data in data_all:
|
||||
feature, label, label_reg = data[0].cuda().float(
|
||||
), data[1].cuda().long(), data[2].cuda().float()
|
||||
list_feat.append(feature)
|
||||
list_label.append(label_reg)
|
||||
flag = False
|
||||
index = get_index(len(data_all) - 1)
|
||||
for temp_index in index:
|
||||
s1 = temp_index[0]
|
||||
s2 = temp_index[1]
|
||||
if list_feat[s1].shape[0] != list_feat[s2].shape[0]:
|
||||
flag = True
|
||||
break
|
||||
if flag:
|
||||
continue
|
||||
|
||||
total_loss = torch.zeros(1).cuda()
|
||||
for i in range(len(index)):
|
||||
feature_s = list_feat[index[i][0]]
|
||||
feature_t = list_feat[index[i][1]]
|
||||
label_reg_s = list_label[index[i][0]]
|
||||
label_reg_t = list_label[index[i][1]]
|
||||
feature_all = torch.cat((feature_s, feature_t), 0)
|
||||
|
||||
pred_all, loss_transfer, dist, weight_mat = model.forward_Boosting(
|
||||
feature_all, weight_mat)
|
||||
dist_mat = dist_mat + dist
|
||||
pred_s = pred_all[0:feature_s.size(0)]
|
||||
pred_t = pred_all[feature_s.size(0):]
|
||||
|
||||
loss_s = criterion(pred_s, label_reg_s)
|
||||
loss_t = criterion(pred_t, label_reg_t)
|
||||
loss_l1 = criterion_1(pred_s, label_reg_s)
|
||||
|
||||
total_loss = total_loss + loss_s + loss_t + args.dw * loss_transfer
|
||||
|
||||
loss_all.append(
|
||||
[total_loss.item(), (loss_s + loss_t).item(), loss_transfer.item()])
|
||||
loss_1_all.append(loss_l1.item())
|
||||
optimizer.zero_grad()
|
||||
total_loss.backward()
|
||||
torch.nn.utils.clip_grad_value_(model.parameters(), 3.)
|
||||
optimizer.step()
|
||||
loss = np.array(loss_all).mean(axis=0)
|
||||
loss_l1 = np.array(loss_1_all).mean()
|
||||
if epoch > 0: # args.pre_epoch:
|
||||
weight_mat = model.update_weight_Boosting(
|
||||
weight_mat, dist_old, dist_mat)
|
||||
return loss, loss_l1, weight_mat, dist_mat
|
||||
|
||||
|
||||
def get_index(num_domain=2):
|
||||
index = []
|
||||
for i in range(num_domain):
|
||||
for j in range(i + 1, num_domain + 1):
|
||||
index.append((i, j))
|
||||
return index
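# get_index(n) enumerates the domain pairs used for the pairwise transfer loss,
# e.g. get_index(2) -> [(0, 1), (0, 2), (1, 2)].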
|
||||
|
||||
|
||||
def train_epoch_transfer(args, model, optimizer, train_loader_list):
|
||||
model.train()
|
||||
criterion = nn.MSELoss()
|
||||
criterion_1 = nn.L1Loss()
|
||||
loss_all = []
|
||||
loss_1_all = []
|
||||
len_loader = np.inf
|
||||
for loader in train_loader_list:
|
||||
if len(loader) < len_loader:
|
||||
len_loader = len(loader)
|
||||
|
||||
for data_all in tqdm(zip(*train_loader_list), total=len_loader):
|
||||
optimizer.zero_grad()
|
||||
list_feat = []
|
||||
list_label = []
|
||||
for data in data_all:
|
||||
feature, label, label_reg = data[0].cuda().float(
|
||||
), data[1].cuda().long(), data[2].cuda().float()
|
||||
list_feat.append(feature)
|
||||
list_label.append(label_reg)
|
||||
flag = False
|
||||
index = get_index(len(data_all) - 1)
|
||||
for temp_index in index:
|
||||
s1 = temp_index[0]
|
||||
s2 = temp_index[1]
|
||||
if list_feat[s1].shape[0] != list_feat[s2].shape[0]:
|
||||
flag = True
|
||||
break
|
||||
if flag:
|
||||
continue
|
||||
|
||||
###############
|
||||
total_loss = torch.zeros(1).cuda()
|
||||
for i in range(len(index)):
|
||||
feature_s = list_feat[index[i][0]]
|
||||
feature_t = list_feat[index[i][1]]
|
||||
label_reg_s = list_label[index[i][0]]
|
||||
label_reg_t = list_label[index[i][1]]
|
||||
feature_all = torch.cat((feature_s, feature_t), 0)
|
||||
|
||||
pred_all, loss_transfer, out_weight_list = model.forward_pre_train(
|
||||
feature_all, len_win=args.len_win)
|
||||
pred_s = pred_all[0:feature_s.size(0)]
|
||||
pred_t = pred_all[feature_s.size(0):]
|
||||
|
||||
loss_s = criterion(pred_s, label_reg_s)
|
||||
loss_t = criterion(pred_t, label_reg_t)
|
||||
loss_l1 = criterion_1(pred_s, label_reg_s)
|
||||
|
||||
total_loss = total_loss + loss_s + loss_t + args.dw * loss_transfer
|
||||
loss_all.append(
|
||||
[total_loss.item(), (loss_s + loss_t).item(), loss_transfer.item()])
|
||||
loss_1_all.append(loss_l1.item())
|
||||
optimizer.zero_grad()
|
||||
total_loss.backward()
|
||||
torch.nn.utils.clip_grad_value_(model.parameters(), 3.)
|
||||
optimizer.step()
|
||||
loss = np.array(loss_all).mean(axis=0)
|
||||
loss_l1 = np.array(loss_1_all).mean()
|
||||
return loss, loss_l1, out_weight_list
|
||||
|
||||
|
||||
def count_parameters(model):
|
||||
return sum(p.numel() for p in model.parameters() if p.requires_grad)
|
||||
|
||||
|
||||
def test_epoch(model, test_loader, prefix='Test'):
|
||||
model.eval()
|
||||
total_loss = 0
|
||||
total_loss_1 = 0
|
||||
total_loss_r = 0
|
||||
correct = 0
|
||||
criterion = nn.MSELoss()
|
||||
criterion_1 = nn.L1Loss()
|
||||
for feature, label, label_reg in tqdm(test_loader, desc=prefix, total=len(test_loader)):
|
||||
feature, label_reg = feature.cuda().float(), label_reg.cuda().float()
|
||||
with torch.no_grad():
|
||||
pred = model.predict(feature)
|
||||
loss = criterion(pred, label_reg)
|
||||
loss_r = torch.sqrt(loss)
|
||||
loss_1 = criterion_1(pred, label_reg)
|
||||
total_loss += loss.item()
|
||||
total_loss_1 += loss_1.item()
|
||||
total_loss_r += loss_r.item()
|
||||
loss = total_loss / len(test_loader)
|
||||
loss_1 = total_loss_1 / len(test_loader)
|
||||
    loss_r = total_loss_r / len(test_loader)  # average the accumulated RMSE instead of only the last batch
|
||||
return loss, loss_1, loss_r
|
||||
|
||||
|
||||
def test_epoch_inference(model, test_loader, prefix='Test'):
|
||||
model.eval()
|
||||
total_loss = 0
|
||||
total_loss_1 = 0
|
||||
total_loss_r = 0
|
||||
correct = 0
|
||||
criterion = nn.MSELoss()
|
||||
criterion_1 = nn.L1Loss()
|
||||
i = 0
|
||||
for feature, label, label_reg in tqdm(test_loader, desc=prefix, total=len(test_loader)):
|
||||
feature, label_reg = feature.cuda().float(), label_reg.cuda().float()
|
||||
with torch.no_grad():
|
||||
pred = model.predict(feature)
|
||||
loss = criterion(pred, label_reg)
|
||||
loss_r = torch.sqrt(loss)
|
||||
loss_1 = criterion_1(pred, label_reg)
|
||||
total_loss += loss.item()
|
||||
total_loss_1 += loss_1.item()
|
||||
total_loss_r += loss_r.item()
|
||||
if i == 0:
|
||||
label_list = label_reg.cpu().numpy()
|
||||
predict_list = pred.cpu().numpy()
|
||||
else:
|
||||
label_list = np.hstack((label_list, label_reg.cpu().numpy()))
|
||||
predict_list = np.hstack((predict_list, pred.cpu().numpy()))
|
||||
|
||||
i = i + 1
|
||||
loss = total_loss / len(test_loader)
|
||||
loss_1 = total_loss_1 / len(test_loader)
|
||||
loss_r = total_loss_r / len(test_loader)
|
||||
return loss, loss_1, loss_r, label_list, predict_list
|
||||
|
||||
|
||||
def inference(model, data_loader):
|
||||
loss, loss_1, loss_r, label_list, predict_list = test_epoch_inference(
|
||||
model, data_loader, prefix='Inference')
|
||||
return loss, loss_1, loss_r, label_list, predict_list
|
||||
|
||||
|
||||
def inference_all(output_path, model, model_path, loaders):
|
||||
pprint('inference...')
|
||||
loss_list = []
|
||||
loss_l1_list = []
|
||||
loss_r_list = []
|
||||
model.load_state_dict(torch.load(model_path))
|
||||
i = 0
|
||||
list_name = ['train', 'valid', 'test']
|
||||
for loader in loaders:
|
||||
loss, loss_1, loss_r, label_list, predict_list = inference(
|
||||
model, loader)
|
||||
loss_list.append(loss)
|
||||
loss_l1_list.append(loss_1)
|
||||
loss_r_list.append(loss_r)
|
||||
i = i + 1
|
||||
return loss_list, loss_l1_list, loss_r_list
|
||||
|
||||
|
||||
def transform_type(init_weight):
|
||||
weight = torch.ones(args.num_layers, args.len_seq).cuda()
|
||||
for i in range(args.num_layers):
|
||||
for j in range(args.len_seq):
|
||||
weight[i, j] = init_weight[i][j].item()
|
||||
return weight
|
||||
|
||||
|
||||
def main_transfer(args):
|
||||
print(args)
|
||||
|
||||
output_path = args.outdir + '_' + args.station + '_' + args.model_name + '_weather_' + \
|
||||
args.loss_type + '_' + str(args.pre_epoch) + \
|
||||
'_' + str(args.dw) + '_' + str(args.lr)
|
||||
save_model_name = args.model_name + '_' + args.loss_type + \
|
||||
'_' + str(args.dw) + '_' + str(args.lr) + '.pkl'
|
||||
utils.dir_exist(output_path)
|
||||
pprint('create loaders...')
|
||||
|
||||
train_loader_list, valid_loader, test_loader = data_process.load_weather_data_multi_domain(
|
||||
args.data_path, args.batch_size, args.station, args.num_domain, args.data_mode)
|
||||
|
||||
args.log_file = os.path.join(output_path, 'run.log')
|
||||
pprint('create model...')
|
||||
model = get_model(args.model_name)
|
||||
num_model = count_parameters(model)
|
||||
print('#model params:', num_model)
|
||||
|
||||
optimizer = optim.Adam(model.parameters(), lr=args.lr)
|
||||
|
||||
best_score = np.inf
|
||||
best_epoch, stop_round = 0, 0
|
||||
weight_mat, dist_mat = None, None
|
||||
|
||||
for epoch in range(args.n_epochs):
|
||||
pprint('Epoch:', epoch)
|
||||
pprint('training...')
|
||||
if args.model_name in ['Boosting']:
|
||||
loss, loss1, weight_mat, dist_mat = train_epoch_transfer_Boosting(
|
||||
model, optimizer, train_loader_list, epoch, dist_mat, weight_mat)
|
||||
elif args.model_name in ['AdaRNN']:
|
||||
loss, loss1, weight_mat, dist_mat = train_AdaRNN(
|
||||
args, model, optimizer, train_loader_list, epoch, dist_mat, weight_mat)
|
||||
else:
|
||||
print("error in model_name!")
|
||||
pprint(loss, loss1)
|
||||
|
||||
pprint('evaluating...')
|
||||
train_loss, train_loss_l1, train_loss_r = test_epoch(
|
||||
model, train_loader_list[0], prefix='Train')
|
||||
val_loss, val_loss_l1, val_loss_r = test_epoch(
|
||||
model, valid_loader, prefix='Valid')
|
||||
test_loss, test_loss_l1, test_loss_r = test_epoch(
|
||||
model, test_loader, prefix='Test')
|
||||
|
||||
pprint('valid %.6f, test %.6f' %
|
||||
(val_loss_l1, test_loss_l1))
|
||||
|
||||
if val_loss < best_score:
|
||||
best_score = val_loss
|
||||
stop_round = 0
|
||||
best_epoch = epoch
|
||||
torch.save(model.state_dict(), os.path.join(
|
||||
output_path, save_model_name))
|
||||
else:
|
||||
stop_round += 1
|
||||
if stop_round >= args.early_stop:
|
||||
pprint('early stop')
|
||||
break
|
||||
|
||||
pprint('best val score:', best_score, '@', best_epoch)
|
||||
|
||||
loaders = train_loader_list[0], valid_loader, test_loader
|
||||
loss_list, loss_l1_list, loss_r_list = inference_all(output_path, model, os.path.join(
|
||||
output_path, save_model_name), loaders)
|
||||
pprint('MSE: train %.6f, valid %.6f, test %.6f' %
|
||||
(loss_list[0], loss_list[1], loss_list[2]))
|
||||
pprint('L1: train %.6f, valid %.6f, test %.6f' %
|
||||
(loss_l1_list[0], loss_l1_list[1], loss_l1_list[2]))
|
||||
pprint('RMSE: train %.6f, valid %.6f, test %.6f' %
|
||||
(loss_r_list[0], loss_r_list[1], loss_r_list[2]))
|
||||
pprint('Finished.')
|
||||
|
||||
|
||||
def get_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
|
||||
# model
|
||||
parser.add_argument('--model_name', default='AdaRNN')
|
||||
parser.add_argument('--d_feat', type=int, default=6)
|
||||
|
||||
parser.add_argument('--hidden_size', type=int, default=64)
|
||||
parser.add_argument('--num_layers', type=int, default=2)
|
||||
parser.add_argument('--dropout', type=float, default=0.0)
|
||||
parser.add_argument('--class_num', type=int, default=1)
|
||||
parser.add_argument('--pre_epoch', type=int, default=40) # 20, 30, 50
|
||||
|
||||
# training
|
||||
parser.add_argument('--n_epochs', type=int, default=200)
|
||||
parser.add_argument('--lr', type=float, default=5e-4)
|
||||
parser.add_argument('--early_stop', type=int, default=40)
|
||||
parser.add_argument('--smooth_steps', type=int, default=5)
|
||||
parser.add_argument('--batch_size', type=int, default=36)
|
||||
parser.add_argument('--dw', type=float, default=0.5) # 0.01, 0.05, 5.0
|
||||
parser.add_argument('--loss_type', type=str, default='adv')
|
||||
parser.add_argument('--station', type=str, default='Dongsi')
|
||||
parser.add_argument('--data_mode', type=str,
|
||||
default='tdc')
|
||||
parser.add_argument('--num_domain', type=int, default=2)
|
||||
parser.add_argument('--len_seq', type=int, default=24)
|
||||
|
||||
# other
|
||||
parser.add_argument('--seed', type=int, default=10)
|
||||
parser.add_argument('--data_path', default="/root/Messi_du/adarnn/")
|
||||
parser.add_argument('--outdir', default='./outputs')
|
||||
parser.add_argument('--overwrite', action='store_true')
|
||||
parser.add_argument('--log_file', type=str, default='run.log')
|
||||
parser.add_argument('--gpu_id', type=int, default=0)
|
||||
parser.add_argument('--len_win', type=int, default=0)
|
||||
args = parser.parse_args()
|
||||
|
||||
return args
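# Example invocation (illustrative; the script name and paths are assumptions, not tested defaults):
#   python train_weather.py --station Dongsi --data_mode tdc --num_domain 2 --loss_type adv --dw 0.5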
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
args = get_args()
|
||||
np.random.seed(args.seed)
|
||||
torch.manual_seed(args.seed)
|
||||
torch.cuda.manual_seed_all(args.seed)
|
||||
torch.backends.cudnn.deterministic = True
|
||||
torch.backends.cudnn.benchmark = False
|
||||
os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_id)
|
||||
main_transfer(args)