Update adaRNN

This commit is contained in:
parent 14b7cb72a6
commit 1d7bfec0f8
@ -0,0 +1,361 @@
import torch
import torch.nn as nn
from loss_transfer import TransferLoss
import torch.nn.functional as F
import tensorflow as tf


'''
References:
[1] https://arxiv.org/pdf/2108.04443.pdf
[2] https://github.com/kevinding1125/transferlearning/blob/master/code/deep/adarnn/base/AdaRNN.py
'''


class AdaRNN(nn.Module):
    """
    model_type: 'Boosting', 'AdaRNN'
    """

    def __init__(self, use_bottleneck=False, bottleneck_width=256, n_input=128, n_hiddens=[64, 64], n_output=6, dropout=0.0, len_seq=9, model_type='AdaRNN', trans_loss='mmd'):
        super(AdaRNN, self).__init__()
        self.use_bottleneck = use_bottleneck
        self.n_input = n_input
        self.num_layers = len(n_hiddens)
        self.hiddens = n_hiddens
        self.n_output = n_output
        self.model_type = model_type
        self.trans_loss = trans_loss
        self.len_seq = len_seq
        in_size = self.n_input

        features = nn.ModuleList()
        for hidden in n_hiddens:
            rnn = nn.GRU(
                input_size=in_size,
                num_layers=1,
                hidden_size=hidden,
                batch_first=True,
                dropout=dropout
            )
            features.append(rnn)
            in_size = hidden
        self.features = nn.Sequential(*features)

        if use_bottleneck == True:  # finance
            self.bottleneck = nn.Sequential(
                nn.Linear(n_hiddens[-1], bottleneck_width),
                nn.Linear(bottleneck_width, bottleneck_width),
                nn.BatchNorm1d(bottleneck_width),
                nn.ReLU(),
                nn.Dropout(),
            )
            self.bottleneck[0].weight.data.normal_(0, 0.005)
            self.bottleneck[0].bias.data.fill_(0.1)
            self.bottleneck[1].weight.data.normal_(0, 0.005)
            self.bottleneck[1].bias.data.fill_(0.1)
            self.fc = nn.Linear(bottleneck_width, n_output)
            torch.nn.init.xavier_normal_(self.fc.weight)
        else:
            self.fc_out = nn.Linear(n_hiddens[-1], self.n_output)

        if self.model_type == 'AdaRNN':
            gate = nn.ModuleList()
            for i in range(len(n_hiddens)):
                gate_weight = nn.Linear(
                    len_seq * self.hiddens[i] * 2, len_seq)
                gate.append(gate_weight)
            self.gate = gate

            bnlst = nn.ModuleList()
            for i in range(len(n_hiddens)):
                bnlst.append(nn.BatchNorm1d(len_seq))
            self.bn_lst = bnlst
            self.softmax = torch.nn.Softmax(dim=0)
            self.init_layers()

    def init_layers(self):
        for i in range(len(self.hiddens)):
            self.gate[i].weight.data.normal_(0, 0.05)
            self.gate[i].bias.data.fill_(0.0)

    def forward_pre_train(self, x, len_win=0):
        out = self.gru_features(x)
        fea = out[0]
        if self.use_bottleneck == True:
            fea_bottleneck = self.bottleneck(fea[:, -1, :])
            fc_out = self.fc(fea_bottleneck).squeeze()
        else:
            fc_out = self.fc_out(fea[:, -1, :]).squeeze()

        out_list_all, out_weight_list = out[1], out[2]
        out_list_s, out_list_t = self.get_features(out_list_all)
        loss_transfer = torch.zeros((1,)).cuda()
        for i in range(len(out_list_s)):
            criterion_transder = TransferLoss(
                loss_type=self.trans_loss, input_dim=out_list_s[i].shape[2])
            h_start = 0
            for j in range(h_start, self.len_seq, 1):
                i_start = j - len_win if j - len_win >= 0 else 0
                i_end = j + len_win if j + len_win < self.len_seq else self.len_seq - 1
                for k in range(i_start, i_end + 1):
                    weight = out_weight_list[i][j] if self.model_type == 'AdaRNN' else 1 / (
                        self.len_seq - h_start) * (2 * len_win + 1)
                    loss_transfer = loss_transfer + weight * criterion_transder.compute(
                        out_list_s[i][:, j, :], out_list_t[i][:, k, :])
        return fc_out, loss_transfer, out_weight_list

    def gru_features(self, x, predict=False):
        x_input = x
        out = None
        out_lis = []
        out_weight_list = [] if (
            self.model_type == 'AdaRNN') else None
        for i in range(self.num_layers):
            out, _ = self.features[i](x_input.float())
            x_input = out
            out_lis.append(out)
            if self.model_type == 'AdaRNN' and predict == False:
                out_gate = self.process_gate_weight(x_input, i)
                out_weight_list.append(out_gate)
        return out, out_lis, out_weight_list

    def process_gate_weight(self, out, index):
        x_s = out[0: int(out.shape[0] // 2)]
        x_t = out[out.shape[0] // 2: out.shape[0]]
        x_all = torch.cat((x_s, x_t), 2)
        x_all = x_all.view(x_all.shape[0], -1)
        weight = torch.sigmoid(self.bn_lst[index](
            self.gate[index](x_all.float())))
        weight = torch.mean(weight, dim=0)
        res = self.softmax(weight).squeeze()
        return res

    def get_features(self, output_list):
        fea_list_src, fea_list_tar = [], []
        for fea in output_list:
            fea_list_src.append(fea[0: fea.size(0) // 2])
            fea_list_tar.append(fea[fea.size(0) // 2:])
        return fea_list_src, fea_list_tar

    # For Boosting-based
    def forward_Boosting(self, x, weight_mat=None):
        out = self.gru_features(x)
        fea = out[0]
        if self.use_bottleneck:
            fea_bottleneck = self.bottleneck(fea[:, -1, :])
            fc_out = self.fc(fea_bottleneck).squeeze()
        else:
            fc_out = self.fc_out(fea[:, -1, :]).squeeze()

        out_list_all = out[1]
        out_list_s, out_list_t = self.get_features(out_list_all)
        loss_transfer = torch.zeros((1,)).cuda()
        if weight_mat is None:
            weight = (1.0 / self.len_seq *
                      torch.ones(self.num_layers, self.len_seq)).cuda()
        else:
            weight = weight_mat
        dist_mat = torch.zeros(self.num_layers, self.len_seq).cuda()
        for i in range(len(out_list_s)):
            criterion_transder = TransferLoss(
                loss_type=self.trans_loss, input_dim=out_list_s[i].shape[2])
            for j in range(self.len_seq):
                loss_trans = criterion_transder.compute(
                    out_list_s[i][:, j, :], out_list_t[i][:, j, :])
                loss_transfer = loss_transfer + weight[i, j] * loss_trans
                dist_mat[i, j] = loss_trans
        return fc_out, loss_transfer, dist_mat, weight

    # For Boosting-based
    def update_weight_Boosting(self, weight_mat, dist_old, dist_new):
        epsilon = 1e-12
        dist_old = dist_old.detach()
        dist_new = dist_new.detach()
        ind = dist_new > dist_old + epsilon
        weight_mat[ind] = weight_mat[ind] * \
            (1 + torch.sigmoid(dist_new[ind] - dist_old[ind]))
        weight_norm = torch.norm(weight_mat, dim=1, p=1)
        weight_mat = weight_mat / weight_norm.t().unsqueeze(1).repeat(1, self.len_seq)
        return weight_mat

    def predict(self, x):
        out = self.gru_features(x, predict=True)
        fea = out[0]
        if self.use_bottleneck == True:
            fea_bottleneck = self.bottleneck(fea[:, -1, :])
            fc_out = self.fc(fea_bottleneck).squeeze()
        else:
            fc_out = self.fc_out(fea[:, -1, :]).squeeze()
        return fc_out
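

# Illustrative usage sketch: the source and target mini-batches are concatenated
# along the batch dimension before forward_pre_train, and the model splits them
# back in half internally (see get_features / process_gate_weight). This assumes
# the repository's loss package behind TransferLoss is importable and a CUDA
# device is available; the sizes mirror the training script's defaults.
def _demo_adarnn_torch():
    model = AdaRNN(use_bottleneck=True, bottleneck_width=64, n_input=6,
                   n_hiddens=[64, 64], n_output=1, len_seq=24,
                   model_type='AdaRNN', trans_loss='mmd').cuda()
    x_src = torch.randn(36, 24, 6).cuda()   # source-domain batch
    x_tgt = torch.randn(36, 24, 6).cuda()   # target-domain batch
    x_all = torch.cat((x_src, x_tgt), dim=0)
    pred, loss_transfer, weight_list = model.forward_pre_train(x_all, len_win=0)
    print(pred.shape, loss_transfer.item(), len(weight_list))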


class AdaRNN_tensorflow(tf.keras.Model):
    def __init__(self, use_bottleneck=False, bottleneck_width=256, n_input=128, n_hiddens=[64, 64], n_output=6, dropout=0.0, len_seq=9, model_type='AdaRNN', trans_loss='mmd'):
        super(AdaRNN_tensorflow, self).__init__()
        self.use_bottleneck = use_bottleneck
        self.n_input = n_input
        self.num_layers = len(n_hiddens)
        self.hiddens = n_hiddens
        self.n_output = n_output
        self.model_type = model_type
        self.trans_loss = trans_loss
        self.len_seq = len_seq

        self.features = tf.keras.Sequential()
        for hidden in n_hiddens:
            rnn = tf.keras.layers.GRU(
                units=hidden,
                return_sequences=True,
                dropout=dropout
            )
            self.features.add(rnn)

        if use_bottleneck == True:
            self.bottleneck = tf.keras.Sequential([
                tf.keras.layers.Dense(bottleneck_width),
                tf.keras.layers.Dense(bottleneck_width),
                tf.keras.layers.BatchNormalization(),
                tf.keras.layers.ReLU(),
                tf.keras.layers.Dropout(dropout)
            ])
            self.fc = tf.keras.layers.Dense(n_output, activation=None)

        else:
            self.fc_out = tf.keras.layers.Dense(n_output, activation=None)

        if self.model_type == 'AdaRNN':
            self.gate = []
            for _ in range(len(n_hiddens)):
                gate_weight = tf.keras.layers.Dense(len_seq, activation=None)
                self.gate.append(gate_weight)
            self.bn_lst = [tf.keras.layers.BatchNormalization() for _ in range(len(n_hiddens))]
            self.softmax = tf.keras.layers.Softmax(axis=0)
            # self.init_layers()  # much of the initialization-related work is omitted here

    # def init_layers(self):
    #     for gate_layer in self.gate:
    #         gate_layer.build((None, self.len_seq * self.hiddens[i] * 2))

    def forward_pre_train(self, x, len_win=0):
        # output of the stacked GRU layers, the per-layer outputs, and the
        # normalized per-step weights computed after each layer
        out, out_list_all, out_weight_list = self.gru_features(x)
        fea = out
        if self.use_bottleneck == True:
            fea_bottleneck = self.bottleneck(fea[:, -1, :])
            fc_out = tf.squeeze(self.fc(fea_bottleneck))
        else:
            fc_out = tf.squeeze(self.fc_out(fea[:, -1, :]))

        out_list_s, out_list_t = self.get_features(out_list_all)

        loss_transfer = tf.zeros((1,))
        for i in range(len(out_list_s)):
            criterion_transder = TransferLoss(
                loss_type=self.trans_loss, input_dim=out_list_s[i].shape[2])
            h_start = 0
            for j in range(h_start, self.len_seq, 1):
                i_start = j - len_win if j - len_win >= 0 else 0
                i_end = j + len_win if j + len_win < self.len_seq else self.len_seq - 1
                for k in range(i_start, i_end + 1):
                    weight = out_weight_list[i][j] if self.model_type == 'AdaRNN' else 1 / (
                        self.len_seq - h_start) * (2 * len_win + 1)
                    loss_transfer = loss_transfer + weight * criterion_transder.compute(
                        out_list_s[i][:, j, :], out_list_t[i][:, k, :])
        return fc_out, loss_transfer, out_weight_list

    def call(self, x, len_win=0, training=False):
        # output of the stacked GRU layers, the per-layer outputs, and the
        # normalized per-step weights computed after each layer
        out, out_list_all, out_weight_list = self.gru_features(x, training=training)
        fea = out
        if self.use_bottleneck == True:
            fea_bottleneck = self.bottleneck(fea[:, -1, :])
            fc_out = self.fc(fea_bottleneck)
        else:
            fc_out = self.fc_out(fea[:, -1, :])

        loss_transfer = tf.zeros((1,))
        for i in range(len(out_list_all)):
            criterion_transder = TransferLoss(
                loss_type=self.trans_loss, input_dim=out_list_all[i].shape[2])
            h_start = 0
            for j in range(h_start, self.len_seq, 1):
                i_start = j - len_win if j - len_win >= 0 else 0
                i_end = j + len_win if j + len_win < self.len_seq else self.len_seq - 1
                for k in range(i_start, i_end + 1):
                    weight = out_weight_list[i][j] if self.model_type == 'AdaRNN' else 1 / (
                        self.len_seq - h_start) * (2 * len_win + 1)
                    loss_transfer = loss_transfer + weight * criterion_transder.compute(
                        out_list_all[i][:, j, :], out_list_all[i][:, k, :])
        return fc_out, loss_transfer, out_weight_list

    def gru_features(self, x, training=False):
        x_input = x
        out = None
        out_lis = []
        out_weight_list = [] if (
            self.model_type == 'AdaRNN') else None
        for i in range(self.num_layers):
            out = self.features.layers[i](x_input, training=training)
            x_input = out
            out_lis.append(out)
            if self.model_type == 'AdaRNN':
                out_gate = self.process_gate_weight(x_input, i, training=training)
                out_weight_list.append(out_gate)
        return out, out_lis, out_weight_list

    def process_gate_weight(self, out, index, training=False):
        x_s = out[:, :out.shape[1] // 2]  # can be read as the first half of the sequence
        x_t = out[:, out.shape[1] // 2:]  # can be read as the second half of the sequence
        x_all = tf.concat((x_s, x_t), 2)
        x_all = tf.reshape(x_all, (x_all.shape[0], -1))
        weight = tf.sigmoid(self.bn_lst[index](self.gate[index](x_all), training=training))
        weight = tf.reduce_mean(weight, axis=0)
        res = self.softmax(weight)
        return res

    def get_features(self, output_list):
        fea_list_src, fea_list_tar = [], []
        for fea in output_list:
            fea_list_src.append(fea[:, :fea.shape[1] // 2])
            fea_list_tar.append(fea[:, fea.shape[1] // 2:])
        return fea_list_src, fea_list_tar

    def forward_Boosting(self, x, weight_mat=None):
        out, out_list_all, _ = self.gru_features(x, training=False)
        fea = out
        if self.use_bottleneck:
            fea_bottleneck = self.bottleneck(fea[:, -1, :])
            fc_out = self.fc(fea_bottleneck)
        else:
            fc_out = self.fc_out(fea[:, -1, :])

        out_list_s, out_list_t = self.get_features(out_list_all)
        loss_transfer = tf.zeros((1,))
        if weight_mat is None:
            weight = (1.0 / self.len_seq *
                      tf.ones((self.num_layers, self.len_seq), dtype=tf.float32))
        else:
            weight = weight_mat
        # TF tensors are immutable, so a Variable is used for the in-place updates below
        dist_mat = tf.Variable(tf.zeros((self.num_layers, self.len_seq), dtype=tf.float32))
        for i in range(len(out_list_s)):
            criterion_transder = TransferLoss(
                loss_type=self.trans_loss, input_dim=out_list_s[i].shape[2])
            for j in range(self.len_seq):
                loss_trans = criterion_transder.compute(
                    out_list_s[i][:, j, :], out_list_t[i][:, j, :])
                loss_transfer = loss_transfer + weight[i, j] * loss_trans
                dist_mat[i, j].assign(loss_trans)
        return fc_out, loss_transfer, dist_mat, weight

    def update_weight_Boosting(self, weight_mat, dist_old, dist_new):
        epsilon = 1e-12
        dist_old = tf.stop_gradient(dist_old)
        dist_new = tf.stop_gradient(dist_new)
        ind = dist_new > dist_old + epsilon
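        # A sketch of the remaining update, mirroring update_weight_Boosting in
        # the PyTorch AdaRNN above; the tf.where / tf.norm calls here are an
        # assumption, not the original code: boost the weight of steps whose
        # distribution distance grew, then L1-normalize each row.
        weight_mat = tf.where(
            ind, weight_mat * (1 + tf.sigmoid(dist_new - dist_old)), weight_mat)
        weight_norm = tf.norm(weight_mat, ord=1, axis=1, keepdims=True)
        weight_mat = weight_mat / weight_norm
        return weight_mat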
@ -0,0 +1,108 @@
# encoding=utf-8

import numpy as np
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import os
# from config import config_info
from sklearn.preprocessing import StandardScaler


# This is for parsing the X data, you can ignore it if you do not need preprocessing
def format_data_x(datafile):
    x_data = None
    for item in datafile:
        item_data = np.loadtxt(item, dtype=float)
        if x_data is None:
            x_data = np.zeros((len(item_data), 1))
        x_data = np.hstack((x_data, item_data))
    x_data = x_data[:, 1:]
    print(x_data.shape)
    X = None
    for i in range(len(x_data)):
        row = np.asarray(x_data[i, :])
        row = row.reshape(9, 128).T
        if X is None:
            X = np.zeros((len(x_data), 128, 9))
        X[i] = row
    print(X.shape)
    return X


# This is for parsing the Y data, you can ignore it if you do not need preprocessing
def format_data_y(datafile):
    data = np.loadtxt(datafile, dtype=int) - 1
    YY = np.eye(6)[data]
    return YY


# Load data function, if there exists a parsed data file, then use it
# If not, parse the original dataset from scratch
def load_data(data_folder, domain):
    import os
    domain = '1_20' if domain == 'A' else '21_30'
    data_file = os.path.join(data_folder, 'data_har_' + domain + '.npz')
    if os.path.exists(data_file):
        data = np.load(data_file)
        X_train = data['X_train']
        Y_train = data['Y_train']
        X_test = data['X_test']
        Y_test = data['Y_test']
    else:
        # This is for processing the dataset from scratch
        # After downloading the dataset, put it somewhere that str_folder can find
        str_folder = config_info['data_folder_raw'] + 'UCI HAR Dataset/'
        INPUT_SIGNAL_TYPES = [
            "body_acc_x_",
            "body_acc_y_",
            "body_acc_z_",
            "body_gyro_x_",
            "body_gyro_y_",
            "body_gyro_z_",
            "total_acc_x_",
            "total_acc_y_",
            "total_acc_z_"
        ]

        str_train_files = [str_folder + 'train/' + 'Inertial Signals/' + item + 'train.txt' for item in
                           INPUT_SIGNAL_TYPES]
        str_test_files = [str_folder + 'test/' + 'Inertial Signals/' +
                          item + 'test.txt' for item in INPUT_SIGNAL_TYPES]
        str_train_y = str_folder + 'train/y_train.txt'
        str_test_y = str_folder + 'test/y_test.txt'

        X_train = format_data_x(str_train_files)
        X_test = format_data_x(str_test_files)
        Y_train = format_data_y(str_train_y)
        Y_test = format_data_y(str_test_y)

    return X_train, onehot_to_label(Y_train), X_test, onehot_to_label(Y_test)


def onehot_to_label(y_onehot):
    a = np.argwhere(y_onehot == 1)
    return a[:, -1]


class data_loader(Dataset):
    def __init__(self, samples, labels, t):
        self.samples = samples
        self.labels = labels
        self.T = t

    def __getitem__(self, index):
        sample, target = self.samples[index], self.labels[index]
        if self.T:
            return self.T(sample), target
        else:
            return sample, target

    def __len__(self):
        return len(self.samples)


def normalize(x):
    x_min = x.min(axis=(0, 2, 3), keepdims=True)
    x_max = x.max(axis=(0, 2, 3), keepdims=True)
    x_norm = (x - x_min) / (x_max - x_min)
    return x_norm
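

# Illustrative usage sketch: wiring the HAR arrays above into PyTorch loaders.
# The data folder is a placeholder, 'A' selects the '1_20' subject split, and a
# parsed data_har_1_20.npz is assumed to exist there.
def _demo_har_loaders(data_folder='./data/har/', batch_size=64):
    x_train, y_train, x_test, y_test = load_data(data_folder, domain='A')
    train_set = data_loader(x_train, y_train, None)
    test_set = data_loader(x_test, y_test, None)
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, drop_last=True)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)
    return train_loader, test_loader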
@ -0,0 +1,139 @@
# encoding=utf-8
import os
import data_act as data_act
import pandas as pd
import data_weather as data_weather
import datetime
from ..loss_transfer import TransferLoss
import torch
import math
import data_process


def load_act_data(data_folder, batch_size=64, domain="1_20"):
    x_train, y_train, x_test, y_test = data_act.load_data(data_folder, domain)
    x_train, x_test = x_train.reshape(
        (-1, x_train.shape[2], 1, x_train.shape[1])), x_test.reshape((-1, x_train.shape[2], 1, x_train.shape[1]))
    transform = None
    train_set = data_act.data_loader(x_train, y_train, transform)
    test_set = data_act.data_loader(x_test, y_test, transform)
    train_loader = data_act.DataLoader(
        train_set, batch_size=batch_size, shuffle=True, drop_last=True)
    test_loader = data_act.DataLoader(
        test_set, batch_size=batch_size, shuffle=False)
    return train_loader, train_loader, test_loader


def load_weather_data(file_path, batch_size=6, station='Changping'):
    data_file = os.path.join(file_path, "PRSA_Data_1.pkl")
    mean_train, std_train = data_weather.get_weather_data_statistic(data_file, station=station,
                                                                    start_time='2013-3-1 0:0',
                                                                    end_time='2016-10-30 23:0')
    train_loader = data_weather.get_weather_data(data_file, station=station, start_time='2013-3-6 0:0',
                                                 end_time='2015-5-31 23:0', batch_size=batch_size, mean=mean_train,
                                                 std=std_train)
    valid_train_loader = data_weather.get_weather_data(data_file, station=station, start_time='2015-6-2 0:0',
                                                       end_time='2016-6-30 23:0', batch_size=batch_size,
                                                       mean=mean_train, std=std_train)
    valid_vld_loader = data_weather.get_weather_data(data_file, station=station, start_time='2016-7-2 0:0',
                                                     end_time='2016-10-30 23:0', batch_size=batch_size, mean=mean_train,
                                                     std=std_train)
    test_loader = data_weather.get_weather_data(data_file, station=station, start_time='2016-11-2 0:0',
                                                end_time='2017-2-28 23:0', batch_size=batch_size, mean=mean_train,
                                                std=std_train)
    return train_loader, valid_train_loader, valid_vld_loader, test_loader


def get_split_time(num_domain=2, mode='pre_process', data_file=None, station=None, dis_type='coral'):
    split_time = {
        '2': [('2013-3-6 0:0', '2015-5-31 23:0'), ('2015-6-2 0:0', '2016-6-30 23:0')]
    }
    if mode == 'pre_process':
        return split_time[str(num_domain)]
    if mode == 'tdc':
        return TDC(num_domain, data_file, station, dis_type=dis_type)
    else:
        print("error in mode")


def TDC(num_domain, data_file, station, dis_type='coral'):
    start_time = datetime.datetime.strptime(
        '2013-03-01 00:00:00', '%Y-%m-%d %H:%M:%S')
    end_time = datetime.datetime.strptime(
        '2016-06-30 23:00:00', '%Y-%m-%d %H:%M:%S')
    num_day = (end_time - start_time).days
    split_N = 10
    data = pd.read_pickle(data_file)[station]
    feat = data[0][0:num_day]
    feat = torch.tensor(feat, dtype=torch.float32)
    feat_shape_1 = feat.shape[1]
    feat = feat.reshape(-1, feat.shape[2])
    feat = feat.cuda()
    # num_day_new = feat.shape[0]

    selected = [0, 10]
    candidate = [1, 2, 3, 4, 5, 6, 7, 8, 9]
    start = 0

    if num_domain in [2, 3, 5, 7, 10]:
        while len(selected) - 2 < num_domain - 1:
            distance_list = []
            for can in candidate:
                selected.append(can)
                selected.sort()
                dis_temp = 0
                for i in range(1, len(selected) - 1):
                    for j in range(i, len(selected) - 1):
                        index_part1_start = start + math.floor(selected[i - 1] / split_N * num_day) * feat_shape_1
                        index_part1_end = start + math.floor(selected[i] / split_N * num_day) * feat_shape_1
                        feat_part1 = feat[index_part1_start: index_part1_end]
                        index_part2_start = start + math.floor(selected[j] / split_N * num_day) * feat_shape_1
                        index_part2_end = start + math.floor(selected[j + 1] / split_N * num_day) * feat_shape_1
                        feat_part2 = feat[index_part2_start:index_part2_end]
                        criterion_transder = TransferLoss(loss_type=dis_type, input_dim=feat_part1.shape[1])
                        dis_temp += criterion_transder.compute(feat_part1, feat_part2)
                distance_list.append(dis_temp)
                selected.remove(can)
            can_index = distance_list.index(max(distance_list))
            selected.append(candidate[can_index])
            candidate.remove(candidate[can_index])
        selected.sort()
        res = []
        for i in range(1, len(selected)):
            if i == 1:
                sel_start_time = start_time + datetime.timedelta(days=int(num_day / split_N * selected[i - 1]), hours=0)
            else:
                sel_start_time = start_time + datetime.timedelta(days=int(num_day / split_N * selected[i - 1]) + 1,
                                                                 hours=0)
            sel_end_time = start_time + datetime.timedelta(days=int(num_day / split_N * selected[i]), hours=23)
            sel_start_time = datetime.datetime.strftime(sel_start_time, '%Y-%m-%d %H:%M')
            sel_end_time = datetime.datetime.strftime(sel_end_time, '%Y-%m-%d %H:%M')
            res.append((sel_start_time, sel_end_time))
        return res
    else:
        print("error in number of domain")


def load_weather_data_multi_domain(file_path, batch_size=6, station='Changping', number_domain=2, mode='pre_process',
                                   dis_type='coral'):
    # mode: 'tdc', 'pre_process'
    data_file = os.path.join(file_path, "PRSA_Data_1.pkl")
    mean_train, std_train = data_weather.get_weather_data_statistic(data_file, station=station,
                                                                    start_time='2013-3-1 0:0',
                                                                    end_time='2016-10-30 23:0')
    split_time_list = get_split_time(number_domain, mode=mode, data_file=data_file, station=station, dis_type=dis_type)
    train_list = []
    for i in range(len(split_time_list)):
        time_temp = split_time_list[i]
        train_loader = data_weather.get_weather_data(data_file, station=station, start_time=time_temp[0],
                                                     end_time=time_temp[1], batch_size=batch_size, mean=mean_train,
                                                     std=std_train)
        train_list.append(train_loader)

    valid_vld_loader = data_weather.get_weather_data(data_file, station=station, start_time='2016-7-2 0:0',
                                                     end_time='2016-10-30 23:0', batch_size=batch_size, mean=mean_train,
                                                     std=std_train)
    test_loader = data_weather.get_weather_data(data_file, station=station, start_time='2016-11-2 0:0',
                                                end_time='2017-2-28 23:0', batch_size=batch_size, mean=mean_train,
                                                std=std_train, shuffle=False)
    return train_list, valid_vld_loader, test_loader
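

# Illustrative usage sketch: load_weather_data_multi_domain returns one training
# DataLoader per discovered domain plus validation and test loaders; file_path
# is a placeholder directory that must contain PRSA_Data_1.pkl.
def _demo_multi_domain_loaders(file_path='./dataset/'):
    train_list, valid_loader, test_loader = load_weather_data_multi_domain(
        file_path, batch_size=36, station='Dongsi', number_domain=2, mode='tdc')
    print(len(train_list), 'training domains')
    return train_list, valid_loader, test_loader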
@ -0,0 +1,132 @@
import math
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from pandas.core.frame import DataFrame
from torch.utils.data import Dataset, DataLoader
import torch
import pickle
import datetime


class data_loader(Dataset):
    def __init__(self, df_feature, df_label, df_label_reg, t=None):

        assert len(df_feature) == len(df_label)
        assert len(df_feature) == len(df_label_reg)

        # df_feature = df_feature.reshape(df_feature.shape[0], df_feature.shape[1] // 6, df_feature.shape[2] * 6)
        self.df_feature = df_feature
        self.df_label = df_label
        self.df_label_reg = df_label_reg

        self.T = t
        self.df_feature = torch.tensor(
            self.df_feature, dtype=torch.float32)
        self.df_label = torch.tensor(
            self.df_label, dtype=torch.float32)
        self.df_label_reg = torch.tensor(
            self.df_label_reg, dtype=torch.float32)

    def __getitem__(self, index):
        sample, target, label_reg = self.df_feature[index], self.df_label[index], self.df_label_reg[index]
        if self.T:
            return self.T(sample), target, label_reg
        else:
            return sample, target, label_reg

    def __len__(self):
        return len(self.df_feature)


def create_dataset(df, station, start_date, end_date, mean=None, std=None):
    data = df[station]
    feat, label, label_reg = data[0], data[1], data[2]
    reference_start_time = datetime.datetime(2013, 3, 1, 0, 0)
    reference_end_time = datetime.datetime(2017, 2, 28, 0, 0)

    assert (pd.to_datetime(start_date) - reference_start_time).days >= 0
    assert (pd.to_datetime(end_date) - reference_end_time).days <= 0
    assert (pd.to_datetime(end_date) - pd.to_datetime(start_date)).days >= 0
    index_start = (pd.to_datetime(start_date) - reference_start_time).days
    index_end = (pd.to_datetime(end_date) - reference_start_time).days
    feat = feat[index_start: index_end + 1]
    label = label[index_start: index_end + 1]
    label_reg = label_reg[index_start: index_end + 1]

    # ori_shape_1, ori_shape_2=feat.shape[1], feat.shape[2]
    # feat=feat.reshape(-1, feat.shape[2])
    # feat=(feat - mean) / std
    # feat=feat.reshape(-1, ori_shape_1, ori_shape_2)

    return data_loader(feat, label, label_reg)


def create_dataset_shallow(df, station, start_date, end_date, mean=None, std=None):
    data = df[station]
    feat, label, label_reg = data[0], data[1], data[2]
    reference_start_time = datetime.datetime(2013, 3, 1, 0, 0)
    reference_end_time = datetime.datetime(2017, 2, 28, 0, 0)

    assert (pd.to_datetime(start_date) - reference_start_time).days >= 0
    assert (pd.to_datetime(end_date) - reference_end_time).days <= 0
    assert (pd.to_datetime(end_date) - pd.to_datetime(start_date)).days >= 0
    index_start = (pd.to_datetime(start_date) - reference_start_time).days
    index_end = (pd.to_datetime(end_date) - reference_start_time).days
    feat = feat[index_start: index_end + 1]
    label = label[index_start: index_end + 1]
    label_reg = label_reg[index_start: index_end + 1]

    # ori_shape_1, ori_shape_2=feat.shape[1], feat.shape[2]
    # feat=feat.reshape(-1, feat.shape[2])
    # feat=(feat - mean) / std
    # feat=feat.reshape(-1, ori_shape_1, ori_shape_2)

    return feat, label_reg


def get_dataset_statistic(df, station, start_date, end_date):
    data = df[station]
    feat, label = data[0], data[1]
    reference_start_time = datetime.datetime(2013, 3, 1, 0, 0)
    reference_end_time = datetime.datetime(2017, 2, 28, 0, 0)

    assert (pd.to_datetime(start_date) - reference_start_time).days >= 0
    assert (pd.to_datetime(end_date) - reference_end_time).days <= 0
    assert (pd.to_datetime(end_date) - pd.to_datetime(start_date)).days >= 0
    index_start = (pd.to_datetime(start_date) - reference_start_time).days
    index_end = (pd.to_datetime(end_date) - reference_start_time).days
    feat = feat[index_start: index_end + 1]
    label = label[index_start: index_end + 1]
    feat = feat.reshape(-1, feat.shape[2])
    mu_train = np.mean(feat, axis=0)
    sigma_train = np.std(feat, axis=0)

    return mu_train, sigma_train


def get_weather_data(data_file, station, start_time, end_time, batch_size, shuffle=True, mean=None, std=None):
    df = pd.read_pickle(data_file)

    dataset = create_dataset(df, station, start_time,
                             end_time, mean=mean, std=std)
    train_loader = DataLoader(
        dataset, batch_size=batch_size, shuffle=shuffle)
    return train_loader


def get_weather_data_shallow(data_file, station, start_time, end_time, batch_size, shuffle=True, mean=None, std=None):
    df = pd.read_pickle(data_file)

    feat, label_reg = create_dataset_shallow(df, station, start_time,
                                             end_time, mean=mean, std=std)

    return feat, label_reg


def get_weather_data_statistic(data_file, station, start_time, end_time):
    df = pd.read_pickle(data_file)
    mean_train, std_train = get_dataset_statistic(
        df, station, start_time, end_time)
    return mean_train, std_train
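

# Illustrative usage sketch: each batch from get_weather_data yields
# (feature, label, label_reg) with feature shaped (batch, len_seq, n_features).
# The pickle path and station name below are placeholders.
def _demo_weather_loader(data_file='./PRSA_Data_1.pkl', station='Dongsi'):
    loader = get_weather_data(data_file, station=station,
                              start_time='2013-3-6 0:0', end_time='2015-5-31 23:0',
                              batch_size=36)
    feature, label, label_reg = next(iter(loader))
    print(feature.shape, label.shape, label_reg.shape)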
@ -0,0 +1,115 @@
import torch
import torch.nn as nn


class MMD_loss(nn.Module):
    def __init__(self, kernel_type='linear', kernel_mul=2.0, kernel_num=5):
        super(MMD_loss, self).__init__()
        self.kernel_num = kernel_num
        self.kernel_mul = kernel_mul
        self.fix_sigma = None
        self.kernel_type = kernel_type

    def guassian_kernel(self, source, target, kernel_mul=2.0, kernel_num=5, fix_sigma=None):
        n_samples = int(source.size()[0]) + int(target.size()[0])
        total = torch.cat([source, target], dim=0)
        total0 = total.unsqueeze(0).expand(
            int(total.size(0)), int(total.size(0)), int(total.size(1)))
        total1 = total.unsqueeze(1).expand(
            int(total.size(0)), int(total.size(0)), int(total.size(1)))
        L2_distance = ((total0 - total1) ** 2).sum(2)
        if fix_sigma:
            bandwidth = fix_sigma
        else:
            bandwidth = torch.sum(L2_distance.data) / (n_samples ** 2 - n_samples)
        bandwidth /= kernel_mul ** (kernel_num // 2)
        bandwidth_list = [bandwidth * (kernel_mul ** i)
                          for i in range(kernel_num)]
        kernel_val = [torch.exp(-L2_distance / bandwidth_temp)
                      for bandwidth_temp in bandwidth_list]
        return sum(kernel_val)

    def linear_mmd(self, X, Y):
        delta = X.mean(axis=0) - Y.mean(axis=0)
        loss = delta.dot(delta.T)
        return loss

    def forward(self, source, target):
        if self.kernel_type == 'linear':
            return self.linear_mmd(source, target)
        elif self.kernel_type == 'rbf':
            batch_size = int(source.size()[0])
            kernels = self.guassian_kernel(
                source, target, kernel_mul=self.kernel_mul, kernel_num=self.kernel_num, fix_sigma=self.fix_sigma)
            with torch.no_grad():
                XX = torch.mean(kernels[:batch_size, :batch_size])
                YY = torch.mean(kernels[batch_size:, batch_size:])
                XY = torch.mean(kernels[:batch_size, batch_size:])
                YX = torch.mean(kernels[batch_size:, :batch_size])
                loss = torch.mean(XX + YY - XY - YX)
            return loss
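

# Illustrative usage sketch: MMD compares two samples through kernel mean
# embeddings, MMD^2 = E[k(x, x')] + E[k(y, y')] - 2 E[k(x, y)]. A minimal
# example on random features; the shapes here are arbitrary.
def _demo_mmd_loss():
    x = torch.randn(64, 32)
    y = torch.randn(64, 32) + 0.5
    print('linear MMD:', MMD_loss(kernel_type='linear')(x, y).item())
    print('rbf MMD:', MMD_loss(kernel_type='rbf')(x, y).item())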


import tensorflow as tf
import numpy as np


class MMDLoss(tf.keras.losses.Loss):
    def __init__(self, kernel_type='linear', kernel_mul=2.0, kernel_num=5):
        super(MMDLoss, self).__init__()
        self.kernel_type = kernel_type
        self.kernel_mul = kernel_mul
        self.kernel_num = kernel_num

    def guassian_kernel(self, source, target, kernel_mul=2.0, kernel_num=5, fix_sigma=None):
        n_samples = int(source.shape[0]) + int(target.shape[0])
        total = tf.concat([source, target], axis=0)
        total0 = tf.expand_dims(total, 0)
        total0 = tf.tile(total0, [total.shape[0], 1, 1])
        total1 = tf.expand_dims(total, 1)
        total1 = tf.tile(total1, [1, total.shape[0], 1])
        L2_distance = tf.reduce_sum((total0 - total1) ** 2, axis=2)

        if fix_sigma:
            bandwidth = fix_sigma
        else:
            bandwidth = tf.reduce_sum(L2_distance) / (n_samples ** 2 - n_samples)
        bandwidth /= kernel_mul ** (kernel_num // 2)
        bandwidth_list = [bandwidth * (kernel_mul ** i)
                          for i in range(kernel_num)]
        kernel_val = [tf.exp(-L2_distance / bandwidth_temp)
                      for bandwidth_temp in bandwidth_list]
        return sum(kernel_val)

    def linear_mmd(self, X, Y):
        delta = tf.reduce_mean(X, axis=0) - tf.reduce_mean(Y, axis=0)
        # delta is a 1-D vector, so its squared norm is a plain inner product
        loss = tf.tensordot(delta, delta, axes=1)
        return loss

    def call(self, source, target):
        if self.kernel_type == 'linear':
            return self.linear_mmd(source, target)
        elif self.kernel_type == 'rbf':
            batch_size = int(source.shape[0])
            kernels = self.guassian_kernel(
                source, target, kernel_mul=self.kernel_mul, kernel_num=self.kernel_num, fix_sigma=None)
            with tf.GradientTape(persistent=True) as tape:
                tape.watch(kernels)
                XX = tf.reduce_mean(kernels[:batch_size, :batch_size])
                YY = tf.reduce_mean(kernels[batch_size:, batch_size:])
                XY = tf.reduce_mean(kernels[:batch_size, batch_size:])
                YX = tf.reduce_mean(kernels[batch_size:, :batch_size])
                loss = XX + YY - XY - YX
            return loss


if __name__ == '__main__':
    # Example usage
    source = np.random.randn(100, 128)
    target = np.random.randn(100, 128)
    source_tf = tf.convert_to_tensor(source, dtype=tf.float32)
    target_tf = tf.convert_to_tensor(target, dtype=tf.float32)

    mmd_loss = MMDLoss(kernel_type='rbf', kernel_mul=2.0, kernel_num=5)
    loss = mmd_loss(source_tf, target_tf)
    print("MMD Loss:", loss.numpy())
@ -0,0 +1,54 @@
from loss import adv_loss, CORAL, kl_js, mmd, mutual_info, cosine, pairwise_dist


class TransferLoss(object):
    def __init__(self, loss_type='cosine', input_dim=512):
        """
        Supported loss_type: mmd(mmd_lin), mmd_rbf, coral, cosine, kl, js, mine, adv
        """
        self.loss_type = loss_type
        self.input_dim = input_dim

    def compute(self, X, Y):
        """Compute adaptation loss

        Arguments:
            X {tensor} -- source matrix
            Y {tensor} -- target matrix

        Returns:
            [tensor] -- transfer loss
        """
        if self.loss_type == 'mmd_lin' or self.loss_type == 'mmd':
            mmdloss = mmd.MMD_loss(kernel_type='linear')
            loss = mmdloss(X, Y)
        elif self.loss_type == 'coral':
            loss = CORAL(X, Y)
        elif self.loss_type == 'cosine' or self.loss_type == 'cos':
            loss = 1 - cosine(X, Y)
        elif self.loss_type == 'kl':
            loss = kl_js.kl_div(X, Y)
        elif self.loss_type == 'js':
            loss = kl_js.js(X, Y)
        elif self.loss_type == 'mine':
            mine_model = mutual_info.Mine_estimator(
                input_dim=self.input_dim, hidden_dim=60).cuda()
            loss = mine_model(X, Y)
        elif self.loss_type == 'adv':
            loss = adv_loss.adv(X, Y, input_dim=self.input_dim, hidden_dim=32)
        elif self.loss_type == 'mmd_rbf':
            mmdloss = mmd.MMD_loss(kernel_type='rbf')
            loss = mmdloss(X, Y)
        elif self.loss_type == 'pairwise':
            pair_mat = pairwise_dist(X, Y)
            import torch
            loss = torch.norm(pair_mat)

        return loss


if __name__ == "__main__":
    import torch
    trans_loss = TransferLoss('adv')
    a = (torch.randn(5, 512) * 10).cuda()
    b = (torch.randn(5, 512) * 10).cuda()
    print(trans_loss.compute(a, b))
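

# Illustrative usage sketch: the 'adv' example above needs a GPU; a CPU-only
# variant using the linear MMD distance exposes the same compute() interface.
def _demo_transfer_loss_cpu():
    import torch
    trans_loss = TransferLoss(loss_type='mmd', input_dim=512)
    a = torch.randn(5, 512) * 10
    b = torch.randn(5, 512) * 10
    print(trans_loss.compute(a, b))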
@ -0,0 +1,463 @@
import torch.nn as nn
import torch
import torch.optim as optim

import os
import argparse
import datetime
import numpy as np

from tqdm import tqdm
from utils import utils
from base.AdaRNN import AdaRNN

import pretty_errors
import dataset.data_process as data_process
import matplotlib.pyplot as plt


def pprint(*text):
    # print with UTC+8 time
    time = '[' + str(datetime.datetime.utcnow() +
                     datetime.timedelta(hours=8))[:19] + '] -'
    print(time, *text, flush=True)
    if args.log_file is None:
        return
    with open(args.log_file, 'a') as f:
        print(time, *text, flush=True, file=f)


def get_model(name='AdaRNN'):
    n_hiddens = [args.hidden_size for i in range(args.num_layers)]
    return AdaRNN(use_bottleneck=True, bottleneck_width=64, n_input=args.d_feat, n_hiddens=n_hiddens,
                  n_output=args.class_num, dropout=args.dropout, model_type=name, len_seq=args.len_seq,
                  trans_loss=args.loss_type).cuda()


def train_AdaRNN(args, model, optimizer, train_loader_list, epoch, dist_old=None, weight_mat=None):
    model.train()
    criterion = nn.MSELoss()
    criterion_1 = nn.L1Loss()
    loss_all = []
    loss_1_all = []
    dist_mat = torch.zeros(args.num_layers, args.len_seq).cuda()
    len_loader = np.inf
    for loader in train_loader_list:
        if len(loader) < len_loader:
            len_loader = len(loader)
    for data_all in tqdm(zip(*train_loader_list), total=len_loader):
        optimizer.zero_grad()
        list_feat = []
        list_label = []
        for data in data_all:
            feature, label, label_reg = data[0].cuda().float(
            ), data[1].cuda().long(), data[2].cuda().float()
            list_feat.append(feature)
            list_label.append(label_reg)
        flag = False
        index = get_index(len(data_all) - 1)
        for temp_index in index:
            s1 = temp_index[0]
            s2 = temp_index[1]
            if list_feat[s1].shape[0] != list_feat[s2].shape[0]:
                flag = True
                break
        if flag:
            continue

        total_loss = torch.zeros(1).cuda()
        for i in range(len(index)):
            feature_s = list_feat[index[i][0]]
            feature_t = list_feat[index[i][1]]
            label_reg_s = list_label[index[i][0]]
            label_reg_t = list_label[index[i][1]]
            feature_all = torch.cat((feature_s, feature_t), 0)

            if epoch < args.pre_epoch:
                pred_all, loss_transfer, out_weight_list = model.forward_pre_train(
                    feature_all, len_win=args.len_win)
            else:
                pred_all, loss_transfer, dist, weight_mat = model.forward_Boosting(
                    feature_all, weight_mat)
                dist_mat = dist_mat + dist
            pred_s = pred_all[0:feature_s.size(0)]
            pred_t = pred_all[feature_s.size(0):]

            loss_s = criterion(pred_s, label_reg_s)
            loss_t = criterion(pred_t, label_reg_t)
            loss_l1 = criterion_1(pred_s, label_reg_s)

            total_loss = total_loss + loss_s + loss_t + args.dw * loss_transfer
        loss_all.append(
            [total_loss.item(), (loss_s + loss_t).item(), loss_transfer.item()])
        loss_1_all.append(loss_l1.item())
        optimizer.zero_grad()
        total_loss.backward()
        torch.nn.utils.clip_grad_value_(model.parameters(), 3.)
        optimizer.step()
    loss = np.array(loss_all).mean(axis=0)
    loss_l1 = np.array(loss_1_all).mean()
    if epoch >= args.pre_epoch:
        if epoch > args.pre_epoch:
            weight_mat = model.update_weight_Boosting(
                weight_mat, dist_old, dist_mat)
        return loss, loss_l1, weight_mat, dist_mat
    else:
        weight_mat = transform_type(out_weight_list)
        return loss, loss_l1, weight_mat, None


def train_epoch_transfer_Boosting(model, optimizer, train_loader_list, epoch, dist_old=None, weight_mat=None):
    model.train()
    criterion = nn.MSELoss()
    criterion_1 = nn.L1Loss()
    loss_all = []
    loss_1_all = []
    dist_mat = torch.zeros(args.num_layers, args.len_seq).cuda()
    len_loader = np.inf
    for loader in train_loader_list:
        if len(loader) < len_loader:
            len_loader = len(loader)
    for data_all in tqdm(zip(*train_loader_list), total=len_loader):
        optimizer.zero_grad()
        list_feat = []
        list_label = []
        for data in data_all:
            feature, label, label_reg = data[0].cuda().float(
            ), data[1].cuda().long(), data[2].cuda().float()
            list_feat.append(feature)
            list_label.append(label_reg)
        flag = False
        index = get_index(len(data_all) - 1)
        for temp_index in index:
            s1 = temp_index[0]
            s2 = temp_index[1]
            if list_feat[s1].shape[0] != list_feat[s2].shape[0]:
                flag = True
                break
        if flag:
            continue

        total_loss = torch.zeros(1).cuda()
        for i in range(len(index)):
            feature_s = list_feat[index[i][0]]
            feature_t = list_feat[index[i][1]]
            label_reg_s = list_label[index[i][0]]
            label_reg_t = list_label[index[i][1]]
            feature_all = torch.cat((feature_s, feature_t), 0)

            pred_all, loss_transfer, dist, weight_mat = model.forward_Boosting(
                feature_all, weight_mat)
            dist_mat = dist_mat + dist
            pred_s = pred_all[0:feature_s.size(0)]
            pred_t = pred_all[feature_s.size(0):]

            loss_s = criterion(pred_s, label_reg_s)
            loss_t = criterion(pred_t, label_reg_t)
            loss_l1 = criterion_1(pred_s, label_reg_s)

            total_loss = total_loss + loss_s + loss_t + args.dw * loss_transfer

        loss_all.append(
            [total_loss.item(), (loss_s + loss_t).item(), loss_transfer.item()])
        loss_1_all.append(loss_l1.item())
        optimizer.zero_grad()
        total_loss.backward()
        torch.nn.utils.clip_grad_value_(model.parameters(), 3.)
        optimizer.step()
    loss = np.array(loss_all).mean(axis=0)
    loss_l1 = np.array(loss_1_all).mean()
    if epoch > 0:  # args.pre_epoch:
        weight_mat = model.update_weight_Boosting(
            weight_mat, dist_old, dist_mat)
    return loss, loss_l1, weight_mat, dist_mat


def get_index(num_domain=2):
    index = []
    for i in range(num_domain):
        for j in range(i + 1, num_domain + 1):
            index.append((i, j))
    return index
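

# Worked example: get_index enumerates every (source, target) domain pair used
# by the pairwise alignment loops above; the callers pass len(data_all) - 1, so
# two training loaders give num_domain=1 and a single pair.
def _demo_get_index():
    assert get_index(1) == [(0, 1)]
    assert get_index(2) == [(0, 1), (0, 2), (1, 2)]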


def train_epoch_transfer(args, model, optimizer, train_loader_list):
    model.train()
    criterion = nn.MSELoss()
    criterion_1 = nn.L1Loss()
    loss_all = []
    loss_1_all = []
    len_loader = np.inf
    for loader in train_loader_list:
        if len(loader) < len_loader:
            len_loader = len(loader)

    for data_all in tqdm(zip(*train_loader_list), total=len_loader):
        optimizer.zero_grad()
        list_feat = []
        list_label = []
        for data in data_all:
            feature, label, label_reg = data[0].cuda().float(
            ), data[1].cuda().long(), data[2].cuda().float()
            list_feat.append(feature)
            list_label.append(label_reg)
        flag = False
        index = get_index(len(data_all) - 1)
        for temp_index in index:
            s1 = temp_index[0]
            s2 = temp_index[1]
            if list_feat[s1].shape[0] != list_feat[s2].shape[0]:
                flag = True
                break
        if flag:
            continue

        ###############
        total_loss = torch.zeros(1).cuda()
        for i in range(len(index)):
            feature_s = list_feat[index[i][0]]
            feature_t = list_feat[index[i][1]]
            label_reg_s = list_label[index[i][0]]
            label_reg_t = list_label[index[i][1]]
            feature_all = torch.cat((feature_s, feature_t), 0)

            pred_all, loss_transfer, out_weight_list = model.forward_pre_train(
                feature_all, len_win=args.len_win)
            pred_s = pred_all[0:feature_s.size(0)]
            pred_t = pred_all[feature_s.size(0):]

            loss_s = criterion(pred_s, label_reg_s)
            loss_t = criterion(pred_t, label_reg_t)
            loss_l1 = criterion_1(pred_s, label_reg_s)

            total_loss = total_loss + loss_s + loss_t + args.dw * loss_transfer
        loss_all.append(
            [total_loss.item(), (loss_s + loss_t).item(), loss_transfer.item()])
        loss_1_all.append(loss_l1.item())
        optimizer.zero_grad()
        total_loss.backward()
        torch.nn.utils.clip_grad_value_(model.parameters(), 3.)
        optimizer.step()
    loss = np.array(loss_all).mean(axis=0)
    loss_l1 = np.array(loss_1_all).mean()
    return loss, loss_l1, out_weight_list


def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


def test_epoch(model, test_loader, prefix='Test'):
    model.eval()
    total_loss = 0
    total_loss_1 = 0
    total_loss_r = 0
    correct = 0
    criterion = nn.MSELoss()
    criterion_1 = nn.L1Loss()
    for feature, label, label_reg in tqdm(test_loader, desc=prefix, total=len(test_loader)):
        feature, label_reg = feature.cuda().float(), label_reg.cuda().float()
        with torch.no_grad():
            pred = model.predict(feature)
        loss = criterion(pred, label_reg)
        loss_r = torch.sqrt(loss)
        loss_1 = criterion_1(pred, label_reg)
        total_loss += loss.item()
        total_loss_1 += loss_1.item()
        total_loss_r += loss_r.item()
    loss = total_loss / len(test_loader)
    loss_1 = total_loss_1 / len(test_loader)
    loss_r = total_loss_r / len(test_loader)
    return loss, loss_1, loss_r


def test_epoch_inference(model, test_loader, prefix='Test'):
    model.eval()
    total_loss = 0
    total_loss_1 = 0
    total_loss_r = 0
    correct = 0
    criterion = nn.MSELoss()
    criterion_1 = nn.L1Loss()
    i = 0
    for feature, label, label_reg in tqdm(test_loader, desc=prefix, total=len(test_loader)):
        feature, label_reg = feature.cuda().float(), label_reg.cuda().float()
        with torch.no_grad():
            pred = model.predict(feature)
        loss = criterion(pred, label_reg)
        loss_r = torch.sqrt(loss)
        loss_1 = criterion_1(pred, label_reg)
        total_loss += loss.item()
        total_loss_1 += loss_1.item()
        total_loss_r += loss_r.item()
        if i == 0:
            label_list = label_reg.cpu().numpy()
            predict_list = pred.cpu().numpy()
        else:
            label_list = np.hstack((label_list, label_reg.cpu().numpy()))
            predict_list = np.hstack((predict_list, pred.cpu().numpy()))

        i = i + 1
    loss = total_loss / len(test_loader)
    loss_1 = total_loss_1 / len(test_loader)
    loss_r = total_loss_r / len(test_loader)
    return loss, loss_1, loss_r, label_list, predict_list


def inference(model, data_loader):
    loss, loss_1, loss_r, label_list, predict_list = test_epoch_inference(
        model, data_loader, prefix='Inference')
    return loss, loss_1, loss_r, label_list, predict_list


def inference_all(output_path, model, model_path, loaders):
    pprint('inference...')
    loss_list = []
    loss_l1_list = []
    loss_r_list = []
    model.load_state_dict(torch.load(model_path))
    i = 0
    list_name = ['train', 'valid', 'test']
    for loader in loaders:
        loss, loss_1, loss_r, label_list, predict_list = inference(
            model, loader)
        loss_list.append(loss)
        loss_l1_list.append(loss_1)
        loss_r_list.append(loss_r)
        i = i + 1
    return loss_list, loss_l1_list, loss_r_list


def transform_type(init_weight):
    weight = torch.ones(args.num_layers, args.len_seq).cuda()
    for i in range(args.num_layers):
        for j in range(args.len_seq):
            weight[i, j] = init_weight[i][j].item()
    return weight


def main_transfer(args):
    print(args)

    output_path = args.outdir + '_' + args.station + '_' + args.model_name + '_weather_' + \
        args.loss_type + '_' + str(args.pre_epoch) + \
        '_' + str(args.dw) + '_' + str(args.lr)
    save_model_name = args.model_name + '_' + args.loss_type + \
        '_' + str(args.dw) + '_' + str(args.lr) + '.pkl'
    utils.dir_exist(output_path)
    pprint('create loaders...')

    train_loader_list, valid_loader, test_loader = data_process.load_weather_data_multi_domain(
        args.data_path, args.batch_size, args.station, args.num_domain, args.data_mode)

    args.log_file = os.path.join(output_path, 'run.log')
    pprint('create model...')
    model = get_model(args.model_name)
    num_model = count_parameters(model)
    print('#model params:', num_model)

    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    best_score = np.inf
    best_epoch, stop_round = 0, 0
    weight_mat, dist_mat = None, None

    for epoch in range(args.n_epochs):
        pprint('Epoch:', epoch)
        pprint('training...')
        if args.model_name in ['Boosting']:
            loss, loss1, weight_mat, dist_mat = train_epoch_transfer_Boosting(
                model, optimizer, train_loader_list, epoch, dist_mat, weight_mat)
        elif args.model_name in ['AdaRNN']:
            loss, loss1, weight_mat, dist_mat = train_AdaRNN(
                args, model, optimizer, train_loader_list, epoch, dist_mat, weight_mat)
        else:
            print("error in model_name!")
        pprint(loss, loss1)

        pprint('evaluating...')
        train_loss, train_loss_l1, train_loss_r = test_epoch(
            model, train_loader_list[0], prefix='Train')
        val_loss, val_loss_l1, val_loss_r = test_epoch(
            model, valid_loader, prefix='Valid')
        test_loss, test_loss_l1, test_loss_r = test_epoch(
            model, test_loader, prefix='Test')

        pprint('valid %.6f, test %.6f' %
               (val_loss_l1, test_loss_l1))

        if val_loss < best_score:
            best_score = val_loss
            stop_round = 0
            best_epoch = epoch
            torch.save(model.state_dict(), os.path.join(
                output_path, save_model_name))
        else:
            stop_round += 1
            if stop_round >= args.early_stop:
                pprint('early stop')
                break

    pprint('best val score:', best_score, '@', best_epoch)

    loaders = train_loader_list[0], valid_loader, test_loader
    loss_list, loss_l1_list, loss_r_list = inference_all(output_path, model, os.path.join(
        output_path, save_model_name), loaders)
    pprint('MSE: train %.6f, valid %.6f, test %.6f' %
           (loss_list[0], loss_list[1], loss_list[2]))
    pprint('L1: train %.6f, valid %.6f, test %.6f' %
           (loss_l1_list[0], loss_l1_list[1], loss_l1_list[2]))
    pprint('RMSE: train %.6f, valid %.6f, test %.6f' %
           (loss_r_list[0], loss_r_list[1], loss_r_list[2]))
    pprint('Finished.')


def get_args():
    parser = argparse.ArgumentParser()

    # model
    parser.add_argument('--model_name', default='AdaRNN')
    parser.add_argument('--d_feat', type=int, default=6)

    parser.add_argument('--hidden_size', type=int, default=64)
    parser.add_argument('--num_layers', type=int, default=2)
    parser.add_argument('--dropout', type=float, default=0.0)
    parser.add_argument('--class_num', type=int, default=1)
    parser.add_argument('--pre_epoch', type=int, default=40)  # 20, 30, 50

    # training
    parser.add_argument('--n_epochs', type=int, default=200)
    parser.add_argument('--lr', type=float, default=5e-4)
    parser.add_argument('--early_stop', type=int, default=40)
    parser.add_argument('--smooth_steps', type=int, default=5)
    parser.add_argument('--batch_size', type=int, default=36)
    parser.add_argument('--dw', type=float, default=0.5)  # 0.01, 0.05, 5.0
    parser.add_argument('--loss_type', type=str, default='adv')
    parser.add_argument('--station', type=str, default='Dongsi')
    parser.add_argument('--data_mode', type=str,
                        default='tdc')
    parser.add_argument('--num_domain', type=int, default=2)
    parser.add_argument('--len_seq', type=int, default=24)

    # other
    parser.add_argument('--seed', type=int, default=10)
    parser.add_argument('--data_path', default="/root/Messi_du/adarnn/")
    parser.add_argument('--outdir', default='./outputs')
    parser.add_argument('--overwrite', action='store_true')
    parser.add_argument('--log_file', type=str, default='run.log')
    parser.add_argument('--gpu_id', type=int, default=0)
    parser.add_argument('--len_win', type=int, default=0)
    args = parser.parse_args()

    return args


if __name__ == '__main__':
    args = get_args()
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu_id)
    main_transfer(args)