leecode update

This commit is contained in:
markilue 2022-10-12 14:18:02 +08:00
parent d8f403cc14
commit f888fdcc78
12 changed files with 1747 additions and 0 deletions

View File

@ -0,0 +1,178 @@
package com.markilue.leecode.backtrace;
import org.junit.Test;
import java.util.ArrayList;
import java.util.List;
/**
* @BelongsProject: Leecode
* @BelongsPackage: com.markilue.leecode.backtrace
* @Author: markilue
* @CreateTime: 2022-10-12 11:23
* @Description:
* TODO LeetCode problem 77: Combinations
* Given two integers n and k, return all possible combinations of k numbers chosen from the range [1, n].
* You may return the answer in any order.
* @Version: 1.0
*/
public class Combine {
@Test
public void test(){
int n = 4, k = 2;
List<List<Integer>> combine = combine2(4, 2);
System.out.println(combine);
}
@Test
public void test1(){
int n = 4, k = 2;
List<List<Integer>> combine = combine(1, 1);
System.out.println(combine);
}
@Test
public void test2(){
int n = 4, k = 2;
List<List<Integer>> combine = combine(6, 3);
System.out.println(combine);
}
/**
* Since k is not fixed, nested for loops are impractical, so recursion is used instead.
* Speed beats 45.7% of submissions, memory beats 41.95%.
* @param n
* @param k
* @return
*/
public List<List<Integer>> combine(int n, int k) {
for (int i = 1; i <= n; i++) {
list(i,n,k);
cur.remove(cur.size()-1);
}
return result;
}
List<List<Integer>> result=new ArrayList<>();
List<Integer> cur=new ArrayList<>();
/**
* Passes how many numbers are still needed (k) and the selectable range.
* @param val the current value to add
* @param n the upper bound of the range
* @param k how many more numbers are still needed
*/
public void list(int val,int n,int k){
cur.add(val);
if(k==1){
ArrayList<Integer> list1 = new ArrayList<>();
list1.addAll(cur);
result.add(list1);
return;
}
for (int i = val+1; i <= n; i++) {
list(i,n,k-1);
cur.remove(cur.size()-1);
}
}
/**
* Implementation that follows the standard backtracking template.
*
* @param n
* @param k
* @return
*/
public List<List<Integer>> combine1(int n, int k) {
result.clear();
cur.clear();
backtracking(n,k,1);
return result;
}
public void backtracking(int n,int k,int val){
if(k==cur.size()){
ArrayList<Integer> list1 = new ArrayList<>();
list1.addAll(cur);
result.add(list1);
return;
}
// Pruning: the loop only needs to run up to n-(k-cur.size())+1, the largest start value that still leaves enough numbers; e.g. choosing 3 numbers from 5, the first level can go up to 3 at most (i.e. 3 4 5)
for (int i = val; i <= n-(k-cur.size())+1; i++) {
cur.add(i);
backtracking(n,k,i+1);
cur.remove(cur.size()-1);
}
}
/**
* Non-recursive (lexicographic) method. The core idea is to use two rules to step a binary mask
* through all of its combinations, thereby enumerating every possibility.
* Rule 1:
* If the lowest bit of x is 1 and the mask ends with t consecutive 1s, swap the 1 in position t
* from the end with the 0 in position t+1 from the end to obtain next(x).
* e.g. 0011 -> 0101, 0101 -> 0110, 1001 -> 1010, 1001111 -> 1010111
*
* Rule 2:
* If the lowest bit of x is 0, the mask ends with t consecutive 0s preceded by m consecutive 1s.
* Swap the 1 in position t+m from the end with the 0 in position t+m+1 from the end,
* then move the 1s in positions t+1 through t+m-1 from the end down to the lowest bits.
* e.g. 0110 -> 1001, 1010 -> 1100, 1011100 -> 1100011
*
* See the worked transformation in the leecode notes for details.
*
* @param k
* @return
*/
public List<List<Integer>> combine2(int n, int k) {
List<Integer> temp = new ArrayList<Integer>();
List<List<Integer>> ans = new ArrayList<List<Integer>>();
// Initialization
// temp[i] = i + 1 for i in [0, k - 1], i.e. positions [0, k - 1] hold the values [1, k]
// append n + 1 at the end as a sentinel
for (int i = 1; i <= k; ++i) {
temp.add(i);
}
temp.add(n + 1);
int j = 0;
while (j < k) {
ans.add(new ArrayList<Integer>(temp.subList(0, k)));
j = 0;
// find the first position j where temp[j] + 1 != temp[j + 1]
// resetting every position in [0, j - 1] back to [1, j] along the way
while (j < k && temp.get(j) + 1 == temp.get(j + 1)) {
temp.set(j, j + 1);
++j;
}
// j is now the first position where temp[j] + 1 != temp[j + 1]
temp.set(j, temp.get(j) + 1);
}
return ans;
}
}
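For reference, a minimal Python port of the combine2 pointer method above (a sketch, not part of the commit); printing its output for n = 4, k = 2 makes the lexicographic order produced by the two rules easy to see:

from typing import List

def combine_lexicographic(n: int, k: int) -> List[List[int]]:
    # temp[0..k-1] holds the current combination, initialised to 1..k, with n + 1 appended as a sentinel
    temp = list(range(1, k + 1)) + [n + 1]
    ans = []
    j = 0
    while j < k:
        ans.append(temp[:k])
        j = 0
        # find the first position j with temp[j] + 1 != temp[j + 1],
        # resetting the prefix back to 1..j along the way
        while j < k and temp[j] + 1 == temp[j + 1]:
            temp[j] = j + 1
            j += 1
        temp[j] += 1
    return ans

print(combine_lexicographic(4, 2))
# [[1, 2], [1, 3], [2, 3], [1, 4], [2, 4], [3, 4]]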

View File

@ -0,0 +1,437 @@
# -*- coding: utf-8 -*-
# coding: utf-8
'''
@Author : dingjiawen
@Date : 2022/10/11 18:53
@Usage :
@Desc :
'''
# -*- coding: utf-8 -*-
# coding: utf-8
'''
@Author : dingjiawen
@Date : 2022/10/11 18:52
@Usage : Comparison experiment with the same depth as JointNet, used for prediction
@Desc :
'''
# -*- coding: utf-8 -*-
# coding: utf-8
import tensorflow as tf
import tensorflow.keras
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from model.DepthwiseCon1D.DepthwiseConv1D import DepthwiseConv1D
from model.Dynamic_channelAttention.Dynamic_channelAttention import DynamicChannelAttention
from condition_monitoring.data_deal import loadData
from model.Joint_Monitoring.Joint_Monitoring3 import Joint_Monitoring
from model.CommonFunction.CommonFunction import *
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import load_model, save_model
from keras.callbacks import EarlyStopping
'''Hyperparameter settings'''
time_stamp = 120
feature_num = 10
batch_size = 16
learning_rate = 0.001
EPOCH = 101
model_name = "DCConv"
'''EWMA hyperparameters'''
K = 18
namuda = 0.01
'''Save paths'''
save_name = "./model/{0}_timestamp{1}_feature{2}.h5".format(model_name,
time_stamp,
feature_num,
batch_size,
EPOCH)
save_step_two_name = "../hard_model/two_weight/{0}_timestamp{1}_feature{2}_weight_epoch14/weight".format(model_name,
time_stamp,
feature_num,
batch_size,
EPOCH)
# save_name = "../model/joint/{0}_timestamp{1}_feature{2}.h5".format(model_name,
# time_stamp,
# feature_num,
# batch_size,
# EPOCH)
# save_step_two_name = "../model/joint_two/{0}_timestamp{1}_feature{2}.h5".format(model_name,
# time_stamp,
# feature_num,
# batch_size,
# EPOCH)
'''Data file'''
file_name = "G:\data\SCADA数据\jb4q_8_delete_total_zero.csv"
'''
File notes: jb4q_8_delete_total_zero.csv is the file with only the all-zero columns removed.
Rows 0:415548 are all normal values (2019/7/30 00:00:00 - 2019/9/18 11:14:00).
Rows 415549:432153 are all abnormal values (2019/9/18 11:21:01 - 2021/1/18 00:00:00).
'''
'''Data file parameters'''
# last normal time index
healthy_date = 415548
# last abnormal time index
unhealthy_date = 432153
# abnormality tolerance (rows)
unhealthy_patience = 5
def remove(data, time_stamp=time_stamp):
rows, cols = data.shape
print("remove_data.shape:", data.shape)
num = int(rows / time_stamp)
return data[:num * time_stamp, :]
pass
# Non-overlapping sampling
def get_training_data(data, time_stamp: int = time_stamp):
removed_data = remove(data=data)
rows, cols = removed_data.shape
print("removed_data.shape:", data.shape)
print("removed_data:", removed_data)
train_data = np.reshape(removed_data, [-1, time_stamp, cols])
print("train_data:", train_data)
batchs, time_stamp, cols = train_data.shape
for i in range(1, batchs):
each_label = np.expand_dims(train_data[i, 0, :], axis=0)
if i == 1:
train_label = each_label
else:
train_label = np.concatenate([train_label, each_label], axis=0)
print("train_data.shape:", train_data.shape)
print("train_label.shape", train_label.shape)
return train_data[:-1, :], train_label
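# NOTE: each non-overlapping window returned above is paired with the first row of the
# following window as its label, which is why train_data[:-1] is returned.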
# Overlapping (sliding-window) sampling
def get_training_data_overlapping(data, time_stamp: int = time_stamp, is_Healthy: bool = True):
rows, cols = data.shape
train_data = np.empty(shape=[rows - time_stamp - 1, time_stamp, cols])
train_label = np.empty(shape=[rows - time_stamp - 1, cols])
for i in range(rows):
if i + time_stamp >= rows:
break
if i + time_stamp < rows - 1:
train_data[i] = data[i:i + time_stamp]
train_label[i] = data[i + time_stamp]
print("重叠采样以后:")
print("data:", train_data) # (300334,120,10)
print("label:", train_label) # (300334,10)
if is_Healthy:
train_label2 = np.ones(shape=[train_label.shape[0]])
else:
train_label2 = np.zeros(shape=[train_label.shape[0]])
print("label2:", train_label2)
return train_data, train_label, train_label2
# Normalization (min-max)
def normalization(data):
rows, cols = data.shape
print("归一化之前:", data)
print(data.shape)
print("======================")
# min-max normalization
max = np.max(data, axis=0)
max = np.broadcast_to(max, [rows, cols])
min = np.min(data, axis=0)
min = np.broadcast_to(min, [rows, cols])
data = (data - min) / (max - min)
print("归一化之后:", data)
print(data.shape)
return data
# Standardization (z-score)
def Regularization(data):
rows, cols = data.shape
print("正则化之前:", data)
print(data.shape)
print("======================")
# z-score standardization
mean = np.mean(data, axis=0)
mean = np.broadcast_to(mean, shape=[rows, cols])
dst = np.sqrt(np.var(data, axis=0))
dst = np.broadcast_to(dst, shape=[rows, cols])
data = (data - mean) / dst
print("正则化之后:", data)
print(data.shape)
return data
pass
def EWMA(data, K=K, namuda=namuda):
# what t represents is unclear for now; it is fixed at 0 here
t = 0
mid = np.mean(data, axis=0)
standard = np.sqrt(np.var(data, axis=0))
UCL = mid + K * standard * np.sqrt(namuda / (2 - namuda) * (1 - (1 - namuda) ** 2 * t))
LCL = mid - K * standard * np.sqrt(namuda / (2 - namuda) * (1 - (1 - namuda) ** 2 * t))
return mid, UCL, LCL
pass
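# NOTE: the limits above follow the EWMA control-chart form
#   UCL/LCL = mid +/- K * sigma * sqrt(lam / (2 - lam) * (1 - (1 - lam) ** (2 * t))),
# with namuda playing the role of the smoothing factor lambda. Because `(1 - namuda) ** 2 * t`
# parses as ((1 - namuda) ** 2) * t, setting t = 0 makes the bracket equal 1, i.e. the code
# returns the asymptotic (steady-state) limits. A minimal sketch of the time-dependent form,
# in case that was the intent (function and argument names here are illustrative only):
def ewma_limits(data, K=18, lam=0.01, t=None):
    mid = np.mean(data, axis=0)
    sigma = np.sqrt(np.var(data, axis=0))
    if t is None:
        width = np.sqrt(lam / (2 - lam))                                # steady-state width
    else:
        width = np.sqrt(lam / (2 - lam) * (1 - (1 - lam) ** (2 * t)))   # time-dependent width
    return mid, mid + K * sigma * width, mid - K * sigma * width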
def get_MSE(data, label, new_model):
predicted_data = new_model.predict(data)
temp = np.abs(predicted_data - label)
temp1 = (temp - np.broadcast_to(np.mean(temp, axis=0), shape=predicted_data.shape))
temp2 = np.broadcast_to(np.sqrt(np.var(temp, axis=0)), shape=predicted_data.shape)
temp3 = temp1 / temp2
mse = np.sum((temp1 / temp2) ** 2, axis=1)
print("z:", mse)
print(mse.shape)
# mse=np.mean((predicted_data-label)**2,axis=1)
print("mse", mse)
dims, = mse.shape
mean = np.mean(mse)
std = np.sqrt(np.var(mse))
max = mean + 3 * std
# min = mean-3*std
max = np.broadcast_to(max, shape=[dims, ])
# min = np.broadcast_to(min,shape=[dims,])
mean = np.broadcast_to(mean, shape=[dims, ])
# plt.plot(max)
# plt.plot(mse)
# plt.plot(mean)
# # plt.plot(min)
# plt.show()
#
#
return mse, mean, max
# pass
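# NOTE: despite its name, the statistic returned by get_MSE above is not a plain mean-squared
# error: it z-scores the absolute prediction residuals per feature, sums the squares across
# features, and thresholds the result at mean + 3 * std. A minimal equivalent sketch
# (function and variable names here are illustrative only):
def health_index(pred, label):
    e = np.abs(pred - label)                                     # absolute residual per sample and feature
    z = (e - np.mean(e, axis=0)) / np.sqrt(np.var(e, axis=0))    # z-score each feature column
    hi = np.sum(z ** 2, axis=1)                                  # one health-index value per sample
    threshold = np.mean(hi) + 3 * np.sqrt(np.var(hi))            # 3-sigma alarm threshold
    return hi, threshold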
def condition_monitoring_model():
input = tf.keras.Input(shape=[time_stamp, feature_num])
conv1 = tf.keras.layers.Conv1D(filters=256, kernel_size=1)(input)
GRU1 = tf.keras.layers.GRU(128, return_sequences=False)(conv1)
d1 = tf.keras.layers.Dense(300)(GRU1)
output = tf.keras.layers.Dense(10)(d1)
model = tf.keras.Model(inputs=input, outputs=output)
return model
# train_data: (300455, 120, 10)
# train_label1: (300455, 10)
# train_label2: (300455,)
def shuffle(train_data, train_label1, train_label2, is_split: bool = False, split_size: float = 0.2):
(train_data, test_data, train_label1, test_label1, train_label2, test_label2) = train_test_split(train_data,
train_label1,
train_label2,
test_size=split_size,
shuffle=True,
random_state=100)
if is_split:
return train_data, train_label1, train_label2, test_data, test_label1, test_label2
train_data = np.concatenate([train_data, test_data], axis=0)
train_label1 = np.concatenate([train_label1, test_label1], axis=0)
train_label2 = np.concatenate([train_label2, test_label2], axis=0)
# print(train_data.shape)
# print(train_label1.shape)
# print(train_label2.shape)
# print(train_data.shape)
return train_data, train_label1, train_label2
pass
def split_test_data(healthy_data, healthy_label1, healthy_label2, unhealthy_data, unhealthy_label1, unhealthy_label2,
split_size: float = 0.2, shuffle: bool = True):
data = np.concatenate([healthy_data, unhealthy_data], axis=0)
label1 = np.concatenate([healthy_label1, unhealthy_label1], axis=0)
label2 = np.concatenate([healthy_label2, unhealthy_label2], axis=0)
(train_data, test_data, train_label1, test_label1, train_label2, test_label2) = train_test_split(data,
label1,
label2,
test_size=split_size,
shuffle=shuffle,
random_state=100)
# print(train_data.shape)
# print(train_label1.shape)
# print(train_label2.shape)
# print(train_data.shape)
return train_data, train_label1, train_label2, test_data, test_label1, test_label2
pass
def test(step_one_model, step_two_model, test_data, test_label1, test_label2):
history_loss = []
history_val_loss = []
val_loss, val_accuracy = step_two_model.get_val_loss(val_data=test_data, val_label1=test_label1,
val_label2=test_label2,
is_first_time=False, step_one_model=step_one_model)
history_val_loss.append(val_loss)
print("val_accuracy:", val_accuracy)
print("val_loss:", val_loss)
def showResult(step_two_model: Joint_Monitoring, test_data, isPlot: bool = False):
# Get the total number of model parameters
# step_two_model.count_params()
total_result = []
size, length, dims = test_data.shape
for epoch in range(0, size - batch_size + 1, batch_size):
each_test_data = test_data[epoch:epoch + batch_size, :, :]
_, _, _, output4 = step_two_model.call(each_test_data, is_first_time=False)
total_result.append(output4)
total_result = np.reshape(total_result, [total_result.__len__(), -1])
total_result = np.reshape(total_result, [-1, ])
if isPlot:
plt.scatter(list(range(total_result.shape[0])), total_result, c='black', s=10)
# Draw the failure-threshold horizontal line at y = 0.5
plt.axhline(0.5, c='red', label='Failure threshold')
# Arrow pointing at the threshold line (left disabled)
# plt.arrow(35000, 0.9, 33000, 0.75, head_width=0.02, head_length=0.1, shape="full", fc='red', ec='red',
# alpha=0.9, overhang=0.5)
# plt.text(35000, 0.9, "Truth Fault", fontsize=10, color='black', verticalalignment='top')
plt.axvline(test_data.shape[0] * 2 / 3, c='blue', ls='-.')
plt.xlabel("time")
plt.ylabel("confidence")
plt.text(total_result.shape[0] * 4 / 5, 0.6, "Fault", fontsize=10, color='black', verticalalignment='top',
horizontalalignment='center',
bbox={'facecolor': 'grey',
'pad': 10})
plt.text(total_result.shape[0] * 1 / 3, 0.4, "Norm", fontsize=10, color='black', verticalalignment='top',
horizontalalignment='center',
bbox={'facecolor': 'grey',
'pad': 10})
plt.grid()
# plt.ylim(0, 1)
# plt.xlim(-50, 1300)
# plt.legend("", loc='upper left')
plt.show()
return total_result
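# NOTE: despite its name, GRU_Model below builds a stack of dilated causal Conv1D layers
# (a TCN-style receptive field), not GRU cells.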
def GRU_Model():
input = tf.keras.Input(shape=[time_stamp, feature_num])
input = tf.cast(input, tf.float32)
LSTM = tf.keras.layers.Conv1D(10, 3, padding="causal",dilation_rate=2)(input)
LSTM = tf.keras.layers.Conv1D(20, 3, padding="causal",dilation_rate=4)(LSTM)
LSTM = tf.keras.layers.Conv1D(40, 3, padding="causal",dilation_rate=8)(LSTM)
LSTM = tf.keras.layers.Conv1D(40, 3, padding="causal",dilation_rate=16)(LSTM)
LSTM = tf.keras.layers.Conv1D(40, 3, padding="causal",dilation_rate=32)(LSTM)
LSTM = tf.keras.layers.Conv1D(40, 3, padding="causal",dilation_rate=64)(LSTM)
LSTM = tf.keras.layers.Conv1D(40, 3, padding="causal",dilation_rate=128)(LSTM)
# LSTM = tf.keras.layers.Conv1D(40, 3, padding="causal",dilation_rate=2)(LSTM)
# bn = tf.keras.layers.BatchNormalization()(LSTM)
d1 = tf.keras.layers.Dense(20)(LSTM)
# bn = tf.keras.layers.BatchNormalization()(d1)
output = tf.keras.layers.Dense(10, name='output')(d1)
model = tf.keras.Model(inputs=input, outputs=output)
return model
pass
if __name__ == '__main__':
total_data = loadData.execute(N=feature_num, file_name=file_name)
total_data = normalization(data=total_data)
train_data_healthy, train_label1_healthy, train_label2_healthy = get_training_data_overlapping(
total_data[:healthy_date, :], is_Healthy=True)
train_data_unhealthy, train_label1_unhealthy, train_label2_unhealthy = get_training_data_overlapping(
total_data[healthy_date - time_stamp + unhealthy_patience:unhealthy_date, :],
is_Healthy=False)
#### TODO Step-one training
# single-run test
model = GRU_Model()
model.compile(optimizer=tf.optimizers.Adam(0.01), loss=tf.losses.mse)
model.summary()
early_stop = EarlyStopping(monitor='val_loss', min_delta=0.0001, patience=3, mode='min', verbose=1)
checkpoint = tf.keras.callbacks.ModelCheckpoint(
filepath=save_name,
monitor='val_loss',
verbose=1,
save_best_only=True,
mode='min',
period=1)
history = model.fit(train_data_healthy[:30000, :, :], train_label1_healthy[:30000, :], epochs=20,
batch_size=32, validation_split=0.2, shuffle=False, verbose=1,
callbacks=[checkpoint, early_stop])
model.save(save_name)
## TODO testing
test_data, test_label = get_training_data(total_data[:300455, :])
newModel = tf.keras.models.load_model(save_name)
mse, mean, max = get_MSE(test_data, test_label, new_model=newModel)
test_data, test_label = get_training_data(total_data[20000:, :])
predicted_data = newModel.predict(test_data)
rows, cols = predicted_data.shape
temp = np.abs(predicted_data - test_label)
temp1 = (temp - np.broadcast_to(np.mean(temp, axis=0), shape=predicted_data.shape))
temp2 = np.broadcast_to(np.sqrt(np.var(temp, axis=0)), shape=predicted_data.shape)
temp3 = temp1 / temp2
mse = np.sum((temp1 / temp2) ** 2, axis=1)
plt.plot(mse)
plt.plot(mean)
plt.plot(max)
plt.show()
data = pd.DataFrame(mse).ewm(span=3).mean()
print(data)
data = np.array(data)
index, _ = data.shape
for i in range(2396):
if data[i, 0] > 5:
data[i, 0] = data[i - 1, :]
print(data)
mean = data[2000:2396, :].mean()
std = data[2000:2396, :].std()
mean = np.broadcast_to(mean, shape=[500, ])
std = np.broadcast_to(std, shape=[500, ])
plt.plot(data[2000:2396, :])
plt.plot(mean)
plt.plot(mean + 3 * std)
plt.plot(mean - 3 * std)
plt.show()
pass

View File

@ -0,0 +1,10 @@
# -*- coding: utf-8 -*-
# coding: utf-8
'''
@Author : dingjiawen
@Date : 2022/10/11 18:52
@Usage : Comparison experiment with the same depth as JointNet, used for prediction
@Desc :
'''

View File

@ -0,0 +1,473 @@
# -*- coding: utf-8 -*-
# coding: utf-8
'''
@Author : dingjiawen
@Date : 2022/10/11 18:52
@Usage : Comparison experiment with the same depth as JointNet, used for prediction
@Desc :
'''
import tensorflow as tf
import tensorflow.keras
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from model.DepthwiseCon1D.DepthwiseConv1D import DepthwiseConv1D
from model.Dynamic_channelAttention.Dynamic_channelAttention import DynamicChannelAttention
from condition_monitoring.data_deal import loadData
from model.Joint_Monitoring.Joint_Monitoring3 import Joint_Monitoring
from model.CommonFunction.CommonFunction import *
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import load_model, save_model
from keras.callbacks import EarlyStopping
'''Hyperparameter settings'''
time_stamp = 120
feature_num = 10
batch_size = 16
learning_rate = 0.001
EPOCH = 101
model_name = "GRU"
'''EWMA hyperparameters'''
K = 18
namuda = 0.01
'''Save paths'''
save_name = "./model/{0}_timestamp{1}_feature{2}.h5".format(model_name,
time_stamp,
feature_num,
batch_size,
EPOCH)
save_step_two_name = "../hard_model/two_weight/{0}_timestamp{1}_feature{2}_weight_epoch14/weight".format(model_name,
time_stamp,
feature_num,
batch_size,
EPOCH)
# save_name = "../model/joint/{0}_timestamp{1}_feature{2}.h5".format(model_name,
# time_stamp,
# feature_num,
# batch_size,
# EPOCH)
# save_step_two_name = "../model/joint_two/{0}_timestamp{1}_feature{2}.h5".format(model_name,
# time_stamp,
# feature_num,
# batch_size,
# EPOCH)
'''Data file'''
file_name = "G:\data\SCADA数据\jb4q_8_delete_total_zero.csv"
'''
File notes: jb4q_8_delete_total_zero.csv is the file with only the all-zero columns removed.
Rows 0:415548 are all normal values (2019/7/30 00:00:00 - 2019/9/18 11:14:00).
Rows 415549:432153 are all abnormal values (2019/9/18 11:21:01 - 2021/1/18 00:00:00).
'''
'''Data file parameters'''
# last normal time index
healthy_date = 415548
# last abnormal time index
unhealthy_date = 432153
# abnormality tolerance (rows)
unhealthy_patience = 5
def remove(data, time_stamp=time_stamp):
rows, cols = data.shape
print("remove_data.shape:", data.shape)
num = int(rows / time_stamp)
return data[:num * time_stamp, :]
pass
# Non-overlapping sampling
def get_training_data(data, time_stamp: int = time_stamp):
removed_data = remove(data=data)
rows, cols = removed_data.shape
print("removed_data.shape:", data.shape)
print("removed_data:", removed_data)
train_data = np.reshape(removed_data, [-1, time_stamp, cols])
print("train_data:", train_data)
batchs, time_stamp, cols = train_data.shape
for i in range(1, batchs):
each_label = np.expand_dims(train_data[i, 0, :], axis=0)
if i == 1:
train_label = each_label
else:
train_label = np.concatenate([train_label, each_label], axis=0)
print("train_data.shape:", train_data.shape)
print("train_label.shape", train_label.shape)
return train_data[:-1, :], train_label
# Overlapping (sliding-window) sampling
def get_training_data_overlapping(data, time_stamp: int = time_stamp, is_Healthy: bool = True):
rows, cols = data.shape
train_data = np.empty(shape=[rows - time_stamp - 1, time_stamp, cols])
train_label = np.empty(shape=[rows - time_stamp - 1, cols])
for i in range(rows):
if i + time_stamp >= rows:
break
if i + time_stamp < rows - 1:
train_data[i] = data[i:i + time_stamp]
train_label[i] = data[i + time_stamp]
print("重叠采样以后:")
print("data:", train_data) # (300334,120,10)
print("label:", train_label) # (300334,10)
if is_Healthy:
train_label2 = np.ones(shape=[train_label.shape[0]])
else:
train_label2 = np.zeros(shape=[train_label.shape[0]])
print("label2:", train_label2)
return train_data, train_label, train_label2
# Normalization (min-max)
def normalization(data):
rows, cols = data.shape
print("归一化之前:", data)
print(data.shape)
print("======================")
# min-max normalization
max = np.max(data, axis=0)
max = np.broadcast_to(max, [rows, cols])
min = np.min(data, axis=0)
min = np.broadcast_to(min, [rows, cols])
data = (data - min) / (max - min)
print("归一化之后:", data)
print(data.shape)
return data
# Standardization (z-score)
def Regularization(data):
rows, cols = data.shape
print("正则化之前:", data)
print(data.shape)
print("======================")
# z-score standardization
mean = np.mean(data, axis=0)
mean = np.broadcast_to(mean, shape=[rows, cols])
dst = np.sqrt(np.var(data, axis=0))
dst = np.broadcast_to(dst, shape=[rows, cols])
data = (data - mean) / dst
print("正则化之后:", data)
print(data.shape)
return data
pass
def EWMA(data, K=K, namuda=namuda):
# what t represents is unclear for now; it is fixed at 0 here
t = 0
mid = np.mean(data, axis=0)
standard = np.sqrt(np.var(data, axis=0))
UCL = mid + K * standard * np.sqrt(namuda / (2 - namuda) * (1 - (1 - namuda) ** 2 * t))
LCL = mid - K * standard * np.sqrt(namuda / (2 - namuda) * (1 - (1 - namuda) ** 2 * t))
return mid, UCL, LCL
pass
def get_MSE(data, label, new_model, isStandard: bool = True, isPlot: bool = True):
predicted_data = new_model.predict(data)
temp = np.abs(predicted_data - label)
temp1 = (temp - np.broadcast_to(np.mean(temp, axis=0), shape=predicted_data.shape))
temp2 = np.broadcast_to(np.sqrt(np.var(temp, axis=0)), shape=predicted_data.shape)
temp3 = temp1 / temp2
mse = np.sum((temp1 / temp2) ** 2, axis=1)
print("z:", mse)
print(mse.shape)
# mse=np.mean((predicted_data-label)**2,axis=1)
print("mse", mse)
if isStandard:
dims, = mse.shape
mean = np.mean(mse)
std = np.sqrt(np.var(mse))
max = mean + 3 * std
print("max:", max)
# min = mean-3*std
max = np.broadcast_to(max, shape=[dims, ])
# min = np.broadcast_to(min,shape=[dims,])
mean = np.broadcast_to(mean, shape=[dims, ])
if isPlot:
plt.plot(max)
plt.plot(mse)
plt.plot(mean)
# plt.plot(min)
plt.show()
else:
if isPlot:
plt.plot(mse)
# plt.plot(min)
plt.show()
return mse
return mse, mean, max
# pass
def condition_monitoring_model():
input = tf.keras.Input(shape=[time_stamp, feature_num])
conv1 = tf.keras.layers.Conv1D(filters=256, kernel_size=1)(input)
GRU1 = tf.keras.layers.GRU(128, return_sequences=False)(conv1)
d1 = tf.keras.layers.Dense(300)(GRU1)
output = tf.keras.layers.Dense(10)(d1)
model = tf.keras.Model(inputs=input, outputs=output)
return model
# train_data: (300455, 120, 10)
# train_label1: (300455, 10)
# train_label2: (300455,)
def shuffle(train_data, train_label1, train_label2, is_split: bool = False, split_size: float = 0.2):
(train_data, test_data, train_label1, test_label1, train_label2, test_label2) = train_test_split(train_data,
train_label1,
train_label2,
test_size=split_size,
shuffle=True,
random_state=100)
if is_split:
return train_data, train_label1, train_label2, test_data, test_label1, test_label2
train_data = np.concatenate([train_data, test_data], axis=0)
train_label1 = np.concatenate([train_label1, test_label1], axis=0)
train_label2 = np.concatenate([train_label2, test_label2], axis=0)
# print(train_data.shape)
# print(train_label1.shape)
# print(train_label2.shape)
# print(train_data.shape)
return train_data, train_label1, train_label2
pass
def split_test_data(healthy_data, healthy_label1, healthy_label2, unhealthy_data, unhealthy_label1, unhealthy_label2,
split_size: float = 0.2, shuffle: bool = True):
data = np.concatenate([healthy_data, unhealthy_data], axis=0)
label1 = np.concatenate([healthy_label1, unhealthy_label1], axis=0)
label2 = np.concatenate([healthy_label2, unhealthy_label2], axis=0)
(train_data, test_data, train_label1, test_label1, train_label2, test_label2) = train_test_split(data,
label1,
label2,
test_size=split_size,
shuffle=shuffle,
random_state=100)
# print(train_data.shape)
# print(train_label1.shape)
# print(train_label2.shape)
# print(train_data.shape)
return train_data, train_label1, train_label2, test_data, test_label1, test_label2
pass
def test(step_one_model, step_two_model, test_data, test_label1, test_label2):
history_loss = []
history_val_loss = []
val_loss, val_accuracy = step_two_model.get_val_loss(val_data=test_data, val_label1=test_label1,
val_label2=test_label2,
is_first_time=False, step_one_model=step_one_model)
history_val_loss.append(val_loss)
print("val_accuracy:", val_accuracy)
print("val_loss:", val_loss)
def showResult(step_two_model: Joint_Monitoring, test_data, isPlot: bool = False):
# Get the total number of model parameters
# step_two_model.count_params()
total_result = []
size, length, dims = test_data.shape
for epoch in range(0, size - batch_size + 1, batch_size):
each_test_data = test_data[epoch:epoch + batch_size, :, :]
_, _, _, output4 = step_two_model.call(each_test_data, is_first_time=False)
total_result.append(output4)
total_result = np.reshape(total_result, [total_result.__len__(), -1])
total_result = np.reshape(total_result, [-1, ])
if isPlot:
plt.scatter(list(range(total_result.shape[0])), total_result, c='black', s=10)
# Draw the failure-threshold horizontal line at y = 0.5
plt.axhline(0.5, c='red', label='Failure threshold')
# Arrow pointing at the threshold line (left disabled)
# plt.arrow(35000, 0.9, 33000, 0.75, head_width=0.02, head_length=0.1, shape="full", fc='red', ec='red',
# alpha=0.9, overhang=0.5)
# plt.text(35000, 0.9, "Truth Fault", fontsize=10, color='black', verticalalignment='top')
plt.axvline(test_data.shape[0] * 2 / 3, c='blue', ls='-.')
plt.xlabel("time")
plt.ylabel("confidence")
plt.text(total_result.shape[0] * 4 / 5, 0.6, "Fault", fontsize=10, color='black', verticalalignment='top',
horizontalalignment='center',
bbox={'facecolor': 'grey',
'pad': 10})
plt.text(total_result.shape[0] * 1 / 3, 0.4, "Norm", fontsize=10, color='black', verticalalignment='top',
horizontalalignment='center',
bbox={'facecolor': 'grey',
'pad': 10})
plt.grid()
# plt.ylim(0, 1)
# plt.xlim(-50, 1300)
# plt.legend("", loc='upper left')
plt.show()
return total_result
def GRU_Model():
input = tf.keras.Input(shape=[time_stamp, feature_num])
input = tf.cast(input, tf.float32)
LSTM = tf.keras.layers.GRU(units=10, return_sequences=True)(input)
LSTM = tf.keras.layers.GRU(units=20, return_sequences=True)(LSTM)
LSTM = tf.keras.layers.GRU(units=40, return_sequences=True)(LSTM)
LSTM = tf.keras.layers.GRU(units=40, return_sequences=True)(LSTM)
LSTM = tf.keras.layers.GRU(units=40, return_sequences=True)(LSTM)
LSTM = tf.keras.layers.GRU(units=40, return_sequences=True)(LSTM)
LSTM = tf.keras.layers.GRU(units=40, return_sequences=True)(LSTM)
LSTM = tf.keras.layers.GRU(units=40, return_sequences=False)(LSTM)
# bn = tf.keras.layers.BatchNormalization()(LSTM)
d1 = tf.keras.layers.Dense(20)(LSTM)
# bn = tf.keras.layers.BatchNormalization()(d1)
output = tf.keras.layers.Dense(10, name='output')(d1)
model = tf.keras.Model(inputs=input, outputs=output)
return model
pass
# healthy_data is the healthy data used to determine the threshold; all_data is the complete data used to produce the model's results
def getResult(model: tf.keras.Model, healthy_data, healthy_label, unhealthy_data, unhealthy_label, isPlot: bool = False,
isSave: bool = False):
# TODO Compute the MSE statistic to determine the threshold
mse, mean, max = get_MSE(healthy_data, healthy_label, model)
# False-alarm-rate calculation
total, = mse.shape
faultNum = 0
faultList = []
for i in range(total):
if (mse[i] > max[i]):
faultNum += 1
faultList.append(mse[i])
fault_rate = faultNum / total
print("误报率:", fault_rate)
# Miss-rate calculation
missNum = 0
missList = []
mse1 = get_MSE(unhealthy_data, unhealthy_label, model,isStandard=False)
all,= mse1.shape
for i in range(all):
if (mse1[i] < max[0]):
missNum += 1
missList.append(mse1[i])
miss_rate = missNum / all
print("漏报率:", miss_rate)
pass
if __name__ == '__main__':
total_data = loadData.execute(N=feature_num, file_name=file_name)
total_data = normalization(data=total_data)
train_data_healthy, train_label1_healthy, train_label2_healthy = get_training_data_overlapping(
total_data[:healthy_date, :], is_Healthy=True)
train_data_unhealthy, train_label1_unhealthy, train_label2_unhealthy = get_training_data_overlapping(
total_data[healthy_date - time_stamp + unhealthy_patience:unhealthy_date, :],
is_Healthy=False)
#### TODO Step-one training
# single-run test
model = GRU_Model()
model.compile(optimizer=tf.optimizers.Adam(0.01), loss=tf.losses.mse)
model.summary()
early_stop = EarlyStopping(monitor='val_loss', min_delta=0.0001, patience=5, mode='min', verbose=1)
checkpoint = tf.keras.callbacks.ModelCheckpoint(
filepath=save_name,
monitor='val_loss',
verbose=1,
save_best_only=True,
mode='min',
period=1)
history = model.fit(train_data_healthy[:30000,:,:], train_label1_healthy[:30000,:], epochs=10,
batch_size=32,validation_split=0.2,shuffle=False, verbose=1,callbacks=[checkpoint,early_stop])
# model.save(save_name)
## TODO testing
# test_data, test_label = get_training_data(total_data[:healthy_date, :])
# newModel = tf.keras.models.load_model(save_name)
# mse, mean, max = get_MSE(test_data, test_label, new_model=newModel)
healthy_size, _, _ = train_data_healthy.shape
unhealthy_size, _, _ = train_data_unhealthy.shape
all_data, _, _ = get_training_data_overlapping(
total_data[healthy_size - 2 * unhealthy_size:unhealthy_date, :], is_Healthy=True)
newModel = tf.keras.models.load_model(save_name)
getResult(newModel, healthy_data=train_data_healthy[healthy_size - 2 * unhealthy_size:, :],
healthy_label=train_label1_healthy[healthy_size - 2 * unhealthy_size:, :],
unhealthy_data=train_data_unhealthy, unhealthy_label=train_label1_unhealthy)
# mse, mean, max = get_MSE(train_data_healthy[healthy_size - 2 * unhealthy_size:, :],
# train_label1_healthy[healthy_size - 2 * unhealthy_size:, :], new_model=newModel)
# test_data, test_label = get_training_data(total_data[20000:, :])
# predicted_data = newModel.predict(test_data)
# rows, cols = predicted_data.shape
#
# temp = np.abs(predicted_data - test_label)
# temp1 = (temp - np.broadcast_to(np.mean(temp, axis=0), shape=predicted_data.shape))
# temp2 = np.broadcast_to(np.sqrt(np.var(temp, axis=0)), shape=predicted_data.shape)
# temp3 = temp1 / temp2
# mse = np.sum((temp1 / temp2) ** 2, axis=1)
#
# plt.plot(mse)
# plt.plot(mean)
# plt.plot(max)
# plt.show()
#
# data = pd.DataFrame(mse).ewm(span=3).mean()
# print(data)
# data = np.array(data)
#
# index, _ = data.shape
#
# for i in range(2396):
# if data[i, 0] > 5:
# data[i, 0] = data[i - 1, :]
# print(data)
# mean = data[2000:2396, :].mean()
# std = data[2000:2396, :].std()
# mean = np.broadcast_to(mean, shape=[500, ])
# std = np.broadcast_to(std, shape=[500, ])
# plt.plot(data[2000:2396, :])
# plt.plot(mean)
# plt.plot(mean + 3 * std)
# plt.plot(mean - 3 * std)
# plt.show()
pass

View File

@ -0,0 +1,10 @@
# -*- coding: utf-8 -*-
# coding: utf-8
'''
@Author : dingjiawen
@Date : 2022/10/11 18:55
@Usage :
@Desc :
'''

View File

@ -0,0 +1,10 @@
# -*- coding: utf-8 -*-
# coding: utf-8
'''
@Author : dingjiawen
@Date : 2022/10/11 18:54
@Usage :
@Desc :
'''

View File

@ -0,0 +1,10 @@
# -*- coding: utf-8 -*-
# coding: utf-8
'''
@Author : dingjiawen
@Date : 2022/10/11 18:53
@Usage :
@Desc :
'''

View File

@ -0,0 +1,10 @@
# -*- coding: utf-8 -*-
# coding: utf-8
'''
@Author : dingjiawen
@Date : 2022/10/11 18:53
@Usage :
@Desc :
'''

View File

@ -0,0 +1,10 @@
# -*- coding: utf-8 -*-
# coding: utf-8
'''
@Author : dingjiawen
@Date : 2022/10/11 19:00
@Usage :
@Desc :
'''

View File

@ -0,0 +1,10 @@
# -*- coding: utf-8 -*-
# coding: utf-8
'''
@Author : dingjiawen
@Date : 2022/10/11 18:55
@Usage :
@Desc :
'''

View File

@ -0,0 +1,579 @@
# -*- coding: utf-8 -*-
# coding: utf-8
'''
@Author : dingjiawen
@Date : 2022/10/11 18:53
@Usage : Comparison experiment with the same depth as JointNet, without DCAU, used for prediction
@Desc :
'''
import tensorflow as tf
import tensorflow.keras
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from model.DepthwiseCon1D.DepthwiseConv1D import DepthwiseConv1D
from model.Dynamic_channelAttention.Dynamic_channelAttention import DynamicChannelAttention
from condition_monitoring.data_deal import loadData
from model.Joint_Monitoring.Joint_Monitoring3 import Joint_Monitoring
from model.CommonFunction.CommonFunction import *
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import load_model, save_model
'''Hyperparameter settings'''
time_stamp = 120
feature_num = 10
batch_size = 16
learning_rate = 0.001
EPOCH = 101
model_name = "RNet"
'''EWMA hyperparameters'''
K = 18
namuda = 0.01
'''Save paths'''
save_name = "../hard_model/weight/{0}_timestamp{1}_feature{2}_weight_epoch8/weight".format(model_name,
time_stamp,
feature_num,
batch_size,
EPOCH)
save_step_two_name = "../hard_model/two_weight/{0}_timestamp{1}_feature{2}_weight_epoch14/weight".format(model_name,
time_stamp,
feature_num,
batch_size,
EPOCH)
# save_name = "../model/joint/{0}_timestamp{1}_feature{2}.h5".format(model_name,
# time_stamp,
# feature_num,
# batch_size,
# EPOCH)
# save_step_two_name = "../model/joint_two/{0}_timestamp{1}_feature{2}.h5".format(model_name,
# time_stamp,
# feature_num,
# batch_size,
# EPOCH)
'''Data file'''
file_name = "G:\data\SCADA数据\jb4q_8_delete_total_zero.csv"
'''
File notes: jb4q_8_delete_total_zero.csv is the file with only the all-zero columns removed.
Rows 0:415548 are all normal values (2019/7/30 00:00:00 - 2019/9/18 11:14:00).
Rows 415549:432153 are all abnormal values (2019/9/18 11:21:01 - 2021/1/18 00:00:00).
'''
'''Data file parameters'''
# last normal time index
healthy_date = 415548
# last abnormal time index
unhealthy_date = 432153
# abnormality tolerance (rows)
unhealthy_patience = 5
def remove(data, time_stamp=time_stamp):
rows, cols = data.shape
print("remove_data.shape:", data.shape)
num = int(rows / time_stamp)
return data[:num * time_stamp, :]
pass
# Non-overlapping sampling
def get_training_data(data, time_stamp: int = time_stamp):
removed_data = remove(data=data)
rows, cols = removed_data.shape
print("removed_data.shape:", data.shape)
print("removed_data:", removed_data)
train_data = np.reshape(removed_data, [-1, time_stamp, cols])
print("train_data:", train_data)
batchs, time_stamp, cols = train_data.shape
for i in range(1, batchs):
each_label = np.expand_dims(train_data[i, 0, :], axis=0)
if i == 1:
train_label = each_label
else:
train_label = np.concatenate([train_label, each_label], axis=0)
print("train_data.shape:", train_data.shape)
print("train_label.shape", train_label.shape)
return train_data[:-1, :], train_label
# Overlapping (sliding-window) sampling
def get_training_data_overlapping(data, time_stamp: int = time_stamp, is_Healthy: bool = True):
rows, cols = data.shape
train_data = np.empty(shape=[rows - time_stamp - 1, time_stamp, cols])
train_label = np.empty(shape=[rows - time_stamp - 1, cols])
for i in range(rows):
if i + time_stamp >= rows:
break
if i + time_stamp < rows - 1:
train_data[i] = data[i:i + time_stamp]
train_label[i] = data[i + time_stamp]
print("重叠采样以后:")
print("data:", train_data) # (300334,120,10)
print("label:", train_label) # (300334,10)
if is_Healthy:
train_label2 = np.ones(shape=[train_label.shape[0]])
else:
train_label2 = np.zeros(shape=[train_label.shape[0]])
print("label2:", train_label2)
return train_data, train_label, train_label2
# RepConv: re-parameterizable convolution block (kxk Conv+BN, 1x1 Conv+BN and identity BN, summed then ReLU)
def RepConv(input_tensor, k=3):
_, _, output_dim = input_tensor.shape
conv1 = tf.keras.layers.Conv1D(filters=output_dim, kernel_size=k, strides=1, padding='SAME')(input_tensor)
b1 = tf.keras.layers.BatchNormalization()(conv1)
conv2 = tf.keras.layers.Conv1D(filters=output_dim, kernel_size=1, strides=1, padding='SAME')(input_tensor)
b2 = tf.keras.layers.BatchNormalization()(conv2)
b3 = tf.keras.layers.BatchNormalization()(input_tensor)
out = tf.keras.layers.Add()([b1, b2, b3])
out = tf.nn.relu(out)
return out
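# NOTE: this is the training-time, three-branch form; true re-parameterization would fuse the
# branches into a single conv at inference, which is not done here.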
# RepBlock: a stack of RepConv blocks
def RepBlock(input_tensor, num: int = 3):
for i in range(num):
input_tensor = RepConv(input_tensor)
return input_tensor
# GAP: global-average-pooling channel attention
def Global_avg_channelAttention(input_tensor):
_, length, channel = input_tensor.shape
DWC1 = DepthwiseConv1D(kernel_size=1, padding='SAME')(input_tensor)
GAP = tf.keras.layers.GlobalAvgPool1D()(DWC1)
c1 = tf.keras.layers.Conv1D(filters=channel, kernel_size=1, padding='SAME')(GAP)
s1 = tf.nn.sigmoid(c1)
output = tf.multiply(input_tensor, s1)
return output
# GDP: global dynamic pooling (GAP + GMP) channel attention
def Global_Dynamic_channelAttention(input_tensor):
_, length, channel = input_tensor.shape
DWC1 = DepthwiseConv1D(kernel_size=1, padding='SAME')(input_tensor)
# GAP
GAP = tf.keras.layers.GlobalAvgPool1D()(DWC1)
c1 = tf.keras.layers.Conv1D(filters=channel, kernel_size=1, padding='SAME')(GAP)
s1 = tf.nn.sigmoid(c1)
# GMP
GMP = tf.keras.layers.GlobalMaxPool1D()(DWC1)
c2 = tf.keras.layers.Conv1D(filters=channel, kernel_size=1, padding='SAME')(GMP)
s3 = tf.nn.sigmoid(c2)
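# NOTE: the GMP branch (c2 / s3) is computed but not used below; the output is weighted by the GAP branch (s1) only.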
output = tf.multiply(input_tensor, s1)
return output
# Normalization (min-max)
def normalization(data):
rows, cols = data.shape
print("归一化之前:", data)
print(data.shape)
print("======================")
# min-max normalization
max = np.max(data, axis=0)
max = np.broadcast_to(max, [rows, cols])
min = np.min(data, axis=0)
min = np.broadcast_to(min, [rows, cols])
data = (data - min) / (max - min)
print("归一化之后:", data)
print(data.shape)
return data
# Standardization (z-score)
def Regularization(data):
rows, cols = data.shape
print("正则化之前:", data)
print(data.shape)
print("======================")
# z-score standardization
mean = np.mean(data, axis=0)
mean = np.broadcast_to(mean, shape=[rows, cols])
dst = np.sqrt(np.var(data, axis=0))
dst = np.broadcast_to(dst, shape=[rows, cols])
data = (data - mean) / dst
print("正则化之后:", data)
print(data.shape)
return data
pass
def EWMA(data, K=K, namuda=namuda):
# what t represents is unclear for now; it is fixed at 0 here
t = 0
mid = np.mean(data, axis=0)
standard = np.sqrt(np.var(data, axis=0))
UCL = mid + K * standard * np.sqrt(namuda / (2 - namuda) * (1 - (1 - namuda) ** 2 * t))
LCL = mid - K * standard * np.sqrt(namuda / (2 - namuda) * (1 - (1 - namuda) ** 2 * t))
return mid, UCL, LCL
pass
def get_MSE(data, label, new_model):
predicted_data = new_model.predict(data)
temp = np.abs(predicted_data - label)
temp1 = (temp - np.broadcast_to(np.mean(temp, axis=0), shape=predicted_data.shape))
temp2 = np.broadcast_to(np.sqrt(np.var(temp, axis=0)), shape=predicted_data.shape)
temp3 = temp1 / temp2
mse = np.sum((temp1 / temp2) ** 2, axis=1)
print("z:", mse)
print(mse.shape)
# mse=np.mean((predicted_data-label)**2,axis=1)
print("mse", mse)
dims, = mse.shape
mean = np.mean(mse)
std = np.sqrt(np.var(mse))
max = mean + 3 * std
# min = mean-3*std
max = np.broadcast_to(max, shape=[dims, ])
# min = np.broadcast_to(min,shape=[dims,])
mean = np.broadcast_to(mean, shape=[dims, ])
# plt.plot(max)
# plt.plot(mse)
# plt.plot(mean)
# # plt.plot(min)
# plt.show()
#
#
return mse, mean, max
# pass
def condition_monitoring_model():
input = tf.keras.Input(shape=[time_stamp, feature_num])
conv1 = tf.keras.layers.Conv1D(filters=256, kernel_size=1)(input)
GRU1 = tf.keras.layers.GRU(128, return_sequences=False)(conv1)
d1 = tf.keras.layers.Dense(300)(GRU1)
output = tf.keras.layers.Dense(10)(d1)
model = tf.keras.Model(inputs=input, outputs=output)
return model
# train_data: (300455, 120, 10)
# train_label1: (300455, 10)
# train_label2: (300455,)
def shuffle(train_data, train_label1, train_label2, is_split: bool = False, split_size: float = 0.2):
(train_data, test_data, train_label1, test_label1, train_label2, test_label2) = train_test_split(train_data,
train_label1,
train_label2,
test_size=split_size,
shuffle=True,
random_state=100)
if is_split:
return train_data, train_label1, train_label2, test_data, test_label1, test_label2
train_data = np.concatenate([train_data, test_data], axis=0)
train_label1 = np.concatenate([train_label1, test_label1], axis=0)
train_label2 = np.concatenate([train_label2, test_label2], axis=0)
# print(train_data.shape)
# print(train_label1.shape)
# print(train_label2.shape)
# print(train_data.shape)
return train_data, train_label1, train_label2
pass
def split_test_data(healthy_data, healthy_label1, healthy_label2, unhealthy_data, unhealthy_label1, unhealthy_label2,
split_size: float = 0.2, shuffle: bool = True):
data = np.concatenate([healthy_data, unhealthy_data], axis=0)
label1 = np.concatenate([healthy_label1, unhealthy_label1], axis=0)
label2 = np.concatenate([healthy_label2, unhealthy_label2], axis=0)
(train_data, test_data, train_label1, test_label1, train_label2, test_label2) = train_test_split(data,
label1,
label2,
test_size=split_size,
shuffle=shuffle,
random_state=100)
# print(train_data.shape)
# print(train_label1.shape)
# print(train_label2.shape)
# print(train_data.shape)
return train_data, train_label1, train_label2, test_data, test_label1, test_label2
pass
# train_data: (300455, 120, 10)
# train_label1: (300455, 10)
# train_label2: (300455,)
def train_step_one(train_data, train_label1, train_label2):
model = Joint_Monitoring()
# # # # TODO the model must be built (run once) before model.summary() can be printed
# model.build(input_shape=(batch_size, filter_num, dims))
# model.summary()
history_loss = []
history_val_loss = []
learning_rate = 1e-3
for epoch in range(EPOCH):
print()
print("EPOCH:", epoch, "/", EPOCH, ":")
train_data, train_label1, train_label2 = shuffle(train_data, train_label1, train_label2)
if epoch == 0:
train_data, train_label1, train_label2, val_data, val_label1, val_label2 = shuffle(train_data, train_label1,
train_label2,
is_split=True)
# print()
# print("EPOCH:", epoch, "/", EPOCH, ":")
# z tells train() which batch of this epoch this is
z = 0
# k counts samples so that training runs once every batch_size samples
k = 1
for data_1, label_1, label_2 in zip(train_data, train_label1, train_label2):
size, _, _ = train_data.shape
data_1 = tf.expand_dims(data_1, axis=0)
label_1 = tf.expand_dims(label_1, axis=0)
label_2 = tf.expand_dims(label_2, axis=0)
if batch_size != 1:
if k % batch_size == 1:
data = data_1
label1 = label_1
label2 = label_2
else:
data = tf.concat([data, data_1], axis=0)
label1 = tf.concat([label1, label_1], axis=0)
label2 = tf.concat([label2, label_2], axis=0)
else:
data = data_1
label1 = label_1
label2 = label_2
if k % batch_size == 0:
# label = tf.expand_dims(label, axis=-1)
loss_value, accuracy_value = model.train(input_tensor=data, label1=label1, label2=label2,
learning_rate=learning_rate,
is_first_time=True)
print(z * batch_size, "/", size, ":===============>", "loss:", loss_value.numpy())
k = 0
z = z + 1
k = k + 1
val_loss, val_accuracy = model.get_val_loss(val_data=val_data, val_label1=val_label1, val_label2=val_label2,
is_first_time=True)
SaveBestModel(model=model, save_name=save_name, history_loss=history_val_loss, loss_value=val_loss.numpy())
# SaveBestH5Model(model=model, save_name=save_name, history_loss=history_val_loss, loss_value=val_loss.numpy())
history_val_loss.append(val_loss)
history_loss.append(loss_value.numpy())
print('Training loss is :', loss_value.numpy())
print('Validating loss is :', val_loss.numpy())
if IsStopTraining(history_loss=history_val_loss, patience=7):
break
if Is_Reduce_learning_rate(history_loss=history_val_loss, patience=3):
if learning_rate >= 1e-4:
learning_rate = learning_rate * 0.1
pass
def train_step_two(step_one_model, step_two_model, train_data, train_label1, train_label2):
# step_two_model = Joint_Monitoring()
# step_two_model.build(input_shape=(batch_size, time_stamp, feature_num))
# step_two_model.summary()
history_loss = []
history_val_loss = []
history_accuracy = []
learning_rate = 1e-3
for epoch in range(EPOCH):
print()
print("EPOCH:", epoch, "/", EPOCH, ":")
train_data, train_label1, train_label2 = shuffle(train_data, train_label1, train_label2)
if epoch == 0:
train_data, train_label1, train_label2, val_data, val_label1, val_label2 = shuffle(train_data, train_label1,
train_label2,
is_split=True)
# print()
# print("EPOCH:", epoch, "/", EPOCH, ":")
# z tells train() which batch of this epoch this is
z = 0
# k counts samples so that training runs once every batch_size samples
k = 1
accuracy_num = 0
for data_1, label_1, label_2 in zip(train_data, train_label1, train_label2):
size, _, _ = train_data.shape
data_1 = tf.expand_dims(data_1, axis=0)
label_1 = tf.expand_dims(label_1, axis=0)
label_2 = tf.expand_dims(label_2, axis=0)
if batch_size != 1:
if k % batch_size == 1:
data = data_1
label1 = label_1
label2 = label_2
else:
data = tf.concat([data, data_1], axis=0)
label1 = tf.concat([label1, label_1], axis=0)
label2 = tf.concat([label2, label_2], axis=0)
else:
data = data_1
label1 = label_1
label2 = label_2
if k % batch_size == 0:
# label = tf.expand_dims(label, axis=-1)
output1, output2, output3, _ = step_one_model.call(inputs=data, is_first_time=True)
loss_value, accuracy_value = step_two_model.train(input_tensor=data, label1=label1, label2=label2,
learning_rate=learning_rate,
is_first_time=False, pred_3=output1, pred_4=output2,
pred_5=output3)
accuracy_num += accuracy_value
print(z * batch_size, "/", size, ":===============>", "loss:", loss_value.numpy(), "| accuracy:",
accuracy_num / ((z + 1) * batch_size))
k = 0
z = z + 1
k = k + 1
val_loss, val_accuracy = step_two_model.get_val_loss(val_data=val_data, val_label1=val_label1,
val_label2=val_label2,
is_first_time=False, step_one_model=step_one_model)
SaveBestModelByAccuracy(model=step_two_model, save_name=save_step_two_name, history_accuracy=history_accuracy,
accuracy_value=val_accuracy)
history_val_loss.append(val_loss)
history_loss.append(loss_value.numpy())
history_accuracy.append(val_accuracy)
print('Training loss is : {0} | Training accuracy is : {1}'.format(loss_value.numpy(),
accuracy_num / ((z + 1) * batch_size)))
print('Validating loss is : {0} | Validating accuracy is : {1}'.format(val_loss.numpy(), val_accuracy))
if IsStopTraining(history_loss=history_val_loss, patience=7):
break
if Is_Reduce_learning_rate(history_loss=history_val_loss, patience=3):
if learning_rate >= 1e-4:
learning_rate = learning_rate * 0.1
pass
def test(step_one_model, step_two_model, test_data, test_label1, test_label2):
history_loss = []
history_val_loss = []
val_loss, val_accuracy = step_two_model.get_val_loss(val_data=test_data, val_label1=test_label1,
val_label2=test_label2,
is_first_time=False, step_one_model=step_one_model)
history_val_loss.append(val_loss)
print("val_accuracy:", val_accuracy)
print("val_loss:", val_loss)
def showResult(step_two_model: Joint_Monitoring, test_data, isPlot: bool = False):
# Get the total number of model parameters
# step_two_model.count_params()
total_result = []
size, length, dims = test_data.shape
for epoch in range(0, size - batch_size + 1, batch_size):
each_test_data = test_data[epoch:epoch + batch_size, :, :]
_, _, _, output4 = step_two_model.call(each_test_data, is_first_time=False)
total_result.append(output4)
total_result = np.reshape(total_result, [total_result.__len__(), -1])
total_result = np.reshape(total_result, [-1, ])
if isPlot:
plt.scatter(list(range(total_result.shape[0])), total_result, c='black', s=10)
# Draw the failure-threshold horizontal line at y = 0.5
plt.axhline(0.5, c='red', label='Failure threshold')
# Arrow pointing at the threshold line (left disabled)
# plt.arrow(35000, 0.9, 33000, 0.75, head_width=0.02, head_length=0.1, shape="full", fc='red', ec='red',
# alpha=0.9, overhang=0.5)
# plt.text(35000, 0.9, "Truth Fault", fontsize=10, color='black', verticalalignment='top')
plt.axvline(test_data.shape[0] * 2 / 3, c='blue', ls='-.')
plt.xlabel("time")
plt.ylabel("confidence")
plt.text(total_result.shape[0] * 4 / 5, 0.6, "Fault", fontsize=10, color='black', verticalalignment='top',
horizontalalignment='center',
bbox={'facecolor': 'grey',
'pad': 10})
plt.text(total_result.shape[0] * 1 / 3, 0.4, "Norm", fontsize=10, color='black', verticalalignment='top',
horizontalalignment='center',
bbox={'facecolor': 'grey',
'pad': 10})
plt.grid()
# plt.ylim(0, 1)
# plt.xlim(-50, 1300)
# plt.legend("", loc='upper left')
plt.show()
return total_result
if __name__ == '__main__':
total_data = loadData.execute(N=feature_num, file_name=file_name)
total_data = normalization(data=total_data)
train_data_healthy, train_label1_healthy, train_label2_healthy = get_training_data_overlapping(
total_data[:healthy_date, :], is_Healthy=True)
train_data_unhealthy, train_label1_unhealthy, train_label2_unhealthy = get_training_data_overlapping(
total_data[healthy_date - time_stamp + unhealthy_patience:unhealthy_date, :],
is_Healthy=False)
#### TODO Step-one training
# single-run test
# train_step_one(train_data=train_data_healthy[:32, :, :], train_label1=train_label1_healthy[:32, :],train_label2=train_label2_healthy[:32, ])
# train_step_one(train_data=train_data_healthy, train_label1=train_label1_healthy, train_label2=train_label2_healthy)
# Load the model already trained in step one: one copy keeps training, the other only produces outputs
# step_one_model = Joint_Monitoring()
# step_one_model.load_weights(save_name)
#
# step_two_model = Joint_Monitoring()
# step_two_model.load_weights(save_name)
#### TODO Step-two training
### healthy_data.shape: (300333,120,10)
### unhealthy_data.shape: (16594,10)
healthy_size, _, _ = train_data_healthy.shape
unhealthy_size, _, _ = train_data_unhealthy.shape
# train_data, train_label1, train_label2, test_data, test_label1, test_label2 = split_test_data(
# healthy_data=train_data_healthy[healthy_size - 2 * unhealthy_size:, :, :],
# healthy_label1=train_label1_healthy[healthy_size - 2 * unhealthy_size:, :],
# healthy_label2=train_label2_healthy[healthy_size - 2 * unhealthy_size:, ], unhealthy_data=train_data_unhealthy,
# unhealthy_label1=train_label1_unhealthy, unhealthy_label2=train_label2_unhealthy)
# train_step_two(step_one_model=step_one_model, step_two_model=step_two_model,
# train_data=train_data,
# train_label1=train_label1, train_label2=np.expand_dims(train_label2, axis=-1))
### TODO Evaluate on the test set
step_one_model = Joint_Monitoring()
step_one_model.load_weights(save_name)
step_two_model = Joint_Monitoring()
step_two_model.load_weights(save_step_two_name)
# test(step_one_model=step_one_model, step_two_model=step_two_model, test_data=test_data, test_label1=test_label1,
# test_label2=np.expand_dims(test_label2, axis=-1))
### TODO Show the results over the full data
all_data, _, _ = get_training_data_overlapping(
total_data[healthy_size - 2 * unhealthy_size:unhealthy_date, :], is_Healthy=True)
# all_data = np.concatenate([])
# single-run test
# showResult(step_two_model, test_data=all_data[:32], isPlot=True)
showResult(step_two_model, test_data=all_data, isPlot=True)
pass

View File

@ -0,0 +1,10 @@
# -*- coding: utf-8 -*-
# coding: utf-8
'''
@Author : dingjiawen
@Date : 2022/10/11 18:51
@Usage :
@Desc :
'''