From d8746d192fc21a251539a559617cb41ba0052a34 Mon Sep 17 00:00:00 2001
From: kevinding1125 <745518019@qq.com>
Date: Wed, 8 Nov 2023 19:14:57 +0800
Subject: [PATCH] =?UTF-8?q?20231108=E7=88=AC=E8=99=AB=E5=AD=A6=E4=B9=A0?=
=?UTF-8?q?=E6=9B=B4=E6=96=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../BeautifulSoup库/BeautifulSoupLearning.py | 244 ++++++++
.../BeautifulSoup库/__init__.py | 8 +
.../Pyquery库/__init__.py | 8 +
.../Pyquery库/pyqueryLearning.py | 329 ++++++++++
.../XPath库/XpathLearning.py | 195 ++++++
.../XPath库/__init__.py | 8 +
.../XPath库/test.html | 9 +
Spider/Chapter03_网页数据的提取/__init__.py | 8 +
.../dataETL/NewtonInsert.py | 153 +++++
.../condition_monitoring/dataETL/__init__.py | 0
.../dataETL/lagrangeInsert.py | 96 +++
.../scada_data_process_for_JBYQ_YSD.py | 149 +++++
.../condition_monitoring/data_deal/baseETL.py | 67 ++
.../data_deal/loadData.py | 228 +++++++
.../data_deal/loadData_daban.py | 207 +++++++
.../data_deal/plot_raw_data.py | 60 ++
.../others_idea/CNN_GRU.py | 262 ++++++++
.../others_idea/__init__.py | 0
.../self_try/Joint_Monitoring.py | 526 ++++++++++++++++
.../self_try/Joint_Monitoring_hard.py | 576 ++++++++++++++++++
20 files changed, 3133 insertions(+)
create mode 100644 Spider/Chapter03_网页数据的提取/BeautifulSoup库/BeautifulSoupLearning.py
create mode 100644 Spider/Chapter03_网页数据的提取/BeautifulSoup库/__init__.py
create mode 100644 Spider/Chapter03_网页数据的提取/Pyquery库/__init__.py
create mode 100644 Spider/Chapter03_网页数据的提取/Pyquery库/pyqueryLearning.py
create mode 100644 Spider/Chapter03_网页数据的提取/XPath库/XpathLearning.py
create mode 100644 Spider/Chapter03_网页数据的提取/XPath库/__init__.py
create mode 100644 Spider/Chapter03_网页数据的提取/XPath库/test.html
create mode 100644 Spider/Chapter03_网页数据的提取/__init__.py
create mode 100644 TensorFlow_eaxmple/Model_train_test/condition_monitoring/dataETL/NewtonInsert.py
create mode 100644 TensorFlow_eaxmple/Model_train_test/condition_monitoring/dataETL/__init__.py
create mode 100644 TensorFlow_eaxmple/Model_train_test/condition_monitoring/dataETL/lagrangeInsert.py
create mode 100644 TensorFlow_eaxmple/Model_train_test/condition_monitoring/dataETL/scada_data_process_for_JBYQ_YSD.py
create mode 100644 TensorFlow_eaxmple/Model_train_test/condition_monitoring/data_deal/baseETL.py
create mode 100644 TensorFlow_eaxmple/Model_train_test/condition_monitoring/data_deal/loadData.py
create mode 100644 TensorFlow_eaxmple/Model_train_test/condition_monitoring/data_deal/loadData_daban.py
create mode 100644 TensorFlow_eaxmple/Model_train_test/condition_monitoring/data_deal/plot_raw_data.py
create mode 100644 TensorFlow_eaxmple/Model_train_test/condition_monitoring/others_idea/CNN_GRU.py
create mode 100644 TensorFlow_eaxmple/Model_train_test/condition_monitoring/others_idea/__init__.py
create mode 100644 TensorFlow_eaxmple/Model_train_test/condition_monitoring/self_try/Joint_Monitoring.py
create mode 100644 TensorFlow_eaxmple/Model_train_test/condition_monitoring/self_try/Joint_Monitoring_hard.py
diff --git a/Spider/Chapter03_网页数据的提取/BeautifulSoup库/BeautifulSoupLearning.py b/Spider/Chapter03_网页数据的提取/BeautifulSoup库/BeautifulSoupLearning.py
new file mode 100644
index 0000000..6890cb5
--- /dev/null
+++ b/Spider/Chapter03_网页数据的提取/BeautifulSoup库/BeautifulSoupLearning.py
@@ -0,0 +1,244 @@
+# -*- encoding:utf-8 -*-
+
+'''
+@Author : dingjiawen
+@Date : 2023/11/8 16:08
+@Usage :
+@Desc :参考 https://github.com/Python3WebSpider/BeautifulSoupTest
+'''
+
+html = """
+
The Dormouse's story
+
+The Dormouse's story
+Once upon a time there were three little sisters; and their names were
+,
+Lacie and
+Tillie;
+and they lived at the bottom of a well.
+...
+"""
+from bs4 import BeautifulSoup
+
+
+
+def baseUse():
+    """Walk through basic tag access on the module-level ``html`` sample.
+
+    Parses ``html`` with the lxml parser and prints, in order: the title
+    tag, its type and its string, the head tag, the first ``p`` tag, and
+    then that tag's name, attribute dict and ``name`` attribute (looked
+    up three different ways).
+    """
+    soup = BeautifulSoup(html, 'lxml')
+
+    title_tag = soup.title
+    print(title_tag)
+    print(type(title_tag))
+    print(title_tag.string)
+    print(soup.head)
+
+    first_p = soup.p
+    print(first_p)
+    print(first_p.name)  # node name
+    print(first_p.attrs)  # all attributes as a dict
+    print(first_p.attrs['name'])  # attribute value through .attrs
+    print(first_p['name'])  # attribute value through subscripting
+    print(soup.body.p['name'])  # nested selection
+
+    print("==========================")
+
+
+def child():
+ """Print the direct children, then all descendants, of the first ``p`` tag.
+
+ NOTE(review): the markup of the sample document below appears to have been
+ stripped from this copy of the patch; only its text content is left.
+ """
+ html = """
+
+
+ The Dormouse's story
+
+
+
+ Once upon a time there were three little sisters; and their names were
+
+ Elsie
+
+ Lacie
+ and
+ Tillie
+ and they lived at the bottom of a well.
+
+ ...
+ """
+ soup = BeautifulSoup(html, 'lxml')
+ # Direct child nodes, each printed with its position
+ for i, child in enumerate(soup.p.children):
+ print(i, child)
+ print("===============================")
+ # Descendant nodes, each printed with its position
+ for i, child in enumerate(soup.p.descendants):
+ print(i, child)
+ print("===============================")
+
+
+def parent():
+    """Print the parent and the ancestor chain of the first ``a`` tag.
+
+    Works on the module-level ``html`` sample parsed with lxml.
+    """
+    soup = BeautifulSoup(html, 'lxml')
+
+    # Parent node
+    direct_parent = soup.a.parent
+    print(direct_parent)
+    print("===============================")
+
+    # Ancestor nodes: show the type of ``parents`` first, then list them
+    print(type(soup.a.parents))
+    numbered_ancestors = list(enumerate(soup.a.parents))
+    print(numbered_ancestors)
+    print("=============================")
+
+
+def brother():
+ """Print the next/previous sibling(s) of the first ``a`` tag.
+
+ NOTE(review): the markup of the sample document below appears to have been
+ stripped from this copy of the patch; only its text content is left.
+ """
+ html = """
+
+
+
+ Once upon a time there were three little sisters; and their names were
+
+ Elsie
+
+ Hello
+ Lacie
+ and
+ Tillie
+ and they lived at the bottom of a well.
+
+ """
+ # Sibling nodes
+ soup = BeautifulSoup(html, 'lxml')
+ print('Next Sibling', soup.a.next_sibling)
+ print('Prev Sibling', soup.a.previous_sibling)
+ print('Next Siblings', list(enumerate(soup.a.next_siblings)))
+ print('Prev Siblings', list(enumerate(soup.a.previous_siblings)))
+
+# Find every node that satisfies the condition
+def findAll():
+ """Demonstrate ``find_all`` by tag name, including nested calls.
+
+ NOTE(review): the sample markup inside ``html`` appears to have been
+ stripped from this copy of the patch.
+ """
+
+ html = '''
+
+ '''
+ soup = BeautifulSoup(html, 'lxml')
+ print(soup.find_all(name='ul'))
+ print(type(soup.find_all(name='ul')[0]))
+
+ # find_all can be called again on each result
+ for ul in soup.find_all(name='ul'):
+ print(ul.find_all(name='li'))
+
+ # Same lookup again, this time also printing each li's string
+ for ul in soup.find_all(name='ul'):
+ print(ul.find_all(name='li'))
+ for li in ul.find_all(name='li'):
+ print(li.string)
+
+
+# Find nodes whose attributes match
+def attrs():
+ """Demonstrate ``find_all`` filtered by attributes and by text.
+
+ NOTE(review): the sample markup inside ``html`` appears to have been
+ stripped from this copy of the patch.
+ """
+ html = '''
+
+ '''
+
+ soup = BeautifulSoup(html, 'lxml')
+ print(soup.find_all(attrs={'id': 'list-1'}))
+ print(soup.find_all(attrs={'name': 'elements'}))
+
+ # Common attributes can be passed as keywords instead of through attrs
+ soup = BeautifulSoup(html, 'lxml')
+ print(soup.find_all(id='list-1'))
+ print(soup.find_all(class_='element'))
+ import re
+ print(soup.find_all(string=re.compile('Foo')))# string is the same as text, i.e. the text content inside the node
+
+
+# Return the first matching element
+def find():
+ """Demonstrate ``find`` by tag name and by class.
+
+ NOTE(review): the sample markup inside ``html`` appears to have been
+ stripped from this copy of the patch.
+ """
+ html = '''
+
+ '''
+ soup = BeautifulSoup(html, 'lxml')
+ print(soup.find(name='ul'))
+ print(type(soup.find(name='ul')))
+ print(soup.find(class_='list'))
+
+# CSS selectors
+def cssSelect():
+ """Demonstrate ``select`` with CSS selectors, nesting, attributes and text.
+
+ NOTE(review): the sample markup inside ``html`` appears to have been
+ stripped from this copy of the patch.
+ """
+ html = '''
+
+ '''
+
+ soup = BeautifulSoup(html, 'lxml')
+ print(soup.select('.panel .panel-heading'))
+ print(soup.select('ul li'))
+ print(soup.select('#list-2 .element'))
+ print(type(soup.select('ul')[0]))
+
+ # Nested selection
+ soup = BeautifulSoup(html, 'lxml')
+ for ul in soup.select('ul'):
+ print(ul.select('li'))
+
+ # Reading attributes
+ soup = BeautifulSoup(html, 'lxml')
+ for ul in soup.select('ul'):
+ print(ul['id'])
+ print(ul.attrs['id'])
+
+ # Reading text
+ soup = BeautifulSoup(html, 'lxml')
+ for li in soup.select('li'):
+ print('Get Text:', li.get_text())
+ print('String:', li.string)
+
+
+
+if __name__ == '__main__':
+ cssSelect()
\ No newline at end of file
diff --git a/Spider/Chapter03_网页数据的提取/BeautifulSoup库/__init__.py b/Spider/Chapter03_网页数据的提取/BeautifulSoup库/__init__.py
new file mode 100644
index 0000000..4cced9b
--- /dev/null
+++ b/Spider/Chapter03_网页数据的提取/BeautifulSoup库/__init__.py
@@ -0,0 +1,8 @@
+#-*- encoding:utf-8 -*-
+
+'''
+@Author : dingjiawen
+@Date : 2023/11/8 16:07
+@Usage :
+@Desc :
+'''
\ No newline at end of file
diff --git a/Spider/Chapter03_网页数据的提取/Pyquery库/__init__.py b/Spider/Chapter03_网页数据的提取/Pyquery库/__init__.py
new file mode 100644
index 0000000..2460833
--- /dev/null
+++ b/Spider/Chapter03_网页数据的提取/Pyquery库/__init__.py
@@ -0,0 +1,8 @@
+#-*- encoding:utf-8 -*-
+
+'''
+@Author : dingjiawen
+@Date : 2023/11/8 16:54
+@Usage :
+@Desc :
+'''
\ No newline at end of file
diff --git a/Spider/Chapter03_网页数据的提取/Pyquery库/pyqueryLearning.py b/Spider/Chapter03_网页数据的提取/Pyquery库/pyqueryLearning.py
new file mode 100644
index 0000000..af946f6
--- /dev/null
+++ b/Spider/Chapter03_网页数据的提取/Pyquery库/pyqueryLearning.py
@@ -0,0 +1,329 @@
+# -*- encoding:utf-8 -*-
+
+'''
+@Author : dingjiawen
+@Date : 2023/11/8 16:54
+@Usage :
+@Desc :Pyquery学习 参考: https://github.com/Python3WebSpider/PyQueryTest
+'''
+from pyquery import PyQuery as pq
+
+
+# Initialise from a string
+def stringBase():
+ """Build a PyQuery document from an HTML string and print its ``li`` nodes.
+
+ NOTE(review): the sample markup inside ``html`` appears to have been
+ stripped from this copy of the patch.
+ """
+ html = '''
+
+ '''
+
+ doc = pq(html)
+ print(doc('li'))
+
+
+# Initialise from a URL
+def URLBase():
+    """Build a PyQuery document from https://cuiqingcai.com and print its ``title`` selection."""
+    page = pq(url='https://cuiqingcai.com')
+    title_nodes = page('title')
+    print(title_nodes)
+
+    # Per the original note, this is equivalent to fetching the page text
+    # with requests and passing that text to pq().
+
+
+# Initialise from a file
+def fileBase():
+    """Build a PyQuery document from the local file ``demo.html`` and print its ``li`` selection."""
+    page = pq(filename='demo.html')
+    list_items = page('li')
+    print(list_items)
+
+# Basic CSS selectors
+def cssSelect():
+ """Select nodes with a CSS selector, then print each match's text.
+
+ NOTE(review): the sample markup inside ``html`` appears to have been
+ stripped from this copy of the patch.
+ """
+ html = '''
+
+ '''
+ doc = pq(html)
+ print(doc('#container .list li'))
+ print(type(doc('#container .list li')))
+
+ # Walk the matches one at a time and print the text of each
+ for item in doc('#container .list li').items():
+ print(item.text())
+
+# Finding child nodes
+def child():
+ """Demonstrate ``find`` and ``children`` on the ``.list`` selection.
+
+ NOTE(review): the sample markup inside ``html`` appears to have been
+ stripped from this copy of the patch.
+ """
+ html = '''
+
+ '''
+ doc = pq(html)
+ items = doc('.list')
+ print(type(items))
+ print(items)
+ # find() with a selector
+ lis = items.find('li')
+ print(type(lis))
+ print(lis)
+ #
+ # children() without a selector
+ lis = items.children()
+ print(type(lis))
+ print(lis)
+
+ # children() filtered by a selector
+ lis = items.children('.active')
+ print(lis)
+
+
+def parent():
+ """Demonstrate ``parent``, ``parents`` and ``siblings`` on PyQuery selections.
+
+ NOTE(review): the sample markup inside ``html`` appears to have been
+ stripped from this copy of the patch.
+ """
+ html = '''
+
+ '''
+ # parent() of the .list selection
+ from pyquery import PyQuery as pq
+ doc = pq(html)
+ items = doc('.list')
+ container = items.parent()
+ print(type(container))
+ print(container)
+
+ # parents() of the .list selection
+ from pyquery import PyQuery as pq
+ doc = pq(html)
+ items = doc('.list')
+ parents = items.parents()
+ print(type(parents))
+ print(parents)
+
+ # parents() filtered by a selector
+ parent = items.parents('.wrap')
+ print(parent)
+
+ # siblings() of the selected li
+ from pyquery import PyQuery as pq
+ doc = pq(html)
+ li = doc('.list .item-0.active')
+ print(li.siblings())
+
+def brother():
+ """Demonstrate filtered ``siblings`` and iterating over a multi-node selection.
+
+ NOTE(review): the sample markup inside ``html`` appears to have been
+ stripped from this copy of the patch.
+ """
+ html = '''
+
+ '''
+ # siblings() filtered by a selector
+ from pyquery import PyQuery as pq
+ doc = pq(html)
+ li = doc('.list .item-0.active')
+ print(li.siblings('.active'))
+
+ # Printing a selection directly and through str()
+ from pyquery import PyQuery as pq
+ doc = pq(html)
+ li = doc('.item-0.active')
+ print(li)
+ print(str(li))
+
+ from pyquery import PyQuery as pq
+ doc = pq(html)
+ # The selection may contain several nodes
+ lis = doc('li').items()
+ print(type(lis))
+ for li in lis:
+ print(li, type(li))
+
+def attrs():
+ """Demonstrate reading the ``href`` attribute and text of ``a`` selections.
+
+ NOTE(review): the sample markup inside ``html`` appears to have been
+ stripped from this copy of the patch.
+ """
+ html = '''
+
+ '''
+ from pyquery import PyQuery as pq
+ doc = pq(html)
+ a = doc('.item-0.active a')
+ print(a, type(a))
+ print(a.attr('href'))
+
+ # Same attribute read on the selection of all a nodes, in two spellings
+ a = doc('a')
+ print(a, type(a))
+ print(a.attr('href'))
+ print(a.attr.href)
+
+ from pyquery import PyQuery as pq
+ doc = pq(html)
+ a = doc('a')
+ for item in a.items():
+ # Read the attribute and the text of each node
+ print(item.attr('href'),item.text())
+
+def getHTML():
+ """Print ``html()`` and ``text()`` of the ``li`` selection.
+
+ NOTE(review): the sample markup inside ``html`` appears to have been
+ stripped from this copy of the patch.
+ """
+ html = '''
+
+ '''
+ from pyquery import PyQuery as pq
+ doc = pq(html)
+ li = doc('li')
+ print(li.html()) # HTML of the first node; per the original note: second item
+ print(li.text()) # text of all matched nodes; per the original note: second item third item fourth item fifth item
+ print(type(li.text()))
+
+# Adding or removing a node's class
+def operateNode():
+ """Print a selection before and after ``removeClass`` / ``addClass``.
+
+ NOTE(review): the sample markup inside ``html`` appears to have been
+ stripped from this copy of the patch.
+ """
+ html = '''
+
+ '''
+ from pyquery import PyQuery as pq
+ doc = pq(html)
+ li = doc('.item-0.active')
+ print(li)
+ li.removeClass('active')
+ print(li)
+ li.addClass('active')
+ print(li)
+
+ # The bare string below is the author's record of the expected output
+ # (its markup looks stripped as well); it has no runtime effect.
+ '''
+ third item
+
+ third item
+
+ third item
+ '''
+
+
+
+def operateNodeInformation():
+ """Print a selection after setting an attribute, its text and its HTML.
+
+ NOTE(review): the sample markup inside ``html`` appears to have been
+ stripped from this copy of the patch.
+ """
+ html = '''
+
+ '''
+ from pyquery import PyQuery as pq
+ doc = pq(html)
+ li = doc('.item-0.active')
+ print(li)
+ # Two-argument attr() sets the attribute
+ li.attr('name', 'link')
+ print(li)
+ # text()/html() with an argument replace the content
+ li.text('changed item')
+ print(li)
+ li.html('changed item')
+ print(li)
+ # The bare string below is the author's record of the expected output
+ # (its markup looks stripped as well); it has no runtime effect.
+ '''
+ third item
+ changed item
+ changed item
+ '''
+
+
+def removeInformation():
+ """Print the text of ``.wrap`` before and after removing its ``p`` node.
+
+ NOTE(review): the sample markup inside ``html`` appears to have been
+ stripped from this copy of the patch.
+ """
+ html = '''
+
+ Hello, World
+
This is a paragraph.
+
+ '''
+ from pyquery import PyQuery as pq
+ doc = pq(html)
+ wrap = doc('.wrap')
+ print(wrap.text())
+ # Bare string: author's record of the expected output at this point
+ '''
+ Hello, World
+ This is a paragraph.
+ '''
+ wrap.find('p').remove()
+ print(wrap.text())
+ # Bare string: author's record of the expected output after the removal
+ '''
+ Hello, World
+ '''
+
+# Pseudo-class selectors
+def fakeCSSSelect():
+ """Print the ``li`` selections produced by several pseudo-class selectors.
+
+ NOTE(review): the sample markup inside ``html`` appears to have been
+ stripped from this copy of the patch.
+ """
+ html = '''
+
+ '''
+ from pyquery import PyQuery as pq
+ doc = pq(html)
+ li = doc('li:first-child')
+ print(li)
+ li = doc('li:last-child')
+ print(li)
+ li = doc('li:nth-child(2)')
+ print(li)
+ li = doc('li:gt(2)')
+ print(li)
+ li = doc('li:nth-child(2n)')
+ print(li)
+ li = doc('li:contains(second)')
+ print(li)
+
+
+
+if __name__ == '__main__':
+ fakeCSSSelect()
diff --git a/Spider/Chapter03_网页数据的提取/XPath库/XpathLearning.py b/Spider/Chapter03_网页数据的提取/XPath库/XpathLearning.py
new file mode 100644
index 0000000..fc4c031
--- /dev/null
+++ b/Spider/Chapter03_网页数据的提取/XPath库/XpathLearning.py
@@ -0,0 +1,195 @@
+# -*- encoding:utf-8 -*-
+
+'''
+@Author : dingjiawen
+@Date : 2023/11/8 15:15
+@Usage :
+@Desc :
+'''
+
+from lxml import etree
+
+'''
+XPath基本规则:
+
+ 1) nodename:选择此节点的所有子节点
+ 2) /:从当前节点选取直接子节点
+ 3) //:从当前节点选取子孙节点
+ 4) .:选取当前节点
+ 5) ..:选取当前节点的父节点
+ 6) @:选取属性
+
+举例:
+//title[@lang='eng']代表选择所有名称为title,同时属性lang的值为eng的节点
+'''
+
+
+def htmlByString():
+ """Parse an HTML string with ``etree.HTML`` and print the serialised tree.
+
+ NOTE(review): the sample markup inside ``text`` appears to have been
+ stripped from this copy of the patch.
+ """
+ text = '''
+
+ '''
+ html = etree.HTML(text)
+ # tostring() returns bytes, hence the decode before printing
+ result = etree.tostring(html)
+ print(result.decode('utf-8'))
+
+
+def htmlByFile():
+    """Parse ``./test.html`` with an HTML parser and print the serialised tree."""
+    tree = etree.parse('./test.html', etree.HTMLParser())
+    serialised = etree.tostring(tree)
+    print(serialised.decode('utf-8'))
+
+
+def allNode():
+    """Print the matches (and the first match) for ``//*`` and ``//li`` in ``./test.html``."""
+    tree = etree.parse('./test.html', etree.HTMLParser())
+    # '//*' matches every node from the root down; '//li' matches every li
+    for expression in ('//*', '//li'):
+        matches = tree.xpath(expression)
+        print(matches)
+        print(matches[0])
+
+
+# Matching child nodes
+def childNode():
+    """Print the matches (and the first match) for child and descendant ``a`` lookups."""
+    tree = etree.parse('./test.html', etree.HTMLParser())
+    # '//li/a'  : a nodes that are direct children of an li
+    # '//ul//a' : a nodes anywhere below a ul
+    for expression in ('//li/a', '//ul//a'):
+        matches = tree.xpath(expression)
+        print(matches)
+        print(matches[0])
+
+
+# Matching parent nodes
+def fatherNode():
+    """Print the ``class`` of the parent of the ``a`` whose href is ``link4.html``.
+
+    The lookup is done twice: once with ``..`` and once with the
+    ``parent::`` axis.
+    """
+    tree = etree.parse('./test.html', etree.HTMLParser())
+    parent_class_queries = (
+        '//a[@href="link4.html"]/../@class',
+        '//a[@href="link4.html"]/parent::*/@class',
+    )
+    for expression in parent_class_queries:
+        print(tree.xpath(expression))
+
+
+# Reading text
+def textGet():
+    """Print text found under ``li`` nodes whose class is ``item-0``.
+
+    First the text of their direct ``a`` children, then the text of all
+    their descendants.
+    """
+    tree = etree.parse('./test.html', etree.HTMLParser())
+    text_queries = (
+        '//li[@class="item-0"]/a/text()',  # original note: ['first item', 'fifth item']
+        '//li[@class="item-0"]//text()',  # original note: ['first item', 'fifth item', '\r\n ']
+    )
+    for expression in text_queries:
+        print(tree.xpath(expression))
+
+
+# Reading attributes
+def fieldGet():
+    """Print the ``href`` attribute of every ``a`` that is a direct child of an ``li``."""
+    tree = etree.parse('./test.html', etree.HTMLParser())
+    hrefs = tree.xpath('//li/a/@href')
+    # original note: ['link1.html', 'link2.html', 'link3.html', 'link4.html', 'link5.html']
+    print(hrefs)
+
+
+# Matching an attribute that holds several values
+def fieldsGet():
+ """Compare an exact ``@class`` test with ``contains(@class, ...)``.
+
+ NOTE(review): the sample markup inside ``text`` appears to have been
+ stripped from this copy of the patch.
+ """
+ text = '''
+ first item
+ '''
+ html = etree.HTML(text)
+ result = html.xpath('//li[@class="li"]/a/text()')
+ print(result) # [] no match (per the original note)
+
+ result = html.xpath('//li[contains(@class, "li")]/a/text()')
+ print(result) # ['first item'] contains() matches (per the original note)
+
+
+# Matching on several attributes
+def fieldssGet():
+ """Select with two attribute conditions joined by ``and``.
+
+ NOTE(review): the sample markup inside ``text`` appears to have been
+ stripped from this copy of the patch.
+ """
+ text = '''
+ first item
+ '''
+ html = etree.HTML(text)
+ # Several attribute conditions are joined with "and"
+ result = html.xpath('//li[contains(@class, "li") and @name="item"]/a/text()')
+ print(result)
+
+
+# Selecting by position
+def orderGet():
+ """Select ``li`` nodes by position using ``[n]``, ``last()`` and ``position()``.
+
+ NOTE(review): the sample markup inside ``text`` appears to have been
+ stripped from this copy of the patch; the trailing comments are the
+ author's recorded outputs.
+ """
+ text = '''
+
+ '''
+ html = etree.HTML(text)
+ result = html.xpath('//li[1]/a/text()')
+ print(result) # ['first item']
+ result = html.xpath('//li[last()]/a/text()')
+ print(result) # ['fifth item']
+ result = html.xpath('//li[position()<3]/a/text()')
+ print(result) # ['first item', 'second item']
+ result = html.xpath('//li[last()-2]/a/text()')
+ print(result) # ['third item']
+
+
+def nodeSelect():
+ """Print the results of several XPath axes applied to the first ``li``.
+
+ NOTE(review): the sample markup inside ``text`` appears to have been
+ stripped from this copy of the patch.
+ """
+ text = '''
+
+ '''
+ html = etree.HTML(text)
+ # ancestor::* selects all ancestors
+ result = html.xpath('//li[1]/ancestor::*')
+ print(result)
+ # ancestor::div keeps only the div ancestors
+ result = html.xpath('//li[1]/ancestor::div')
+ print(result)
+ # attribute::* selects all attributes
+ result = html.xpath('//li[1]/attribute::*')
+ print(result)
+ # child:: selects child nodes (here filtered by href)
+ result = html.xpath('//li[1]/child::a[@href="link1.html"]')
+ print(result)
+ # descendant:: selects descendant nodes
+ result = html.xpath('//li[1]/descendant::span')
+ print(result)
+ # following:: selects all nodes after the current node (here the second one)
+ result = html.xpath('//li[1]/following::*[2]')
+ print(result)
+ # following-sibling:: selects the siblings after the current node
+ result = html.xpath('//li[1]/following-sibling::*')
+ print(result)
+
+if __name__ == '__main__':
+ nodeSelect()
diff --git a/Spider/Chapter03_网页数据的提取/XPath库/__init__.py b/Spider/Chapter03_网页数据的提取/XPath库/__init__.py
new file mode 100644
index 0000000..d92f042
--- /dev/null
+++ b/Spider/Chapter03_网页数据的提取/XPath库/__init__.py
@@ -0,0 +1,8 @@
+#-*- encoding:utf-8 -*-
+
+'''
+@Author : dingjiawen
+@Date : 2023/11/8 15:15
+@Usage :
+@Desc :
+'''
\ No newline at end of file
diff --git a/Spider/Chapter03_网页数据的提取/XPath库/test.html b/Spider/Chapter03_网页数据的提取/XPath库/test.html
new file mode 100644
index 0000000..cb77f50
--- /dev/null
+++ b/Spider/Chapter03_网页数据的提取/XPath库/test.html
@@ -0,0 +1,9 @@
+
\ No newline at end of file
diff --git a/Spider/Chapter03_网页数据的提取/__init__.py b/Spider/Chapter03_网页数据的提取/__init__.py
new file mode 100644
index 0000000..2e0ad3a
--- /dev/null
+++ b/Spider/Chapter03_网页数据的提取/__init__.py
@@ -0,0 +1,8 @@
+#-*- encoding:utf-8 -*-
+
+'''
+@Author : dingjiawen
+@Date : 2023/11/8 15:12
+@Usage :
+@Desc :
+'''
\ No newline at end of file
diff --git a/TensorFlow_eaxmple/Model_train_test/condition_monitoring/dataETL/NewtonInsert.py b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/dataETL/NewtonInsert.py
new file mode 100644
index 0000000..82c4854
--- /dev/null
+++ b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/dataETL/NewtonInsert.py
@@ -0,0 +1,153 @@
+# _*_ coding: UTF-8 _*_
+
+
+'''
+@Author : dingjiawen
+@Date : 2022/7/11 12:55
+@Usage :
+@Desc :
+'''
+
+import numpy as np
+import pandas as pd
+import time
+# 只计算了该程序运行CPU的时间
+import timeit
+
+# Raw string: "\d", "\S" and "\j" are not valid escape sequences, so the
+# non-raw spelling only worked by accident (and warns on newer Pythons).
+path = r"G:\data\SCADA数据\jb4q_8.csv"
+cat_sale = pd.read_csv(path)
+
+# Treat zero readings as missing values. The mask is assigned on the frame
+# itself: the previous chained form `cat_sale[:][...] = ...` wrote through an
+# intermediate object, which pandas does not guarantee to be a view.
+cat_sale[cat_sale == 0] = np.nan
+
+# Helpers for Newton interpolation: the divided-difference table and the
+# polynomial evaluation built on it.
+def cal_f(x, y):
+    """Build the divided-difference table for the nodes ``(x[i], y[i])``.
+
+    :param x: abscissas (index values) of the known samples
+    :param y: values of the known samples, same length as ``x``
+    :return: a ``len(x)`` by ``len(y)`` array whose entry ``[i, k]`` is the
+        k-th order divided difference ending at node ``i``; entries above
+        the diagonal stay zero. The diagonal holds the Newton coefficients.
+    """
+    rows = len(x)
+    cols = len(y)
+    f0 = np.zeros((rows, cols))
+    for k in range(cols):  # column = order of the difference
+        for i in range(k, rows):  # row = last node of the difference
+            if k == 0:
+                f0[i, k] = y[i]
+            else:
+                # A k-th order difference spans nodes i-k .. i, so the
+                # denominator is x[i] - x[i-k] (x[i] - x[i-1] is only
+                # correct for k == 1).
+                f0[i, k] = (f0[i, k - 1] - f0[i - 1, k - 1]) / (x[i] - x[i - k])
+    return f0
+
+
+def newton(x, y, x_j):
+    """Evaluate the Newton interpolation polynomial at ``x_j``.
+
+    :param x: abscissas (index values) of the known samples
+    :param y: values of the known samples
+    :param x_j: abscissa at which to evaluate the polynomial
+    :return: sum over i of ``c[i] * (x_j - x[0]) * ... * (x_j - x[i-1])``
+        where ``c`` is the diagonal of the table returned by ``cal_f``
+    """
+    coefficients = cal_f(x, y).diagonal()
+    total = 0
+    basis = 1  # running product (x_j - x[0]) ... (x_j - x[order - 1])
+    for order, coefficient in enumerate(coefficients):
+        if order > 0:
+            basis = basis * (x_j - x[order - 1])
+        total = total + coefficient * basis
+    return total
+
+
+# Column interpolation helper: collect the samples around a gap and hand
+# them to newton().
+def ployinterp_columns(s, n, x_j, is_fast: bool = False, k=3):
+    """Interpolate the missing value at position ``n`` of ``s``.
+
+    :param s: the whole series being repaired; read with ``s[i]`` and
+        ``s[list_of_i]``
+    :param n: position of the value to interpolate
+    :param x_j: abscissa at which the polynomial is evaluated
+    :param is_fast: if True, only look at the ``k`` positions on each side
+        of ``n`` and drop the null ones; if False, walk outwards from ``n``
+        and skip NaNs until enough samples are collected
+    :param k: number of samples wanted on each side of ``n``
+    :return: the value ``newton`` computes from the collected samples
+    """
+    if is_fast:
+        # Clip the window to the series so gaps near either end (or in a
+        # series shorter than the window) never index out of range.
+        before = range(max(n - k, 0), n)
+        after = range(n + 1, min(n + k + 1, len(s)))
+        y = s[list(before) + list(after)]
+        y = y[y.notnull()]  # drop null neighbours
+        X = y.index
+        Y = list(y)
+    else:
+        X = []
+        Y = []
+        # Up to k non-NaN samples before n, nearest first ...
+        index = n - 1
+        while len(X) < k and index >= 0:
+            if not np.isnan(s[index]):
+                Y.append(s[index])
+                X.append(index)
+            index -= 1
+        # ... put back into ascending order.
+        X.reverse()
+        Y.reverse()
+
+        # Then samples after n until 2*k are held in total. The bound is
+        # strict: index == len(s) is already past the last element.
+        index = n + 1
+        while len(X) < 2 * k and index < len(s):
+            if not np.isnan(s[index]):
+                Y.append(s[index])
+                X.append(index)
+            index += 1
+
+    return newton(X, Y, x_j)
+
+
+def execute():
+    """Fill every gap of every column of ``cat_sale`` and save the result.
+
+    Zeros are first turned into NaN. Each column that has missing values is
+    then repaired row by row with ``ployinterp_columns``, printing the
+    missing-value count before and after. Finally the frame is printed and
+    written to ``jb4q_8_dealed.csv``.
+    """
+    # Assign on the frame itself; the chained `cat_sale[:][...] = ...` form
+    # is not guaranteed to write through to cat_sale.
+    cat_sale[cat_sale == 0] = np.nan
+    for i in cat_sale.columns:
+        # Computed once per column: filling row j never changes whether
+        # another row is missing, so re-deriving the mask per row was
+        # quadratic work for the same answer.
+        missing = cat_sale[i].isnull()
+        if missing.any():
+            print("{0}列处理前空行数:{1}".format(i, missing.sum()))
+            for j in range(len(cat_sale)):
+                if missing[j]:
+                    x_j = cat_sale.index[j]
+                    # Pass the live column so earlier fills are visible.
+                    cat_sale.loc[j, i] = ployinterp_columns(cat_sale[i], j, x_j)
+                    print('第{0}行牛顿插值为{1}'.format(j, cat_sale.loc[j, i]))
+            print("{0}列处理后空行数:{1}".format(i, cat_sale[i].isnull().sum()))
+            print("========================================")
+    print(cat_sale)
+    # Raw string keeps the backslashes literal without invalid escapes.
+    cat_sale.to_csv(r"G:\data\SCADA数据\jb4q_8_dealed.csv")
+
+
+def test():
+    """Fill the gaps of the ``num_gearbox_sumptemp`` column in fast mode.
+
+    Zeros in ``cat_sale`` are turned into NaN first; every missing row of
+    the column is then replaced by ``ployinterp_columns(..., is_fast=True)``.
+    """
+    # Assign on the frame itself; the chained `cat_sale[:][...] = ...` form
+    # is not guaranteed to write through to cat_sale.
+    cat_sale[cat_sale == 0] = np.nan
+    column = 'num_gearbox_sumptemp'
+    # One mask for the whole pass instead of one per row.
+    missing = cat_sale[column].isnull()
+    for j in range(len(cat_sale[column])):
+        if missing[j]:
+            x_j = cat_sale.index[j]
+            cat_sale.loc[j, column] = ployinterp_columns(cat_sale[column], j, x_j, is_fast=True)
+
+
+if __name__ == '__main__':
+ start = timeit.default_timer()
+ # execute()
+ test()
+ end = timeit.default_timer()
+ print('Running time: %s Seconds' % (end - start))
+ # 返回值是浮点数
+
diff --git a/TensorFlow_eaxmple/Model_train_test/condition_monitoring/dataETL/__init__.py b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/dataETL/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/TensorFlow_eaxmple/Model_train_test/condition_monitoring/dataETL/lagrangeInsert.py b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/dataETL/lagrangeInsert.py
new file mode 100644
index 0000000..3591cdf
--- /dev/null
+++ b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/dataETL/lagrangeInsert.py
@@ -0,0 +1,96 @@
+# _*_ coding: UTF-8 _*_
+
+
+'''
+@Author : dingjiawen
+@Date : 2022/7/11 11:43
+@Usage :
+@Desc :
+'''
+
+
+import numpy as np
+import pandas as pd
+
+
+# Lagrange interpolation
+def LagrangeInterpolation(slices, x, k=5):
+    """Estimate ``slices[x]`` from its nearest non-NaN neighbours.
+
+    :param slices: series holding the samples; read with ``slices[i]``
+    :param x: position of the value to estimate
+    :param k: number of defining points wanted on each side of ``x``
+    :return: the Lagrange polynomial through the collected points,
+        evaluated at ``x`` (0 when no neighbour is found)
+    """
+    X = []
+    Y = []
+    # Up to k non-NaN samples before x, nearest first ...
+    index = x - 1
+    while len(X) < k and index >= 0:
+        if not np.isnan(slices[index]):
+            Y.append(slices[index])
+            X.append(index)
+        index -= 1
+    # ... put back into ascending order.
+    X.reverse()
+    Y.reverse()
+
+    # Then samples after x until 2*k are held in total. The bound is
+    # strict: index == len(slices) is already past the last element.
+    index = x + 1
+    while len(X) < 2 * k and index < len(slices):
+        if not np.isnan(slices[index]):
+            Y.append(slices[index])
+            X.append(index)
+        index += 1
+
+    result = 0
+    for j, (x_node, y_node) in enumerate(zip(X, Y)):
+        # Basis polynomial of node j evaluated at x
+        basis = 1
+        for i, other in enumerate(X):
+            if i != j:
+                basis = basis * (x - other) / (x_node - other)
+        # Weight with the collected sample Y[j]; slices[j] would read row j
+        # of the series, which is unrelated to the j-th defining point.
+        result = result + y_node * basis
+
+    return result
+
+
+
+
+
+
+if __name__ == '__main__':
+    # Raw strings keep the Windows backslashes literal without relying on
+    # invalid escape sequences.
+    path = r"G:\data\SCADA数据\jb4q_8.csv"
+
+    df = pd.read_csv(path)
+    columns = df.columns
+    print(df.columns)
+
+    # Treat zero readings as missing values. Assign on the frame itself;
+    # the chained `df[:][...] = ...` form is not guaranteed to write through.
+    df[df == 0] = np.nan
+
+    # Single-column run
+    target = 'num_gearbox_sumptemp'
+    print(df[target].isnull())
+    print("之前的空值数量:", df[target].isnull().sum())
+    # One mask per pass: filling row j never changes whether another row
+    # is missing.
+    missing = df[target].isnull()
+    for j in range(len(df)):
+        if missing[j]:
+            df.loc[j, target] = LagrangeInterpolation(df[target], j, 5)
+    print("插值之后的空值数量:", df[target].isnull().sum())
+
+    # Whole-frame run
+    print("之前的空值数量:", df.isnull().sum())
+    for i in columns:
+        missing = df[i].isnull()
+        if missing.any():
+            for j in range(len(df)):
+                if missing[j]:
+                    # i is already the column label; columns[i] tried to
+                    # index the column Index with that label and failed.
+                    df.loc[j, i] = LagrangeInterpolation(df[i], j, 3)
+
+    print("插值之后的空值数量:",df.isnull().sum())
+    df.to_csv(r"G:\实验室/2022项目中期\数据治理算法\jb4q_8_lagrange.csv")
diff --git a/TensorFlow_eaxmple/Model_train_test/condition_monitoring/dataETL/scada_data_process_for_JBYQ_YSD.py b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/dataETL/scada_data_process_for_JBYQ_YSD.py
new file mode 100644
index 0000000..68755dc
--- /dev/null
+++ b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/dataETL/scada_data_process_for_JBYQ_YSD.py
@@ -0,0 +1,149 @@
+# -*- coding: utf-8 -*-
+"""
+Created on Sun Jun 7 09:23:31 2020
+
+@author: AlbertHu
+"""
+
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Jun 5 21:33:46 2020
+
+@author: AlbertHu
+"""
+
+# -*- coding: utf-8 -*-
+"""
+Created on Fri Jun 5 10:40:27 2020
+
+@author: AlbertHu
+"""
+
+import os
+import time
+import numpy as np
+import pandas as pd
+import datetime
+
+def findallfiles(cmsfilesfatherpath):
+    """Return the paths of the entries directly inside ``cmsfilesfatherpath``.
+
+    Sub-directories are skipped and are not descended into; every other
+    entry is returned joined with the parent path, in ``os.listdir`` order.
+    """
+    candidates = (
+        os.path.join(cmsfilesfatherpath, entry)
+        for entry in os.listdir(cmsfilesfatherpath)
+    )
+    return [candidate for candidate in candidates if not os.path.isdir(candidate)]
+def findIndexOfExceptPoint(data):
+    """Collect the index labels of rows that break one of five sanity rules.
+
+    :param data: frame holding the SCADA columns read below (the column
+        names start with a space)
+    :return: ``[rule1, rule2, rule3, rule4, rule5]``, each a list of the
+        index labels of the rows that violate that rule
+
+    Rules, as coded:
+      1. wind speed / pitch angle / active power combination out of range
+      2. any of six temperatures has an absolute value above 200
+      3. a high-speed-shaft temperature above 80, or the two differ by
+         more than 20
+      4. active power above 100 while the gearbox inlet pressure is <= 0
+      5. gearbox inlet and pump outlet pressures differ by more than 5
+    """
+    indexList1 = []
+    indexList2 = []
+    indexList3 = []
+    indexList4 = []
+    indexList5 = []
+    print("开始清洗")
+
+    # Look each column up once instead of once per row.
+    wind = data[' 瞬时风速']
+    pitch = data[' 1#叶片变桨角度']
+    power = data[' 有功功率']
+    shaft_front = data[' 齿轮箱高速轴前端温度']
+    shaft_rear = data[' 齿轮箱高速轴后端温度']
+    cooling_water = data[' 齿轮箱冷却水温']
+    inlet_oil = data[' 齿轮箱进口油温']
+    sump = data[' 齿轮箱油池温度']
+    ambient = data[' 环境温度']
+    inlet_press = data[' 齿轮箱进口压力']
+    pump_press = data[' 齿轮箱泵出口压力']
+
+    for i in data.index:
+        if i % 10000 == 0:
+            print("已处理了{}组数据".format(i))
+        w = wind[i]
+        angle = pitch[i]
+        p = power[i]
+        front = shaft_front[i]
+        rear = shaft_rear[i]
+
+        # Rule 1: the wind-speed bands are disjoint, so the original
+        # if/elif chain is a plain "or" of the four cases.
+        if ((w < 3.5 and angle > 89)
+                or (3.5 <= w <= 10 and angle > 0.5)
+                or (11 <= w <= 25 and p < 1800 and angle > 1.5)
+                or (w > 25 and p > 0)):
+            indexList1.append(i)
+
+        # Rule 2: abs() is applied to every reading before the comparison.
+        # The ambient-temperature term used to be abs(x > 200), which can
+        # never flag a large negative value.
+        temperatures = (front, rear, cooling_water[i], inlet_oil[i], sump[i], ambient[i])
+        if any(abs(t) > 200 for t in temperatures):
+            indexList2.append(i)
+
+        # Rule 3
+        if front > 80 or rear > 80 or abs(front - rear) > 20:
+            indexList3.append(i)
+
+        # Rule 4
+        if p > 100 and inlet_press[i] <= 0:
+            indexList4.append(i)
+
+        # Rule 5
+        if abs(inlet_press[i] - pump_press[i]) > 5:
+            indexList5.append(i)
+
+    return [indexList1, indexList2, indexList3, indexList4, indexList5]
+
+
+
+
+fathpath = r'D:\1.SCADA_风电数据\靖边二期2019_已处理'
+allfilepaths = findallfiles(fathpath)
+
+# Clean every file of the directory: record the offending index labels per
+# rule, drop those rows, and save the remaining data.
+for testpath in allfilepaths:
+    data = pd.read_csv(testpath, encoding='gbk', parse_dates=['时间'])
+
+    indexList2D = findIndexOfExceptPoint(data)
+
+    savePath = r'./cleanScada/JB2Q615/风机{}'.format(data['风机号'][1])
+    os.makedirs(savePath, exist_ok=True)
+    # "with" closes the report even if a write or drop fails.
+    with open(savePath + '/IndexOfExceptPoint.txt', 'w') as file:
+        for a, List in enumerate(indexList2D, start=1):
+            file.write(''.join(str(i) + ',' for i in List))
+            file.write('第{}组\n'.format(a))
+            # A row can break several rules, so a label may already be
+            # gone; errors='ignore' skips exactly that case instead of a
+            # bare except that would hide every other failure too.
+            data.drop(index=List, inplace=True, errors='ignore')
+
+    data.to_csv(savePath + '.csv', encoding='gbk')
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/TensorFlow_eaxmple/Model_train_test/condition_monitoring/data_deal/baseETL.py b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/data_deal/baseETL.py
new file mode 100644
index 0000000..402886e
--- /dev/null
+++ b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/data_deal/baseETL.py
@@ -0,0 +1,67 @@
+# _*_ coding: UTF-8 _*_
+
+
+'''
+@Author : dingjiawen
+@Date : 2022/7/7 10:29
+@Usage : 对SCADA数据进行基础的清洗工作
+@Desc :
+'''
+
+import tensorflow as tf
+import pandas as pd
+import numpy as np
+import os
+import time
+from condition_monitoring.lib.IOBase import ioLib
+
+'''
+超参数设置
+'''
+# Parent directory of the files to process (raw string: "\d" and "\S" are
+# not valid escape sequences)
+fatherPath = r"G:\data\SCADA数据\华能三塘湖"
+# Parent directory for the processed files
+fatherDealedPath = r"G:\data\SCADA数据\华能三塘湖\dealed"
+
+baseUseCols = ["时间", "风机号", "发电机转矩", "发电机无功功率", "发电机转速", "发电机有功功率", "发电机绕组最高温度", "齿轮箱油池温度", "齿轮箱进口油温", "齿轮箱进口压力",
+ "齿轮箱油泵出口压力", "齿轮箱冷却水温度", "有功功率", "60s平均有功功率", "10min平均有功功率", "10s平均有功功率", "10s平均无功功率", "无功功率", "瞬时风速",
+ "机舱温度"]
+
+baseWinds = []
+
+# List the files directly under the parent directory
+def listFile(fatherPath = fatherPath):
+    """Return the paths of the entries directly inside ``fatherPath``.
+
+    Sub-directories are skipped and are not descended into; every other
+    entry is returned joined with ``fatherPath``, in ``os.listdir`` order.
+    """
+    joined = (os.path.join(fatherPath, name) for name in os.listdir(fatherPath))
+    return [candidate for candidate in joined if not os.path.isdir(candidate)]
+
+
+def dropNa(filePath):
+    """Load the base columns of a GBK-encoded CSV, drop incomplete rows, save.
+
+    :param filePath: path of the CSV file to clean
+
+    Only the columns in ``baseUseCols`` are read. The frame is printed
+    before and after the rows containing any missing value are removed,
+    then handed to ``ioLib.saveCSV`` with ``fatherDealedPath``.
+    """
+    data = pd.read_csv(filePath, low_memory=False, encoding='gbk', usecols=baseUseCols, parse_dates=['时间'])
+    print(data)
+    data.dropna(axis=0, how='any', inplace=True)
+    print(data)
+    # The stray argument-less data.append() call was removed: it raised
+    # before the save below could ever run.
+    ioLib.saveCSV(data=data, savePath=fatherDealedPath)
+
+
+
+def separateByWindNum(data):
+    """Unfinished stub: creates three empty working lists and returns None.
+
+    ``data`` is not used yet.
+    """
+    indexLists, windList1, windList2 = [], [], []
+
+
+
+if __name__ == '__main__':
+ filePath = "G:\data\SCADA数据\华能三塘湖/1华能三塘湖20180730-20180803.csv"
+
+
diff --git a/TensorFlow_eaxmple/Model_train_test/condition_monitoring/data_deal/loadData.py b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/data_deal/loadData.py
new file mode 100644
index 0000000..f110cb1
--- /dev/null
+++ b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/data_deal/loadData.py
@@ -0,0 +1,228 @@
+import pandas as pd
+import numpy as np
+import tensorflow as tf
+import csv
+import os
+import matplotlib.pyplot as plt
+import seaborn as sns
+
+'''设置数据源文件路径'''
+# source_path = r'G:\data\SCADA数据\jb4q_8.csv'
+source_path = "G:\data\SCADA数据\jb4q_8_delete_total_zero.csv"
+
+'''修改后的数据源存储路径'''
+save_path = r'G:\data\SCADA数据\jb4q_8_delete_total_zero.csv'
+
+'''需要的列'''
+
+
+# baseUseCols = ["num_gearbox_sumptemp","num_gearbox_inletoiltemp","num_gearbox_inletpress","num_gearbox_coolingwatertemp"]
+
+# target_path = r'G:\data\SCADA数据\华能三塘湖/dealed/后十万2018.01.16.csv'
+# target_folder = r'G:\data\SCADA数据\华能三塘湖/dealed'
+
+
+# 生成文件夹
+def folderGenerate(folder_name):
+ if not os.path.exists(folder_name):
+ os.makedirs(folder_name)
+
+
+# 皮尔逊相关系数
+def cal_correlation_coefficient(data, label):
+ print("计算皮尔逊相关系数")
+ print(data)
+ print(data.shape)
+ pd_data = pd.DataFrame(data)
+ person = pd_data.corr()
+ print(person)
+ # 画热点图heatmap
+ # cmap = sns.heatmap(person, annot=True, xticklabels=label, yticklabels=label)
+ # plt.figure(1, figsize=(6.0, 2.68))
+ # plt.subplots_adjust(left=0.1, right=0.94, bottom=0.2, top=0.9, wspace=None,
+ # hspace=None)
+ # plt.tight_layout()
+ # font1 = {'family': 'Times New Roman', 'weight': 'normal', 'size': 10} # 设置坐标标签的字体大小,字体
+ # font2 = {'family': 'Times New Roman', 'weight': 'normal', 'size': 15} # 设置坐标标签的字体大小,字体
+ # plt.xlabel("X", size=10,fontdict=font1)
+ # plt.ylabel("Y", size=10,fontdict=font1)
+ # plt.title("Heatmap of correlation coefficient matrix", size=20,fontdict=font1)
+ #
+ # # 调整色带的标签:
+ # cbar = cmap.collections[0].colorbar
+ # cbar.ax.tick_params(labelsize=15, labelcolor="black")
+ # cbar.ax.set_ylabel(ylabel="color scale", color="red", loc="center",fontdict=font2)
+ #
+ # plt.show()
+ return person
+
+
+def get_most_N_correlation_coefficient(person, N=10):
+ print("获得相关度最高的{}个值".format(N))
+ # total_correlation = person[1:, 1:]
+ abs_correlation = np.abs(person)
+ one = np.ones(shape=abs_correlation.shape)
+ two = np.subtract(one, abs_correlation)
+ rows, cols = two.shape
+ total_sum = []
+ for i in range(cols):
+ # print(two[i])
+ total = np.sum(two[i])
+ total_sum.append(total)
+
+ print("total_sum:", total_sum)
+ # 取最小的N个数,因为是与1减了以后的,越小相关系数越大
+ print("arg:",np.argpartition(total_sum, N))
+ min = np.argpartition(total_sum, N)[:N]
+ max = np.argpartition(total_sum, N)[total_sum.__len__() - N:]
+ print("min:",min)
+ return min
+
+
+# 过滤或者线性填充
+def findIndexOfExceptPoint(data: pd.DataFrame):
+ # indexList2D = []
+ # indexList = []
+ # indexList2 = []
+ # indexList3 = []
+ # indexList4 = []
+ indexList = []
+ print("开始清洗")
+ for i in data.index:
+ if i % 10000 == 0:
+ print("已处理了{}条数据".format(i))
+ ## 删除绝大多数0
+ # if data['num_gearbox_sumptemp'][i] != 0 and (i < 416166 or i > 432766) and (
+ # data['num_gearbox_pumpoutletpress'][i] == 0 or data['num_activepower'][i] == 0 or
+ # data['num_gen_torque'][i] == 0):
+ # indexList.append(i)
+ # 删除全部有0
+ # if (i < 416166 or i > 432766) and (
+ # data['num_gearbox_pumpoutletpress'][i] == 0 or data['num_activepower'][i] == 0 or
+ # data['num_gen_torque'][i] == 0):
+ # indexList.append(i)
+ # 只删除全部0
+ if (i < 416166 or i > 432766) and (
+ data['num_gearbox_sumptemp'][i] == 0 and data['num_gearbox_inletoiltemp'][i] == 0 and
+ data['num_gearbox_inletpress'][i] == 0):
+ indexList.append(i)
+ else:
+ pass
+
+ # indexList2D = [indexList1, indexList2, indexList3, indexList4, indexList5]
+ indexList2D = set(indexList)
+ print("要移除的index:", indexList2D)
+ return indexList2D
+
+
+# 根据index移除异常数据
+def removeDataByIndex(indexList, data):
+ print("开始移除异常index的数据")
+ a = 1
+ data.drop(indexList, inplace=True)
+ # for i in indexList:
+ # try:
+ # data.drop([i], inplace=True)
+ # except:
+ # continue
+ # # print('第{}组\n'.format(a))
+ # # a += 1
+ return data
+
+
+# 处理数据(移除,重新赋值,或者是其他操作)
+def dealData(scada_data: pd.DataFrame):
+ # 是否保存处理好的数据
+ Is_save = True
+ indexList = findIndexOfExceptPoint(scada_data)
+ removeDataByIndex(indexList=indexList, data=scada_data)
+ print("处理后的数据为:")
+ print(scada_data)
+ if Is_save:
+ print("============保存处理好的数据,路径为{}============".format(save_path))
+ scada_data.to_csv(save_path, index=False, encoding='gbk')
+
+ return scada_data
+
+
+# 读取数据,转为numpy数组或者tf数组
+def read_data(file_name, isNew: bool = False):
+ ''' 导入数据 '''
+ with open(file_name, 'r') as f:
+ if isNew:
+ # scada_data = pd.read_csv(f,low_memory=False, encoding='gbk', usecols=baseUseCols, parse_dates=['时间'])
+ scada_data = pd.read_csv(f, low_memory=False, encoding='gbk', parse_dates=['realtime'])
+ print(scada_data)
+ scada_data = dealData(scada_data=scada_data)
+ print(scada_data.head)
+ scada_data = np.array(scada_data)
+ else:
+ scada_data = np.loadtxt(f, str, delimiter=",")
+ label = scada_data[0, 3:]
+ label=list(['Gs','Gio','Gip','Gp','Gwt','En','Gft','Grt','Gwt','Et','Rs','Ap','Ws','Dw','Ges','Gt','Vx','Vy'])
+ print("导入数据成功,将数据转为numpy或tf数组...")
+ needed_data = scada_data[1:, 3:].astype(dtype=np.float)
+ ## needed_data = tf.cast(needed_data, tf.float32) tensor无法转为pd.DataFrame
+ print(needed_data)
+ print("转换成功,并返回...")
+ return needed_data, label
+
+
+def plot_original_data(data):
+ rows, cols = data.shape
+ print("开始画图...")
+
+ for i in range(cols):
+ plt.figure(i)
+ plt.plot(data[:, i])
+ plt.show()
+
+
+def execute(file_name=source_path,N=10):
+ needed_data, label = read_data(file_name=file_name, isNew=False)
+ print(needed_data)
+ print(needed_data.shape)
+ # plot_original_data(needed_data)
+ person = cal_correlation_coefficient(needed_data, label)
+ person = np.array(person)
+ min = get_most_N_correlation_coefficient(person, N=N)
+
+ for index in min:
+ if index == min[0]:
+ total_data = np.expand_dims(needed_data[:, index], axis=-1)
+ else:
+ total_data = np.concatenate([total_data, np.expand_dims(needed_data[:, index], axis=-1)], axis=-1)
+
+ return total_data
+
+
+def deal_data(file_name=source_path):
+ ''' 导入数据 '''
+ with open(file_name, 'r') as f:
+
+ # scada_data = pd.read_csv(f,low_memory=False, encoding='gbk', usecols=baseUseCols, parse_dates=['时间'])
+ scada_data = pd.read_csv(f, low_memory=False, encoding='gbk', parse_dates=['realtime'])
+ print(scada_data)
+ scada_data = dealData(scada_data=scada_data)
+ print(scada_data.head)
+ scada_data = np.array(scada_data)
+
+ scada_data = np.loadtxt(f, str, delimiter=",")
+ label = scada_data[0, 3:]
+ label = list(
+ ['Gs', 'Gio', 'Gip', 'Gp', 'Gwt', 'En', 'Gft', 'Grt', 'Gwt', 'Et', 'Rs', 'Ap', 'Ws', 'Dw', 'Ges', 'Gt',
+ 'Vx', 'Vy'])
+ print("导入数据成功,将数据转为numpy或tf数组...")
+ needed_data = scada_data[1:, 3:].astype(dtype=np.float)
+ ## needed_data = tf.cast(needed_data, tf.float32) tensor无法转为pd.DataFrame
+ print(needed_data)
+ print("转换成功,并返回...")
+ return needed_data, label
+ pass
+
+
+if __name__ == '__main__':
+ total_data = execute(N=10, file_name=source_path)
+ # print(total_data)
+ # print(total_data.shape)
+ # plot_original_data()
diff --git a/TensorFlow_eaxmple/Model_train_test/condition_monitoring/data_deal/loadData_daban.py b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/data_deal/loadData_daban.py
new file mode 100644
index 0000000..4a0f33b
--- /dev/null
+++ b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/data_deal/loadData_daban.py
@@ -0,0 +1,207 @@
+import pandas as pd
+import numpy as np
+import tensorflow as tf
+import csv
+import os
+import matplotlib.pyplot as plt
+import seaborn as sns
+
+'''设置数据源文件路径'''
+# source_path = r'G:\data\SCADA数据\jb4q_8.csv'
+source_path = "G:\data\SCADA数据\jb4q_8_delete_total_zero.csv"
+
+'''修改后的数据源存储路径'''
+save_path = r'G:\data\SCADA数据\jb4q_8_delete_total_zero.csv'
+
+'''需要的列'''
+
+
+# baseUseCols = ["num_gearbox_sumptemp","num_gearbox_inletoiltemp","num_gearbox_inletpress","num_gearbox_coolingwatertemp"]
+
+# target_path = r'G:\data\SCADA数据\华能三塘湖/dealed/后十万2018.01.16.csv'
+# target_folder = r'G:\data\SCADA数据\华能三塘湖/dealed'
+
+#96748 107116
+
+
+# 生成文件夹
+def folderGenerate(folder_name):
+ if not os.path.exists(folder_name):
+ os.makedirs(folder_name)
+
+
+# 皮尔逊相关系数
+def cal_correlation_coefficient(data, label):
+ print("计算皮尔逊相关系数")
+ pd_data = pd.DataFrame(data)
+ person = pd_data.corr()
+ print(person)
+ # 画热点图heatmap
+ # cmap = sns.heatmap(person, annot=True, xticklabels=label, yticklabels=label)
+ # plt.figure(1, figsize=(6.0, 2.68))
+ # plt.subplots_adjust(left=0.1, right=0.94, bottom=0.2, top=0.9, wspace=None,
+ # hspace=None)
+ # plt.tight_layout()
+ # font1 = {'family': 'Times New Roman', 'weight': 'normal', 'size': 10} # 设置坐标标签的字体大小,字体
+ # font2 = {'family': 'Times New Roman', 'weight': 'normal', 'size': 15} # 设置坐标标签的字体大小,字体
+ # plt.xlabel("X", size=10,fontdict=font1)
+ # plt.ylabel("Y", size=10,fontdict=font1)
+ # plt.title("Heatmap of correlation coefficient matrix", size=20,fontdict=font1)
+ #
+ # # 调整色带的标签:
+ # cbar = cmap.collections[0].colorbar
+ # cbar.ax.tick_params(labelsize=15, labelcolor="black")
+ # cbar.ax.set_ylabel(ylabel="color scale", color="red", loc="center",fontdict=font2)
+ #
+ # plt.show()
+ return person
+
+
+def get_most_N_correlation_coefficient(person, N=10):
+ print("获得相关度最高的{}个值".format(N))
+ # total_correlation = person[1:, 1:]
+ abs_correlation = np.abs(person)
+ one = np.ones(shape=abs_correlation.shape)
+ two = np.subtract(one, abs_correlation)
+ rows, cols = two.shape
+ total_sum = []
+ for i in range(cols):
+ # print(two[i])
+ total = np.sum(two[i])
+ total_sum.append(total)
+
+ print("total_sum:", total_sum)
+ # 取最小的N个数,因为是与1减了以后的,越小相关系数越大
+ print("arg:",np.argpartition(total_sum, N))
+ min = np.argpartition(total_sum, N)[:N]
+ max = np.argpartition(total_sum, N)[total_sum.__len__() - N:]
+ print("min:",min)
+ return min
+
+
+# 过滤或者线性填充
+def findIndexOfExceptPoint(data: pd.DataFrame):
+ # indexList2D = []
+ # indexList = []
+ # indexList2 = []
+ # indexList3 = []
+ # indexList4 = []
+ indexList = []
+ print("开始清洗")
+ for i in data.index:
+ if i % 10000 == 0:
+ print("已处理了{}条数据".format(i))
+ ## 删除绝大多数0
+ # if data['num_gearbox_sumptemp'][i] != 0 and (i < 416166 or i > 432766) and (
+ # data['num_gearbox_pumpoutletpress'][i] == 0 or data['num_activepower'][i] == 0 or
+ # data['num_gen_torque'][i] == 0):
+ # indexList.append(i)
+ # 删除全部有0
+ # if (i < 416166 or i > 432766) and (
+ # data['num_gearbox_pumpoutletpress'][i] == 0 or data['num_activepower'][i] == 0 or
+ # data['num_gen_torque'][i] == 0):
+ # indexList.append(i)
+ # 只删除全部0
+ if (i < 416166 or i > 432766) and (
+ data['num_gearbox_sumptemp'][i] == 0 and data['num_gearbox_inletoiltemp'][i] == 0 and
+ data['num_gearbox_inletpress'][i] == 0):
+ indexList.append(i)
+ else:
+ pass
+
+ # indexList2D = [indexList1, indexList2, indexList3, indexList4, indexList5]
+ indexList2D = set(indexList)
+ print("要移除的index:", indexList2D)
+ return indexList2D
+
+
+# 根据index移除异常数据
+def removeDataByIndex(indexList, data):
+ print("开始移除异常index的数据")
+ a = 1
+ data.drop(indexList, inplace=True)
+ # for i in indexList:
+ # try:
+ # data.drop([i], inplace=True)
+ # except:
+ # continue
+ # # print('第{}组\n'.format(a))
+ # # a += 1
+ return data
+
+
+# 处理数据(移除,重新赋值,或者是其他操作)
+def dealData(scada_data: pd.DataFrame):
+ # 是否保存处理好的数据
+ Is_save = True
+ indexList = findIndexOfExceptPoint(scada_data)
+ removeDataByIndex(indexList=indexList, data=scada_data)
+ print("处理后的数据为:")
+ print(scada_data)
+ if Is_save:
+ print("============保存处理好的数据,路径为{}============".format(save_path))
+ scada_data.to_csv(save_path, index=False, encoding='gbk')
+
+ return scada_data
+
+
+# 读取数据,转为numpy数组或者tf数组
+def read_data(file_name, isNew: bool = False):
+ ''' 导入数据 '''
+ with open(file_name, 'r') as f:
+ if isNew:
+ # scada_data = pd.read_csv(f,low_memory=False, encoding='gbk', usecols=baseUseCols, parse_dates=['时间'])
+ scada_data = pd.read_csv(f, low_memory=False, encoding='gbk', parse_dates=['realtime'])
+ print(scada_data)
+ scada_data = dealData(scada_data=scada_data)
+ print(scada_data.head)
+ scada_data = np.array(scada_data)
+ else:
+ scada_data = np.loadtxt(f, str, delimiter=",")
+ label = scada_data[0, 4:]
+ label=list(['Gs','Gio','Gip','Gp','Gwt','En','Gft','Grt','Gwt','Et','Rs','Ap','Ws','Dw','Ges','Gt','Vx','Vy'])
+ print("导入数据成功,将数据转为numpy或tf数组...")
+ needed_data = scada_data[1:, 4:].astype(dtype=np.float)
+ ## needed_data = tf.cast(needed_data, tf.float32) tensor无法转为pd.DataFrame
+ print(needed_data)
+ print("转换成功,并返回...")
+ return needed_data, label
+
+
+def plot_original_data(data):
+ rows, cols = data.shape
+ print("开始画图...")
+
+ for i in range(cols):
+ plt.figure(i)
+ plt.plot(data[:, i])
+ plt.show()
+
+
+def execute(file_name=source_path,N=10):
+ needed_data, label = read_data(file_name=file_name, isNew=False)
+ print(needed_data)
+ print(needed_data.shape)
+ # plot_original_data(needed_data)
+ person = cal_correlation_coefficient(needed_data, label)
+ person = np.array(person)
+ min = get_most_N_correlation_coefficient(person, N=N)
+
+ for index in min:
+ if index == min[0]:
+ total_data = np.expand_dims(needed_data[:, index], axis=-1)
+ else:
+ total_data = np.concatenate([total_data, np.expand_dims(needed_data[:, index], axis=-1)], axis=-1)
+
+ return total_data
+
+
+if __name__ == '__main__':
+ # total_data = execute(N=10, file_name=source_path)
+ # print(total_data)
+ # print(total_data.shape)7 10 13
+ # 15中间有一段差别很大
+ file_name='H:\data\SCADA数据\SCADA_已处理_粤水电达坂城2020.1月-5月/风机15.csv'
+ needed_data, label = read_data(file_name=file_name, isNew=False)
+ print(needed_data.shape)
+ plot_original_data(needed_data)
diff --git a/TensorFlow_eaxmple/Model_train_test/condition_monitoring/data_deal/plot_raw_data.py b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/data_deal/plot_raw_data.py
new file mode 100644
index 0000000..b0258fa
--- /dev/null
+++ b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/data_deal/plot_raw_data.py
@@ -0,0 +1,60 @@
+# -*- coding: utf-8 -*-
+
+# coding: utf-8
+
+'''
+@Author : dingjiawen
+@Date : 2022/11/2 12:59
+@Usage : 画原始数据
+@Desc :
+'''
+import pandas as pd
+import numpy as np
+
+
+
+
+source_path = "G:\data\SCADA数据\jb4q_8_delete_total_zero.csv"
+
+def deal_data(file_name=source_path):
+ ''' 导入数据 '''
+ with open(file_name, 'r') as f:
+ scada_data = np.loadtxt(f, str, delimiter=",")
+ label = scada_data[0, 3:]
+ label = list(
+ ['Gs', 'Gio', 'Gip', 'Gp', 'Gwt', 'En', 'Gft', 'Grt', 'Gwt', 'Et', 'Rs', 'Ap', 'Ws', 'Dw', 'Ges', 'Gt',
+ 'Vx', 'Vy'])
+ print("导入数据成功,将数据转为numpy或tf数组...")
+ needed_data = scada_data[1:37000, 3:].astype(dtype=np.float)
+ ## needed_data = tf.cast(needed_data, tf.float32) tensor无法转为pd.DataFrame
+ print(needed_data)
+ print("转换成功,并返回...")
+ return needed_data, label
+ pass
+
+
+# 归一化
+def normalization(data):
+ rows, cols = data.shape
+ print("归一化之前:", data)
+ print(data.shape)
+ print("======================")
+
+ # 归一化
+ max = np.max(data, axis=0)
+ max = np.broadcast_to(max, [rows, cols])
+ min = np.min(data, axis=0)
+ min = np.broadcast_to(min, [rows, cols])
+
+ data = (data - min) / (max - min)
+ print("归一化之后:", data)
+ print(data.shape)
+
+ return data
+
+
+if __name__ == '__main__':
+ needed_data, label=deal_data()
+ data=normalization(data=needed_data)
+ np.savetxt('G:\data\SCADA数据/normalization.csv',data,delimiter=',')
+ print(data.shape)
\ No newline at end of file
diff --git a/TensorFlow_eaxmple/Model_train_test/condition_monitoring/others_idea/CNN_GRU.py b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/others_idea/CNN_GRU.py
new file mode 100644
index 0000000..e2bb0f0
--- /dev/null
+++ b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/others_idea/CNN_GRU.py
@@ -0,0 +1,262 @@
+import tensorflow as tf
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+from condition_monitoring.data_deal import loadData
+from keras.callbacks import EarlyStopping
+import os
+import shutil
+
+# 孔师兄idea:CNN+GRU
+
+
+'''超参数设置'''
+time_stamp = 120
+feature_num = 10
+batch_size = 8
+learning_rate = 0.01
+EPOCH = 101
+model_name = "CNN_GRU"
+'''EWMA超参数'''
+K = 18
+namuda = 0.01
+'''保存名称'''
+save_name = "../model/{0}_timestamp{1}_featureNum{2}_batch_size{3}_Epoch{4}.h5".format(model_name,
+ time_stamp, feature_num,
+ batch_size, EPOCH)
+'''文件名'''
+file_name = "G:\data\SCADA数据\jb4q_8_delete_all_zero.csv"
+
+def remove(data, time_stamp=time_stamp):
+ rows, cols = data.shape
+ print("remove_data.shape:", data.shape)
+ num = int(rows / time_stamp)
+
+ return data[:num * time_stamp, :]
+ pass
+
+
+# 不重叠采样
+def get_training_data(data, time_stamp=time_stamp):
+ removed_data = remove(data=data)
+ rows, cols = removed_data.shape
+ # print("removed_data.shape:", data.shape)
+ # print("removed_data:", removed_data)
+ train_data = np.reshape(removed_data, [-1, time_stamp, cols])
+ # print("train_data:", train_data)
+ batchs, time_stamp, cols = train_data.shape
+
+ for i in range(1, batchs):
+ each_label = np.expand_dims(train_data[i, 0, :], axis=0)
+ if i == 1:
+ train_label = each_label
+ else:
+ train_label = np.concatenate([train_label, each_label], axis=0)
+
+ # print("train_data.shape:", train_data.shape)
+ # print("train_label.shape", train_label.shape)
+ return train_data[:-1, :], train_label
+
+
+# 重叠采样
+def get_training_data_overlapping(data,time_stamp=time_stamp):
+
+ rows,cols = data.shape
+ train_data = np.empty(shape=[rows-time_stamp-1,time_stamp,cols])
+ train_label = np.empty(shape=[rows-time_stamp-1,cols])
+ for i in range(rows):
+ if i +time_stamp >= rows:
+ break
+ if i + time_stamp < rows - 1:
+ train_data[i] = data[i:i+time_stamp]
+ train_label[i] = data[i+time_stamp]
+
+ print("重叠采样以后:")
+ print("data:",train_data)
+ print("label:",train_label)
+
+ return train_data,train_label
+
+
+
+def condition_monitoring_model():
+ input = tf.keras.Input(shape=[time_stamp, feature_num])
+ conv1 = tf.keras.layers.Conv1D(filters=256, kernel_size=1)(input)
+ GRU1 = tf.keras.layers.GRU(128, return_sequences=False)(conv1)
+ d1 = tf.keras.layers.Dense(300)(GRU1)
+ output = tf.keras.layers.Dense(10)(d1)
+ model = tf.keras.Model(inputs=input, outputs=output)
+
+ return model
+
+
+# 归一化
+def normalization(data):
+ rows, cols = data.shape
+ print("归一化之前:", data)
+ print(data.shape)
+ print("======================")
+
+ # 归一化
+ max = np.max(data, axis=0)
+ max = np.broadcast_to(max, [rows, cols])
+ min = np.min(data, axis=0)
+ min = np.broadcast_to(min, [rows, cols])
+
+ data = (data - min) / (max - min)
+ print("归一化之后:", data)
+ print(data.shape)
+
+ return data
+
+
+# 正则化
+def Regularization(data):
+ rows, cols = data.shape
+ print("正则化之前:", data)
+ print(data.shape)
+ print("======================")
+
+ # 正则化
+ mean = np.mean(data, axis=0)
+ mean = np.broadcast_to(mean, shape=[rows, cols])
+ dst = np.sqrt(np.var(data, axis=0))
+ dst = np.broadcast_to(dst, shape=[rows, cols])
+ data = (data - mean) / dst
+ print("正则化之后:", data)
+ print(data.shape)
+
+ return data
+ pass
+
+
+def EWMA(data, K=K, namuda=namuda):
+ # t是啥暂时未知
+ t = 0
+ mid = np.mean(data, axis=0)
+ standard = np.sqrt(np.var(data, axis=0))
+ UCL = mid + K * standard * np.sqrt(namuda / (2 - namuda) * (1 - (1 - namuda) ** 2 * t))
+ LCL = mid - K * standard * np.sqrt(namuda / (2 - namuda) * (1 - (1 - namuda) ** 2 * t))
+ return mid, UCL, LCL
+ pass
+
+
+def get_MSE(data, label, new_model):
+ predicted_data = new_model.predict(data)
+
+ temp = np.abs(predicted_data - label)
+ temp1 = (temp - np.broadcast_to(np.mean(temp, axis=0), shape=predicted_data.shape))
+ temp2 = np.broadcast_to(np.sqrt(np.var(temp, axis=0)), shape=predicted_data.shape)
+ temp3 = temp1/temp2
+ mse = np.sum((temp1 / temp2) ** 2, axis=1)
+ print("z:", mse)
+ print(mse.shape)
+
+ # mse=np.mean((predicted_data-label)**2,axis=1)
+ print("mse", mse)
+
+ dims, = mse.shape
+
+ mean = np.mean(mse)
+ std = np.sqrt(np.var(mse))
+ max = mean + 3 * std
+ # min = mean-3*std
+ max = np.broadcast_to(max, shape=[dims, ])
+ # min = np.broadcast_to(min,shape=[dims,])
+ mean = np.broadcast_to(mean, shape=[dims, ])
+
+ # plt.plot(max)
+ # plt.plot(mse)
+ # plt.plot(mean)
+ # # plt.plot(min)
+ # plt.show()
+ #
+ #
+ return mse,mean,max
+ # pass
+
+
+if __name__ == '__main__':
+ total_data = loadData.execute(N=feature_num,file_name=file_name)
+ total_data = normalization(data=total_data)
+ train_data, train_label = get_training_data_overlapping(total_data[:300455, :])
+
+ ## TODO training
+ # model = condition_monitoring_model()
+ # checkpoint = tf.keras.callbacks.ModelCheckpoint(
+ # filepath=save_name,
+ # monitor='val_loss',
+ # verbose=1,
+ # save_best_only=True,
+ # mode='min',
+ # period=1)
+ # lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.0001)
+ # early_stop = EarlyStopping(monitor='val_loss', min_delta=0.0001, patience=30, mode='min', verbose=1)
+ # model.compile(optimizer=tf.optimizers.Adam(learning_rate=learning_rate), loss=tf.losses.mse)
+ # model.summary()
+ # model.fit(train_data, train_label, batch_size=batch_size, epochs=EPOCH, validation_split=0.1,
+ # callbacks=[checkpoint, lr_scheduler, early_stop])
+
+ ## TODO testing
+ print("===============================")
+ print(total_data.shape)
+ print("===============================")
+ test_data, test_label = get_training_data(total_data[:300455, :])
+ newModel = tf.keras.models.load_model(save_name)
+ mse,mean,max = get_MSE(test_data, test_label, new_model=newModel)
+ print("===============================")
+ print("mse:",mse)
+ print(mse.shape)
+ print("===============================")
+
+
+ test_data, test_label = get_training_data(total_data[20000:, :])
+ predicted_data = newModel.predict(test_data)
+ rows, cols = predicted_data.shape
+ print("=====================================")
+ print(predicted_data)
+ print(predicted_data.shape)
+ print("=====================================")
+
+ temp = np.abs(predicted_data - test_label)
+ temp1 = (temp - np.broadcast_to(np.mean(temp, axis=0), shape=predicted_data.shape))
+ temp2 = np.broadcast_to(np.sqrt(np.var(temp, axis=0)), shape=predicted_data.shape)
+ temp3 = temp1 / temp2
+ mse = np.sum((temp1 / temp2) ** 2, axis=1)
+ print("====================")
+ print("new_mse:",mse)
+ print(mse.shape)
+ np.savetxt("mse", mse, delimiter=',')
+ print("===================")
+
+ plt.plot(mse[2000:])
+ plt.plot(mean)
+ plt.plot(max)
+ plt.show()
+
+
+
+
+
+
+ data = pd.DataFrame(mse).ewm(span=3).mean()
+ print(data)
+ data =np.array(data)
+
+ index,_ = data.shape
+
+
+
+ for i in range(2396):
+ if data[i,0] >5:
+ data[i,0] = data[i-1,:]
+ print(data)
+ mean = data[2000:2396,:].mean()
+ std = data[2000:2396,:].std()
+ mean=np.broadcast_to(mean,shape=[500,])
+ std=np.broadcast_to(std,shape=[500,])
+ plt.plot(data[2000:2396,:])
+ plt.plot(mean)
+ plt.plot(mean+3*std)
+ plt.plot(mean-3*std)
+ plt.show()
diff --git a/TensorFlow_eaxmple/Model_train_test/condition_monitoring/others_idea/__init__.py b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/others_idea/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/TensorFlow_eaxmple/Model_train_test/condition_monitoring/self_try/Joint_Monitoring.py b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/self_try/Joint_Monitoring.py
new file mode 100644
index 0000000..939628a
--- /dev/null
+++ b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/self_try/Joint_Monitoring.py
@@ -0,0 +1,526 @@
+# -*- coding: utf-8 -*-
+
+# coding: utf-8
+import tensorflow as tf
+import tensorflow.keras
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+from model.DepthwiseCon1D.DepthwiseConv1D import DepthwiseConv1D
+from model.Dynamic_channelAttention.Dynamic_channelAttention import DynamicChannelAttention
+from condition_monitoring.data_deal import loadData
+from model.Joint_Monitoring.Joint_Monitoring2 import Joint_Monitoring
+
+from model.CommonFunction.CommonFunction import *
+from sklearn.model_selection import train_test_split
+from tensorflow.keras.models import load_model, save_model
+
+'''
+@Author : dingjiawen
+@Date : 2022/7/8 10:29
+@Usage : 尝试将预测和分类两种方式相结合,联合监测
+@Desc :REPVGG+unsampling+GRU进行重构,后面接GDP=全局动态池化+分类器
+随epoch衰减的MSELoss+随epoch增强的crossEntropy
+'''
+
+'''超参数设置'''
+time_stamp = 120
+feature_num = 10
+batch_size = 16
+learning_rate = 0.001
+EPOCH = 101
+model_name = "joint"
+'''EWMA超参数'''
+K = 18
+namuda = 0.01
+'''保存名称'''
+
+save_name = "../model/weight/{0}_timestamp{1}_feature{2}_Epoch{4}_weight/weight".format(model_name,
+ time_stamp,
+ feature_num,
+ batch_size,
+ EPOCH)
+save_step_two_name = "../model/two_weight/{0}_timestamp{1}_feature{2}_weight/weight".format(model_name,
+ time_stamp,
+ feature_num,
+ batch_size,
+ EPOCH)
+
+# save_name = "../model/joint/{0}_timestamp{1}_feature{2}.h5".format(model_name,
+# time_stamp,
+# feature_num,
+# batch_size,
+# EPOCH)
+# save_step_two_name = "../model/joint_two/{0}_timestamp{1}_feature{2}.h5".format(model_name,
+# time_stamp,
+# feature_num,
+# batch_size,
+# EPOCH)
+'''文件名'''
+file_name = "G:\data\SCADA数据\jb4q_8_delete_all_zero.csv"
+
+'''
+文件说明:jb4q_8_delete_all_zero.csv是删除了除异常以外的所有0值的文件
+文件从0:300454行均是正常值(2019/7.30 00:00:00 - 2019/9/18 11:21:00)
+从300455:317052行均是异常值(2019/9/18 11:21:01 - 2019/9/29 23:59:00)
+'''
+'''文件参数'''
+# 最后正常的时间点
+healthy_date = 300454
+# 最后异常的时间点
+unhealthy_date = 317052
+# 异常容忍程度
+unhealthy_patience = 5
+
+
+def remove(data, time_stamp=time_stamp):
+ rows, cols = data.shape
+ print("remove_data.shape:", data.shape)
+ num = int(rows / time_stamp)
+
+ return data[:num * time_stamp, :]
+ pass
+
+
+# 不重叠采样
+def get_training_data(data, time_stamp: int = time_stamp):
+ removed_data = remove(data=data)
+ rows, cols = removed_data.shape
+ print("removed_data.shape:", data.shape)
+ print("removed_data:", removed_data)
+ train_data = np.reshape(removed_data, [-1, time_stamp, cols])
+ print("train_data:", train_data)
+ batchs, time_stamp, cols = train_data.shape
+
+ for i in range(1, batchs):
+ each_label = np.expand_dims(train_data[i, 0, :], axis=0)
+ if i == 1:
+ train_label = each_label
+ else:
+ train_label = np.concatenate([train_label, each_label], axis=0)
+
+ print("train_data.shape:", train_data.shape)
+ print("train_label.shape", train_label.shape)
+ return train_data[:-1, :], train_label
+
+
+# 重叠采样
+def get_training_data_overlapping(data, time_stamp: int = time_stamp, is_Healthy: bool = True):
+ rows, cols = data.shape
+ train_data = np.empty(shape=[rows - time_stamp - 1, time_stamp, cols])
+ train_label = np.empty(shape=[rows - time_stamp - 1, cols])
+ for i in range(rows):
+ if i + time_stamp >= rows:
+ break
+ if i + time_stamp < rows - 1:
+ train_data[i] = data[i:i + time_stamp]
+ train_label[i] = data[i + time_stamp]
+
+ print("重叠采样以后:")
+ print("data:", train_data) # (300334,120,10)
+ print("label:", train_label) # (300334,10)
+
+ if is_Healthy:
+ train_label2 = np.ones(shape=[train_label.shape[0]])
+ else:
+ train_label2 = np.zeros(shape=[train_label.shape[0]])
+
+ print("label2:", train_label2)
+
+ return train_data, train_label, train_label2
+
+
+# RepConv重参数化卷积
+def RepConv(input_tensor, k=3):
+ _, _, output_dim = input_tensor.shape
+ conv1 = tf.keras.layers.Conv1D(filters=output_dim, kernel_size=k, strides=1, padding='SAME')(input_tensor)
+ b1 = tf.keras.layers.BatchNormalization()(conv1)
+
+ conv2 = tf.keras.layers.Conv1D(filters=output_dim, kernel_size=1, strides=1, padding='SAME')(input_tensor)
+ b2 = tf.keras.layers.BatchNormalization()(conv2)
+
+ b3 = tf.keras.layers.BatchNormalization()(input_tensor)
+
+ out = tf.keras.layers.Add()([b1, b2, b3])
+ out = tf.nn.relu(out)
+ return out
+
+
+# RepBlock模块
+def RepBlock(input_tensor, num: int = 3):
+ for i in range(num):
+ input_tensor = RepConv(input_tensor)
+ return input_tensor
+
+
+# GAP 全局平均池化
+def Global_avg_channelAttention(input_tensor):
+ _, length, channel = input_tensor.shape
+ DWC1 = DepthwiseConv1D(kernel_size=1, padding='SAME')(input_tensor)
+ GAP = tf.keras.layers.GlobalAvgPool1D()(DWC1)
+ c1 = tf.keras.layers.Conv1D(filters=channel, kernel_size=1, padding='SAME')(GAP)
+ s1 = tf.nn.sigmoid(c1)
+ output = tf.multiply(input_tensor, s1)
+ return output
+
+
+# GDP: global dynamic pooling channel attention
+def Global_Dynamic_channelAttention(input_tensor):
+ _, length, channel = input_tensor.shape
+ DWC1 = DepthwiseConv1D(kernel_size=1, padding='SAME')(input_tensor)
+
+ # GAP branch: global average pooling -> 1x1 convolution -> sigmoid gate s1
+ GAP = tf.keras.layers.GlobalAvgPool1D()(DWC1)
+ c1 = tf.keras.layers.Conv1D(filters=channel, kernel_size=1, padding='SAME')(GAP)
+ s1 = tf.nn.sigmoid(c1)
+
+ # GMP branch: global max pooling -> 1x1 convolution -> sigmoid gate s3
+ GMP = tf.keras.layers.GlobalMaxPool1D()(DWC1)
+ c2 = tf.keras.layers.Conv1D(filters=channel, kernel_size=1, padding='SAME')(GMP)
+ s3 = tf.nn.sigmoid(c2)
+ # NOTE(review): s3 is never used below - only the GAP gate s1 scales the input. Confirm whether the two gates were meant to be combined.
+ output = tf.multiply(input_tensor, s1)
+ return output
+
+
+# 归一化
+def normalization(data):
+ rows, cols = data.shape
+ print("归一化之前:", data)
+ print(data.shape)
+ print("======================")
+
+ # 归一化
+ max = np.max(data, axis=0)
+ max = np.broadcast_to(max, [rows, cols])
+ min = np.min(data, axis=0)
+ min = np.broadcast_to(min, [rows, cols])
+
+ data = (data - min) / (max - min)
+ print("归一化之后:", data)
+ print(data.shape)
+
+ return data
+
+
+# 正则化
+def Regularization(data):
+ rows, cols = data.shape
+ print("正则化之前:", data)
+ print(data.shape)
+ print("======================")
+
+ # 正则化
+ mean = np.mean(data, axis=0)
+ mean = np.broadcast_to(mean, shape=[rows, cols])
+ dst = np.sqrt(np.var(data, axis=0))
+ dst = np.broadcast_to(dst, shape=[rows, cols])
+ data = (data - mean) / dst
+ print("正则化之后:", data)
+ print(data.shape)
+
+ return data
+ pass
+
+
+def EWMA(data, K=K, namuda=namuda):
+ # t是啥暂时未知
+ t = 0
+ mid = np.mean(data, axis=0)
+ standard = np.sqrt(np.var(data, axis=0))
+ UCL = mid + K * standard * np.sqrt(namuda / (2 - namuda) * (1 - (1 - namuda) ** 2 * t))
+ LCL = mid - K * standard * np.sqrt(namuda / (2 - namuda) * (1 - (1 - namuda) ** 2 * t))
+ return mid, UCL, LCL
+ pass
+
+
+def get_MSE(data, label, new_model):
+ predicted_data = new_model.predict(data)
+
+ temp = np.abs(predicted_data - label)
+ temp1 = (temp - np.broadcast_to(np.mean(temp, axis=0), shape=predicted_data.shape))
+ temp2 = np.broadcast_to(np.sqrt(np.var(temp, axis=0)), shape=predicted_data.shape)
+ temp3 = temp1 / temp2
+ mse = np.sum((temp1 / temp2) ** 2, axis=1)
+ print("z:", mse)
+ print(mse.shape)
+
+ # mse=np.mean((predicted_data-label)**2,axis=1)
+ print("mse", mse)
+
+ dims, = mse.shape
+
+ mean = np.mean(mse)
+ std = np.sqrt(np.var(mse))
+ max = mean + 3 * std
+ # min = mean-3*std
+ max = np.broadcast_to(max, shape=[dims, ])
+ # min = np.broadcast_to(min,shape=[dims,])
+ mean = np.broadcast_to(mean, shape=[dims, ])
+
+ # plt.plot(max)
+ # plt.plot(mse)
+ # plt.plot(mean)
+ # # plt.plot(min)
+ # plt.show()
+ #
+ #
+ return mse, mean, max
+ # pass
+
+
+def condition_monitoring_model():
+ input = tf.keras.Input(shape=[time_stamp, feature_num])
+ conv1 = tf.keras.layers.Conv1D(filters=256, kernel_size=1)(input)
+ GRU1 = tf.keras.layers.GRU(128, return_sequences=False)(conv1)
+ d1 = tf.keras.layers.Dense(300)(GRU1)
+ output = tf.keras.layers.Dense(10)(d1)
+
+ model = tf.keras.Model(inputs=input, outputs=output)
+
+ return model
+
+
+# trian_data:(300455,120,10)
+# trian_label1:(300455,10)
+# trian_label2:(300455,)
+def shuffle(train_data, train_label1, train_label2, is_split: bool = False, split_size: float = 0.2):
+ (train_data, test_data, train_label1, test_label1, train_label2, test_label2) = train_test_split(train_data,
+ train_label1,
+ train_label2,
+ test_size=split_size,
+ shuffle=True,
+ random_state=100)
+ if is_split:
+ return train_data, train_label1, train_label2, test_data, test_label1, test_label2
+ train_data = np.concatenate([train_data, test_data], axis=0)
+ train_label1 = np.concatenate([train_label1, test_label1], axis=0)
+ train_label2 = np.concatenate([train_label2, test_label2], axis=0)
+ # print(train_data.shape)
+ # print(train_label1.shape)
+ # print(train_label2.shape)
+ # print(train_data.shape)
+
+ return train_data, train_label1, train_label2
+ pass
+
+
+def split_test_data(healthy_data, healthy_label1, healthy_label2, unhealthy_data, unhealthy_label1, unhealthy_label2,
+                    split_size: float = 0.2):
+    """Pool healthy and unhealthy samples, then split them into train and test parts.
+
+    Healthy rows are placed before unhealthy rows, and the pooled arrays are
+    shuffled and split by ``train_test_split`` with ``random_state=100``.
+
+    :param split_size: fraction passed to ``train_test_split`` as ``test_size``.
+    :return: ``(train_data, train_label1, train_label2,
+        test_data, test_label1, test_label2)``.
+    """
+    pooled = [
+        np.concatenate(pair, axis=0)
+        for pair in ((healthy_data, unhealthy_data),
+                     (healthy_label1, unhealthy_label1),
+                     (healthy_label2, unhealthy_label2))
+    ]
+    fit_x, eval_x, fit_y1, eval_y1, fit_y2, eval_y2 = train_test_split(
+        *pooled, test_size=split_size, shuffle=True, random_state=100)
+    return fit_x, fit_y1, fit_y2, eval_x, eval_y1, eval_y2
+
+
+# train_data: (300455, 120, 10)
+# train_label1: (300455, 10)
+# train_label2: (300455,)
+def train_step_one(train_data, train_label1, train_label2):
+    """Train a fresh ``Joint_Monitoring`` model (first stage) and checkpoint it.
+
+    A validation subset is held out once, during the first epoch. Every epoch
+    reshuffles the remaining samples and feeds them to ``model.train`` in
+    consecutive batches of ``batch_size``; a trailing partial batch is not
+    trained on. After each epoch the validation loss is passed to
+    ``SaveBestModel``, ``IsStopTraining`` and ``Is_Reduce_learning_rate``.
+
+    :param train_data: samples stacked along axis 0.
+    :param train_label1: first label array, one row per sample.
+    :param train_label2: second label array, one entry per sample.
+    :raises ValueError: if fewer than ``batch_size`` samples remain for training.
+    """
+    model = Joint_Monitoring()
+    history_loss = []
+    history_val_loss = []
+    learning_rate = 1e-3
+    for epoch in range(EPOCH):
+        print()
+        print("EPOCH:", epoch, "/", EPOCH, ":")
+        train_data, train_label1, train_label2 = shuffle(train_data, train_label1, train_label2)
+        if epoch == 0:
+            # Carve out the validation set once; later epochs only reshuffle.
+            train_data, train_label1, train_label2, val_data, val_label1, val_label2 = shuffle(
+                train_data, train_label1, train_label2, is_split=True)
+
+        size = train_data.shape[0]
+        if size < batch_size:
+            # Without a single full batch there is no training loss to record.
+            raise ValueError("train_step_one needs at least batch_size samples to train on")
+
+        # Slice whole batches directly instead of concatenating one sample at a
+        # time; ``z`` is the index of the batch within this epoch.
+        for z, start in enumerate(range(0, size - batch_size + 1, batch_size)):
+            stop = start + batch_size
+            data = tf.convert_to_tensor(train_data[start:stop])
+            label1 = tf.convert_to_tensor(train_label1[start:stop])
+            label2 = tf.convert_to_tensor(train_label2[start:stop])
+            loss_value = model.train(
+                input_tensor=data,
+                label1=label1,
+                label2=label2,
+                learning_rate=learning_rate,
+                is_first_time=True,
+            )
+            print(z * batch_size, "/", size, ":===============>", "loss:", loss_value.numpy())
+
+        val_loss = model.get_val_loss(
+            val_data=val_data,
+            val_label1=val_label1,
+            val_label2=val_label2,
+            is_first_time=True,
+        )
+        SaveBestModel(model=model, save_name=save_name, history_loss=history_val_loss, loss_value=val_loss.numpy())
+        history_val_loss.append(val_loss)
+        history_loss.append(loss_value.numpy())
+        print('Training loss is :', loss_value.numpy())
+        print('Validating loss is :', val_loss.numpy())
+        if IsStopTraining(history_loss=history_val_loss, patience=7):
+            break
+        if Is_Reduce_learning_rate(history_loss=history_val_loss, patience=3):
+            if learning_rate >= 1e-4:
+                learning_rate = learning_rate * 0.1
+
+
+def train_step_two(step_one_model, step_two_model, train_data, train_label1, train_label2):
+    """Train ``step_two_model`` with help from the already trained ``step_one_model``.
+
+    For every batch, ``step_one_model.call(..., is_first_time=True)`` supplies three
+    outputs that are passed to ``step_two_model.train`` as ``pred_3``, ``pred_4``
+    and ``pred_5``. A validation subset is held out once, during the first epoch,
+    and checkpoints are written through ``SaveBestModelByAccuracy``.
+
+    :param step_one_model: model queried for the auxiliary outputs.
+    :param step_two_model: model whose ``train`` method is called.
+    :param train_data: samples stacked along axis 0.
+    :param train_label1: first label array, one row per sample.
+    :param train_label2: second label array, one row per sample.
+    :raises ValueError: if fewer than ``batch_size`` samples remain for training.
+    """
+    history_loss = []
+    history_val_loss = []
+    history_accuracy = []
+    learning_rate = 1e-3
+    for epoch in range(EPOCH):
+        print()
+        print("EPOCH:", epoch, "/", EPOCH, ":")
+        train_data, train_label1, train_label2 = shuffle(train_data, train_label1, train_label2)
+        if epoch == 0:
+            # Carve out the validation set once; later epochs only reshuffle.
+            train_data, train_label1, train_label2, val_data, val_label1, val_label2 = shuffle(
+                train_data, train_label1, train_label2, is_split=True)
+
+        size = train_data.shape[0]
+        if size < batch_size:
+            # Without a single full batch there is no training loss to record.
+            raise ValueError("train_step_two needs at least batch_size samples to train on")
+
+        accuracy_num = 0
+        trained = 0  # samples fed to the model so far in this epoch
+        # Slice whole batches directly instead of concatenating one sample at a
+        # time; ``z`` is the index of the batch within this epoch.
+        for z, start in enumerate(range(0, size - batch_size + 1, batch_size)):
+            stop = start + batch_size
+            data = tf.convert_to_tensor(train_data[start:stop])
+            label1 = tf.convert_to_tensor(train_label1[start:stop])
+            label2 = tf.convert_to_tensor(train_label2[start:stop])
+            output1, output2, output3, _ = step_one_model.call(inputs=data, is_first_time=True)
+            loss_value, accuracy_value = step_two_model.train(
+                input_tensor=data, label1=label1, label2=label2, learning_rate=learning_rate,
+                is_first_time=False, pred_3=output1, pred_4=output2, pred_5=output3)
+            accuracy_num += accuracy_value
+            trained = (z + 1) * batch_size
+            print(z * batch_size, "/", size, ":===============>", "loss:", loss_value.numpy(), "| accuracy:",
+                  accuracy_num / trained)
+
+        val_loss, val_accuracy = step_two_model.get_val_loss(
+            val_data=val_data,
+            val_label1=val_label1,
+            val_label2=val_label2,
+            is_first_time=False,
+            step_one_model=step_one_model,
+        )
+        SaveBestModelByAccuracy(model=step_two_model, save_name=save_step_two_name, history_accuracy=history_accuracy,
+                                accuracy_value=val_accuracy)
+        history_val_loss.append(val_loss)
+        history_loss.append(loss_value.numpy())
+        # Record the accuracy so the next epoch's "best so far" check has a history.
+        history_accuracy.append(val_accuracy)
+        # Same denominator as the per-batch report (samples trained on), not one batch more.
+        print('Training loss is : {0} | Training accuracy is : {1}'.format(loss_value.numpy(),
+                                                                         accuracy_num / trained))
+        print('Validating loss is : {0} | Validating accuracy is : {1}'.format(val_loss.numpy(), val_accuracy))
+        if IsStopTraining(history_loss=history_val_loss, patience=7):
+            break
+        if Is_Reduce_learning_rate(history_loss=history_val_loss, patience=3) and learning_rate >= 1e-4:
+            learning_rate = learning_rate * 0.1
+
+
+def test(step_one_model, step_two_model, test_data, test_label1, test_label2):
+    """Evaluate the two-stage model on held-out data and print accuracy and loss.
+
+    The metrics come from ``step_two_model.get_val_loss`` called with
+    ``is_first_time=False`` and ``step_one_model`` as the auxiliary model.
+    """
+    val_loss, val_accuracy = step_two_model.get_val_loss(
+        val_data=test_data, val_label1=test_label1, val_label2=test_label2,
+        is_first_time=False, step_one_model=step_one_model)
+    print("val_accuracy:", val_accuracy)
+    print("val_loss:", val_loss)
+
+
+if __name__ == '__main__':
+    # Load the SCADA table and rescale it with normalization().
+    total_data = normalization(data=loadData.execute(N=feature_num, file_name=file_name))
+    # Overlapping windows: the healthy rows first, then the unhealthy range.
+    healthy_rows = total_data[:healthy_date, :]
+    unhealthy_rows = total_data[healthy_date - time_stamp + unhealthy_patience:unhealthy_date, :]
+    train_data_healthy, train_label1_healthy, train_label2_healthy = get_training_data_overlapping(
+        healthy_rows, is_Healthy=True)
+    train_data_unhealthy, train_label1_unhealthy, train_label2_unhealthy = get_training_data_overlapping(
+        unhealthy_rows, is_Healthy=False)
+    # TODO Step one training (the first call is a quick single run on 32 samples).
+    # train_step_one(train_data=train_data_healthy[:32, :, :], train_label1=train_label1_healthy[:32, :],train_label2=train_label2_healthy[:32, ])
+    # train_step_one(train_data=train_data_healthy, train_label1=train_label1_healthy,train_label2=train_label2_healthy)
+
+    # Load the weights produced by step one; this model only supplies outputs.
+    step_one_model = Joint_Monitoring()
+    step_one_model.load_weights(save_name)
+    # To start step two from the step-one weights instead:
+    # step_two_model = Joint_Monitoring()
+    # step_two_model.load_weights(save_name)
+
+    # TODO Step two training.
+    # Pair the last 2 * unhealthy_size healthy windows with all unhealthy windows.
+    healthy_size = train_data_healthy.shape[0]
+    unhealthy_size = train_data_unhealthy.shape[0]
+    recent = slice(healthy_size - 2 * unhealthy_size, None)
+    train_data, train_label1, train_label2, test_data, test_label1, test_label2 = split_test_data(
+        healthy_data=train_data_healthy[recent, :, :],
+        healthy_label1=train_label1_healthy[recent, :],
+        healthy_label2=train_label2_healthy[recent],
+        unhealthy_data=train_data_unhealthy,
+        unhealthy_label1=train_label1_unhealthy,
+        unhealthy_label2=train_label2_unhealthy)
+    # train_step_two(step_one_model=step_one_model, step_two_model=step_two_model, train_data=train_data,
+    #                train_label1=train_label1, train_label2=np.expand_dims(train_label2, axis=-1))
+
+    # TODO Evaluate on the test split.
+    step_two_model = Joint_Monitoring()
+    step_two_model.load_weights(save_step_two_name)
+    test(step_one_model=step_one_model, step_two_model=step_two_model, test_data=test_data, test_label1=test_label1,
+         test_label2=np.expand_dims(test_label2, axis=-1))
diff --git a/TensorFlow_eaxmple/Model_train_test/condition_monitoring/self_try/Joint_Monitoring_hard.py b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/self_try/Joint_Monitoring_hard.py
new file mode 100644
index 0000000..7ab3cf3
--- /dev/null
+++ b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/self_try/Joint_Monitoring_hard.py
@@ -0,0 +1,576 @@
+# -*- coding: utf-8 -*-
+
+# coding: utf-8
+import tensorflow as tf
+import tensorflow.keras
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+from model.DepthwiseCon1D.DepthwiseConv1D import DepthwiseConv1D
+from model.Dynamic_channelAttention.Dynamic_channelAttention import DynamicChannelAttention
+from condition_monitoring.data_deal import loadData
+from model.Joint_Monitoring.Joint_Monitoring3 import Joint_Monitoring
+
+from model.CommonFunction.CommonFunction import *
+from sklearn.model_selection import train_test_split
+from tensorflow.keras.models import load_model, save_model
+
+# Author : dingjiawen
+# Date   : 2022/7/8 10:29
+# Usage  : combine prediction and classification for joint condition monitoring.
+# Desc   : RepVGG + unsampling + GRU for reconstruction, followed by GDP (global
+#          dynamic pooling) and a classifier; an MSE loss that decays with the
+#          epoch plus a cross-entropy loss that grows with the epoch.
+
+# Hyper-parameters
+time_stamp = 120
+feature_num = 10
+batch_size = 16
+learning_rate = 0.001
+EPOCH = 101
+model_name = "joint"
+
+# EWMA hyper-parameters
+K = 18
+namuda = 0.01
+
+# Weight locations. Only the model name, window length and feature count are
+# substituted into the path; the "epoch8" / "epoch14" suffixes are fixed text.
+save_name = "../hard_model/weight/{0}_timestamp{1}_feature{2}_weight_epoch8/weight".format(
+    model_name,
+    time_stamp,
+    feature_num,
+)
+save_step_two_name = "../hard_model/two_weight/{0}_timestamp{1}_feature{2}_weight_epoch14/weight".format(
+    model_name,
+    time_stamp,
+    feature_num,
+)
+
+# Alternative single-file (.h5) locations:
+# save_name = "../model/joint/{0}_timestamp{1}_feature{2}.h5".format(
+#     model_name, time_stamp, feature_num)
+# save_step_two_name = "../model/joint_two/{0}_timestamp{1}_feature{2}.h5".format(
+#     model_name, time_stamp, feature_num)
+
+# Input file. The raw string keeps the Windows backslashes literal instead of
+# relying on Python passing unrecognised escapes such as "\d" through unchanged.
+file_name = r"G:\data\SCADA数据\jb4q_8_delete_total_zero.csv"
+
+# About the file: jb4q_8_delete_total_zero.csv only has the all-zero columns removed.
+# Rows 0:415548 are normal values        (2019/7.30 00:00:00 - 2019/9/18 11:14:00).
+# Rows 415549:432153 are abnormal values (2019/9/18 11:21:01 - 2021/1/18 00:00:00).
+
+# File parameters
+
+# Last row of the healthy period
+healthy_date = 415548
+
+# Last row of the abnormal period
+unhealthy_date = 432153
+
+# Tolerance for abnormality, counted in rows
+unhealthy_patience = 5
+
+
+def remove(data, time_stamp=time_stamp):
+    """Trim trailing rows so that the row count is a whole multiple of ``time_stamp``."""
+    row_count, _ = data.shape
+    print("remove_data.shape:", data.shape)
+    # Number of rows covered by complete windows of ``time_stamp`` rows.
+    usable = int(row_count / time_stamp) * time_stamp
+    return data[:usable, :]
+
+
+# Non-overlapping sampling
+def get_training_data(data, time_stamp: int = time_stamp):
+    """Cut ``data`` into non-overlapping windows labelled with the row that follows each window.
+
+    :param data: 2-D array of shape ``(rows, cols)``.
+    :param time_stamp: number of rows per window.
+    :return: ``(train_data, train_label)``; the last window is dropped because no row follows it.
+    """
+    # Forward the window length so trimming and reshaping always agree.
+    removed_data = remove(data=data, time_stamp=time_stamp)
+    cols = removed_data.shape[1]
+    print("removed_data.shape:", removed_data.shape)
+    print("removed_data:", removed_data)
+    train_data = np.reshape(removed_data, [-1, time_stamp, cols])
+    print("train_data:", train_data)
+    # Label of window i is the first row of window i + 1 (empty for a single window).
+    train_label = train_data[1:, 0, :].copy()
+    print("train_data.shape:", train_data.shape)
+    print("train_label.shape", train_label.shape)
+    return train_data[:-1, :], train_label
+
+
+# Overlapping sampling
+def get_training_data_overlapping(data, time_stamp: int = time_stamp, is_Healthy: bool = True):
+    """Build overlapping (stride 1) windows, their next-row labels and a constant health flag.
+
+    :param data: 2-D array of shape ``(rows, cols)``.
+    :param time_stamp: number of consecutive rows in each window.
+    :param is_Healthy: flag value selector: ones when True, zeros when False.
+    :return: ``(train_data, train_label, train_label2)``, ``rows - time_stamp - 1`` samples each.
+    """
+    rows, cols = data.shape
+    sample_count = rows - time_stamp - 1
+    train_data = np.empty(shape=[sample_count, time_stamp, cols])
+    train_label = np.empty(shape=[sample_count, cols])
+    for start in range(sample_count):
+        end = start + time_stamp
+        train_data[start] = data[start:end]  # the window itself
+        train_label[start] = data[end]  # the row right after the window
+    print("重叠采样以后:")
+    print("data:", train_data)  # (300334,120,10)
+    print("label:", train_label)  # (300334,10)
+    fill = np.ones if is_Healthy else np.zeros
+    train_label2 = fill(shape=[train_label.shape[0]])
+    print("label2:", train_label2)
+    return train_data, train_label, train_label2
+
+
+# RepConv: re-parameterizable convolution
+def RepConv(input_tensor, k=3):
+    """Sum a k-wide conv branch, a 1-wide conv branch and an identity branch, then apply ReLU.
+
+    Every branch ends in its own BatchNormalization; both convolutions keep the
+    channel count of ``input_tensor`` and use 'SAME' padding with stride 1.
+    """
+    _, _, channels = input_tensor.shape
+    wide = tf.keras.layers.Conv1D(filters=channels, kernel_size=k, strides=1, padding='SAME')(input_tensor)
+    wide = tf.keras.layers.BatchNormalization()(wide)
+    point = tf.keras.layers.Conv1D(filters=channels, kernel_size=1, strides=1, padding='SAME')(input_tensor)
+    point = tf.keras.layers.BatchNormalization()(point)
+    identity = tf.keras.layers.BatchNormalization()(input_tensor)
+    return tf.nn.relu(tf.keras.layers.Add()([wide, point, identity]))
+
+
+# RepBlock module
+def RepBlock(input_tensor, num: int = 3):
+    """Apply ``RepConv`` ``num`` times in sequence and return the result."""
+    # Recursive form of the loop: peel off one RepConv per call until none remain.
+    return input_tensor if num <= 0 else RepBlock(RepConv(input_tensor), num - 1)
+
+
+# GAP: global average pooling
+def Global_avg_channelAttention(input_tensor):
+    """Scale ``input_tensor`` by a sigmoid gate computed from globally average-pooled features."""
+    # NOTE(review): GlobalAvgPool1D usually returns a rank-2 tensor while Conv1D expects rank 3 - confirm this runs.
+    _, _, channels = input_tensor.shape
+    depthwise = DepthwiseConv1D(kernel_size=1, padding='SAME')(input_tensor)
+    pooled = tf.keras.layers.GlobalAvgPool1D()(depthwise)
+    gate = tf.nn.sigmoid(tf.keras.layers.Conv1D(filters=channels, kernel_size=1, padding='SAME')(pooled))
+    return tf.multiply(input_tensor, gate)
+
+
+# GDP: global dynamic pooling
+def Global_Dynamic_channelAttention(input_tensor):
+    """Scale ``input_tensor`` by a sigmoid gate derived from average-pooled features.
+
+    A max-pooled gate is computed as well, but the returned value does not use it.
+    """
+    # NOTE(review): the max-pool gate is unused; presumably it was meant to be
+    # combined with the average gate - confirm the intended formula.
+    _, _, channels = input_tensor.shape
+    depthwise = DepthwiseConv1D(kernel_size=1, padding='SAME')(input_tensor)
+    # Average-pool branch: this gate is the one applied to the input.
+    avg_pooled = tf.keras.layers.GlobalAvgPool1D()(depthwise)
+    avg_gate = tf.nn.sigmoid(tf.keras.layers.Conv1D(filters=channels, kernel_size=1, padding='SAME')(avg_pooled))
+    # Max-pool branch: its layers are still created, in the same order as before.
+    max_pooled = tf.keras.layers.GlobalMaxPool1D()(depthwise)
+    max_gate = tf.nn.sigmoid(tf.keras.layers.Conv1D(filters=channels, kernel_size=1, padding='SAME')(max_pooled))
+    return tf.multiply(input_tensor, avg_gate)
+
+
+# Min-max normalization
+def normalization(data):
+    """Min-max scale every column of ``data`` to the range [0, 1].
+
+    Constant columns (min == max) become 0 rather than NaN from a 0 / 0 division.
+    :param data: array whose rows are samples and whose columns are features.
+    :return: array of the same shape with the scaled values.
+    """
+    print("归一化之前:", data)
+    print(data.shape)
+    print("======================")
+    col_min = np.min(data, axis=0)
+    col_range = np.max(data, axis=0) - col_min
+    # A zero range would divide 0 by 0; dividing by 1 leaves those columns at 0.
+    data = (data - col_min) / np.where(col_range == 0, 1, col_range)
+    print("归一化之后:", data)
+    print(data.shape)
+    return data
+
+
+# Standardization (z-score)
+def Regularization(data):
+    """Standardise every column of ``data`` to zero mean and unit standard deviation.
+
+    Constant columns (zero standard deviation) become 0 rather than NaN.
+    :param data: array whose rows are samples and whose columns are features.
+    :return: array of the same shape with the standardised values.
+    """
+    print("正则化之前:", data)
+    print(data.shape)
+    print("======================")
+    col_mean = np.mean(data, axis=0)
+    col_std = np.sqrt(np.var(data, axis=0))
+    # A zero deviation would divide 0 by 0; dividing by 1 leaves those columns at 0.
+    data = (data - col_mean) / np.where(col_std == 0, 1, col_std)
+    print("正则化之后:", data)
+    print(data.shape)
+    return data
+
+
+def EWMA(data, K=K, namuda=namuda, t=None):
+    """Return the column means of ``data`` with EWMA control limits as ``(mid, UCL, LCL)``."""
+    # ``t`` is the sample index of the chart. ``None`` selects the steady-state
+    # limits, where the factor 1 - (1 - namuda) ** (2 * t) has reached 1.
+    growth = 1 if t is None else 1 - (1 - namuda) ** (2 * t)
+    mid = np.mean(data, axis=0)
+    standard = np.sqrt(np.var(data, axis=0))
+    half_width = K * standard * np.sqrt(namuda / (2 - namuda) * growth)
+    return mid, mid + half_width, mid - half_width
+
+
+def get_MSE(data, label, new_model):
+    """Score each sample by its standardised absolute prediction error and derive a 3-sigma bound.
+
+    The absolute error of ``new_model.predict(data)`` against ``label`` is
+    standardised per column, squared and summed over the columns of each sample.
+
+    :param data: input passed unchanged to ``new_model.predict``.
+    :param label: array the prediction is compared with.
+    :param new_model: object exposing ``predict``.
+    :return: ``(mse, mean, max)``; ``mse`` has one score per sample, ``mean`` is the
+        average score and ``max`` is ``mean + 3 * std``, both broadcast to ``mse``'s shape.
+    """
+    predicted_data = new_model.predict(data)
+
+    # Per-column z-score of the absolute prediction error.
+    abs_error = np.abs(predicted_data - label)
+    centred = abs_error - np.broadcast_to(np.mean(abs_error, axis=0), shape=predicted_data.shape)
+    spread = np.broadcast_to(np.sqrt(np.var(abs_error, axis=0)), shape=predicted_data.shape)
+    z_score = centred / spread
+
+    # One score per sample: squared z-scores summed over the columns.
+    mse = np.sum(z_score ** 2, axis=1)
+    print("z:", mse)
+    print(mse.shape)
+    print("mse", mse)
+
+    dims, = mse.shape
+    average = np.mean(mse)
+    upper = average + 3 * np.sqrt(np.var(mse))
+
+    # Constant lines with the same length as ``mse``.
+    mean = np.broadcast_to(average, shape=[dims, ])
+    return mse, mean, np.broadcast_to(upper, shape=[dims, ])
+
+
+def condition_monitoring_model():
+    """Build the Conv1D -> GRU -> Dense(300) -> Dense(10) Keras model."""
+    layers = tf.keras.layers
+    window = tf.keras.Input(shape=[time_stamp, feature_num])
+    # Layers are created and applied in the same order as the data flows.
+    hidden = layers.Conv1D(filters=256, kernel_size=1)(window)
+    hidden = layers.GRU(128, return_sequences=False)(hidden)
+    hidden = layers.Dense(300)(hidden)
+    prediction = layers.Dense(10)(hidden)
+    return tf.keras.Model(inputs=window, outputs=prediction)
+
+
+# train_data: (300455, 120, 10)
+# train_label1: (300455, 10)
+# train_label2: (300455,)
+def shuffle(train_data, train_label1, train_label2, is_split: bool = False, split_size: float = 0.2):
+    """Shuffle the three arrays in unison, optionally returning a held-out part.
+
+    The order comes from ``train_test_split`` with ``shuffle=True`` and ``random_state=100``.
+
+    :param is_split: when True, return the held-out part separately.
+    :param split_size: fraction passed to ``train_test_split`` as ``test_size``.
+    :return: ``(data, label1, label2)`` with both parts re-joined, or the six-tuple
+        ``(data, label1, label2, held_data, held_label1, held_label2)`` when ``is_split``.
+    """
+    parts = train_test_split(train_data, train_label1, train_label2,
+                             test_size=split_size, shuffle=True, random_state=100)
+    data_a, data_b, first_a, first_b, second_a, second_b = parts
+    if is_split:
+        return data_a, first_a, second_a, data_b, first_b, second_b
+    # No split requested: join the two shuffled parts back together.
+    return (np.concatenate([data_a, data_b], axis=0),
+            np.concatenate([first_a, first_b], axis=0),
+            np.concatenate([second_a, second_b], axis=0))
+
+
+def split_test_data(healthy_data, healthy_label1, healthy_label2, unhealthy_data, unhealthy_label1, unhealthy_label2,
+                    split_size: float = 0.2, shuffle: bool = True):
+    """Pool healthy and unhealthy samples, then split them into train and test parts.
+
+    Healthy rows are placed before unhealthy rows; ``train_test_split`` is called
+    with ``random_state=100`` on the pooled arrays.
+
+    :param split_size: fraction passed to ``train_test_split`` as ``test_size``.
+    :param shuffle: forwarded to ``train_test_split`` as ``shuffle``.
+    :return: ``(train_data, train_label1, train_label2, test_data, test_label1, test_label2)``.
+    """
+    pooled = [
+        np.concatenate(pair, axis=0)
+        for pair in ((healthy_data, unhealthy_data),
+                     (healthy_label1, unhealthy_label1),
+                     (healthy_label2, unhealthy_label2))
+    ]
+    fit_x, eval_x, fit_y1, eval_y1, fit_y2, eval_y2 = train_test_split(
+        *pooled, test_size=split_size, shuffle=shuffle, random_state=100)
+    return fit_x, fit_y1, fit_y2, eval_x, eval_y1, eval_y2
+
+
+# train_data: (300455, 120, 10)
+# train_label1: (300455, 10)
+# train_label2: (300455,)
+def train_step_one(train_data, train_label1, train_label2):
+    """Train a fresh ``Joint_Monitoring`` model (first stage) and checkpoint it.
+
+    A validation subset is held out once, during the first epoch. Every epoch
+    reshuffles the remaining samples and feeds them to ``model.train`` in
+    consecutive batches of ``batch_size``; a trailing partial batch is not
+    trained on. After each epoch the validation loss is passed to
+    ``SaveBestModel``, ``IsStopTraining`` and ``Is_Reduce_learning_rate``.
+
+    :param train_data: samples stacked along axis 0.
+    :param train_label1: first label array, one row per sample.
+    :param train_label2: second label array, one entry per sample.
+    :raises ValueError: if fewer than ``batch_size`` samples remain for training.
+    """
+    model = Joint_Monitoring()
+    history_loss = []
+    history_val_loss = []
+    learning_rate = 1e-3
+    for epoch in range(EPOCH):
+        print()
+        print("EPOCH:", epoch, "/", EPOCH, ":")
+        train_data, train_label1, train_label2 = shuffle(train_data, train_label1, train_label2)
+        if epoch == 0:
+            # Carve out the validation set once; later epochs only reshuffle.
+            train_data, train_label1, train_label2, val_data, val_label1, val_label2 = shuffle(
+                train_data, train_label1, train_label2, is_split=True)
+
+        size = train_data.shape[0]
+        if size < batch_size:
+            # Without a single full batch there is no training loss to record.
+            raise ValueError("train_step_one needs at least batch_size samples to train on")
+
+        # Slice whole batches directly instead of concatenating one sample at a
+        # time; ``z`` is the index of the batch within this epoch.
+        for z, start in enumerate(range(0, size - batch_size + 1, batch_size)):
+            stop = start + batch_size
+            data = tf.convert_to_tensor(train_data[start:stop])
+            label1 = tf.convert_to_tensor(train_label1[start:stop])
+            label2 = tf.convert_to_tensor(train_label2[start:stop])
+            loss_value, accuracy_value = model.train(
+                input_tensor=data,
+                label1=label1,
+                label2=label2,
+                learning_rate=learning_rate,
+                is_first_time=True,
+            )
+            print(z * batch_size, "/", size, ":===============>", "loss:", loss_value.numpy())
+
+        # Validate on the subset that was held out in the first epoch.
+        val_loss, val_accuracy = model.get_val_loss(
+            val_data=val_data,
+            val_label1=val_label1,
+            val_label2=val_label2,
+            is_first_time=True,
+        )
+        SaveBestModel(model=model, save_name=save_name, history_loss=history_val_loss, loss_value=val_loss.numpy())
+        history_val_loss.append(val_loss)
+        history_loss.append(loss_value.numpy())
+        print('Training loss is :', loss_value.numpy())
+        print('Validating loss is :', val_loss.numpy())
+        if IsStopTraining(history_loss=history_val_loss, patience=7):
+            break
+        if Is_Reduce_learning_rate(history_loss=history_val_loss, patience=3):
+            if learning_rate >= 1e-4:
+                learning_rate = learning_rate * 0.1
+
+
+def train_step_two(step_one_model, step_two_model, train_data, train_label1, train_label2):
+    """Train ``step_two_model`` with help from the already trained ``step_one_model``.
+
+    For every batch, ``step_one_model.call(..., is_first_time=True)`` supplies three
+    outputs that are passed to ``step_two_model.train`` as ``pred_3``, ``pred_4``
+    and ``pred_5``. A validation subset is held out once, during the first epoch,
+    and checkpoints are written through ``SaveBestModelByAccuracy``.
+
+    :param step_one_model: model queried for the auxiliary outputs.
+    :param step_two_model: model whose ``train`` method is called.
+    :param train_data: samples stacked along axis 0.
+    :param train_label1: first label array, one row per sample.
+    :param train_label2: second label array, one row per sample.
+    :raises ValueError: if fewer than ``batch_size`` samples remain for training.
+    """
+    history_loss = []
+    history_val_loss = []
+    history_accuracy = []
+    learning_rate = 1e-3
+    for epoch in range(EPOCH):
+        print()
+        print("EPOCH:", epoch, "/", EPOCH, ":")
+        train_data, train_label1, train_label2 = shuffle(train_data, train_label1, train_label2)
+        if epoch == 0:
+            # Carve out the validation set once; later epochs only reshuffle.
+            train_data, train_label1, train_label2, val_data, val_label1, val_label2 = shuffle(
+                train_data, train_label1, train_label2, is_split=True)
+
+        size = train_data.shape[0]
+        if size < batch_size:
+            # Without a single full batch there is no training loss to record.
+            raise ValueError("train_step_two needs at least batch_size samples to train on")
+
+        accuracy_num = 0
+        trained = 0  # samples fed to the model so far in this epoch
+        # Slice whole batches directly instead of concatenating one sample at a
+        # time; ``z`` is the index of the batch within this epoch.
+        for z, start in enumerate(range(0, size - batch_size + 1, batch_size)):
+            stop = start + batch_size
+            data = tf.convert_to_tensor(train_data[start:stop])
+            label1 = tf.convert_to_tensor(train_label1[start:stop])
+            label2 = tf.convert_to_tensor(train_label2[start:stop])
+            output1, output2, output3, _ = step_one_model.call(inputs=data, is_first_time=True)
+            loss_value, accuracy_value = step_two_model.train(
+                input_tensor=data, label1=label1, label2=label2, learning_rate=learning_rate,
+                is_first_time=False, pred_3=output1, pred_4=output2, pred_5=output3)
+            accuracy_num += accuracy_value
+            trained = (z + 1) * batch_size
+            print(z * batch_size, "/", size, ":===============>", "loss:", loss_value.numpy(), "| accuracy:",
+                  accuracy_num / trained)
+
+        val_loss, val_accuracy = step_two_model.get_val_loss(
+            val_data=val_data,
+            val_label1=val_label1,
+            val_label2=val_label2,
+            is_first_time=False,
+            step_one_model=step_one_model,
+        )
+        SaveBestModelByAccuracy(model=step_two_model, save_name=save_step_two_name, history_accuracy=history_accuracy,
+                                accuracy_value=val_accuracy)
+        history_val_loss.append(val_loss)
+        history_loss.append(loss_value.numpy())
+        # Record the accuracy so the next epoch's "best so far" check has a history.
+        history_accuracy.append(val_accuracy)
+        # Same denominator as the per-batch report (samples trained on), not one batch more.
+        print('Training loss is : {0} | Training accuracy is : {1}'.format(loss_value.numpy(),
+                                                                         accuracy_num / trained))
+        print('Validating loss is : {0} | Validating accuracy is : {1}'.format(val_loss.numpy(), val_accuracy))
+        if IsStopTraining(history_loss=history_val_loss, patience=7):
+            break
+        if Is_Reduce_learning_rate(history_loss=history_val_loss, patience=3):
+            if learning_rate >= 1e-4:
+                learning_rate = learning_rate * 0.1
+
+
+def test(step_one_model, step_two_model, test_data, test_label1, test_label2):
+    """Evaluate the two-stage model on held-out data and print accuracy and loss.
+
+    The metrics come from ``step_two_model.get_val_loss`` called with
+    ``is_first_time=False`` and ``step_one_model`` as the auxiliary model.
+    """
+    val_loss, val_accuracy = step_two_model.get_val_loss(
+        val_data=test_data, val_label1=test_label1, val_label2=test_label2,
+        is_first_time=False, step_one_model=step_one_model)
+    print("val_accuracy:", val_accuracy)
+    print("val_loss:", val_loss)
+
+
+def showResult(step_two_model: Joint_Monitoring, test_data, isPlot: bool = False):
+    """Run ``step_two_model`` over ``test_data`` batch by batch and collect its fourth output.
+
+    Only full batches of ``batch_size`` samples are evaluated; a shorter tail is skipped.
+
+    :param step_two_model: model whose ``call(..., is_first_time=False)`` is used.
+    :param test_data: array of shape ``(size, length, dims)``.
+    :param isPlot: when True, scatter-plot the values with a 0.5 threshold line.
+    :return: 1-D array with every collected output value, in batch order.
+    """
+    size, _, _ = test_data.shape
+    total_result = []
+    for start in range(0, size - batch_size + 1, batch_size):
+        each_test_data = test_data[start:start + batch_size, :, :]
+        _, _, _, output4 = step_two_model.call(each_test_data, is_first_time=False)
+        total_result.append(np.reshape(output4, [-1, ]))
+    # np.concatenate needs at least one array, so an input without a full batch
+    # yields an empty result instead of raising.
+    total_result = np.concatenate(total_result) if total_result else np.empty(shape=[0, ])
+    if isPlot:
+        count = total_result.shape[0]
+        plt.scatter(list(range(count)), total_result, c='black', s=10)
+        # Horizontal decision threshold at y = 0.5.
+        plt.axhline(0.5, c='red', label='Failure threshold')
+        # Vertical marker two thirds of the way through the input samples.
+        plt.axvline(test_data.shape[0] * 2 / 3, c='blue', ls='-.')
+        plt.xlabel("time")
+        plt.ylabel("confience")
+        box = {'facecolor': 'grey', 'pad': 10}
+        plt.text(count * 4 / 5, 0.6, "Fault", fontsize=10, color='black', verticalalignment='top',
+                 horizontalalignment='center', bbox=dict(box))
+        plt.text(count * 1 / 3, 0.4, "Norm", fontsize=10, color='black', verticalalignment='top',
+                 horizontalalignment='center', bbox=dict(box))
+        plt.grid()
+        plt.show()
+    return total_result
+
+
+if __name__ == '__main__':
+    # Load the SCADA table and rescale it with normalization().
+    total_data = normalization(data=loadData.execute(N=feature_num, file_name=file_name))
+    # Overlapping windows: the healthy rows first, then the unhealthy range.
+    healthy_rows = total_data[:healthy_date, :]
+    unhealthy_rows = total_data[healthy_date - time_stamp + unhealthy_patience:unhealthy_date, :]
+    train_data_healthy, train_label1_healthy, train_label2_healthy = get_training_data_overlapping(
+        healthy_rows, is_Healthy=True)
+    train_data_unhealthy, train_label1_unhealthy, train_label2_unhealthy = get_training_data_overlapping(
+        unhealthy_rows, is_Healthy=False)
+    healthy_size = train_data_healthy.shape[0]
+    unhealthy_size = train_data_unhealthy.shape[0]
+
+    #### TODO Step one training (the first call is a quick single run on 32 samples).
+    # train_step_one(train_data=train_data_healthy[:32, :, :], train_label1=train_label1_healthy[:32, :],train_label2=train_label2_healthy[:32, ])
+    # train_step_one(train_data=train_data_healthy, train_label1=train_label1_healthy, train_label2=train_label2_healthy)
+
+    # Load the step-one weights twice: one copy keeps training, one only predicts.
+    # step_one_model = Joint_Monitoring()
+    # step_one_model.load_weights(save_name)
+    #
+    # step_two_model = Joint_Monitoring()
+    # step_two_model.load_weights(save_name)
+
+    #### TODO Step two training.
+    ### healthy_data.shape: (300333,120,10)
+    ### unhealthy_data.shape: (16594,10)
+    # train_data, train_label1, train_label2, test_data, test_label1, test_label2 = split_test_data(
+    #     healthy_data=train_data_healthy[healthy_size - 2 * unhealthy_size:, :, :],
+    #     healthy_label1=train_label1_healthy[healthy_size - 2 * unhealthy_size:, :],
+    #     healthy_label2=train_label2_healthy[healthy_size - 2 * unhealthy_size:, ], unhealthy_data=train_data_unhealthy,
+    #     unhealthy_label1=train_label1_unhealthy, unhealthy_label2=train_label2_unhealthy)
+    # train_step_two(step_one_model=step_one_model, step_two_model=step_two_model,
+    #                train_data=train_data,
+    #                train_label1=train_label1, train_label2=np.expand_dims(train_label2, axis=-1))
+
+    ### TODO Evaluate on the test split.
+    step_one_model = Joint_Monitoring()
+    step_one_model.load_weights(save_name)
+    step_two_model = Joint_Monitoring()
+    step_two_model.load_weights(save_step_two_name)
+    # test(step_one_model=step_one_model, step_two_model=step_two_model, test_data=test_data, test_label1=test_label1,
+    #      test_label2=np.expand_dims(test_label2, axis=-1))
+
+    ### TODO Show the result for the whole range.
+    # The rows are sliced from healthy_size - 2 * unhealthy_size up to unhealthy_date.
+    window_start = healthy_size - 2 * unhealthy_size
+    all_data, _, _ = get_training_data_overlapping(total_data[window_start:unhealthy_date, :], is_Healthy=True)
+    # Quick single run:
+    # showResult(step_two_model, test_data=all_data[:32], isPlot=True)
+    showResult(step_two_model, test_data=all_data, isPlot=True)