diff --git a/Spider/Chapter03_网页数据的提取/BeautifulSoup库/BeautifulSoupLearning.py b/Spider/Chapter03_网页数据的提取/BeautifulSoup库/BeautifulSoupLearning.py
new file mode 100644
index 0000000..6890cb5
--- /dev/null
+++ b/Spider/Chapter03_网页数据的提取/BeautifulSoup库/BeautifulSoupLearning.py
@@ -0,0 +1,244 @@
+# -*- encoding:utf-8 -*-
+
+'''
+@Author : dingjiawen
+@Date : 2023/11/8 16:08
+@Usage :
+@Desc : Reference: https://github.com/Python3WebSpider/BeautifulSoupTest
+'''
+
+html = """
+<html><head><title>The Dormouse's story</title></head>
+<body>
+<p class="title" name="dromouse"><b>The Dormouse's story</b></p>
+<p class="story">Once upon a time there were three little sisters; and their names were
+<a href="http://example.com/elsie" class="sister" id="link1"><!-- Elsie --></a>,
+<a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
+<a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>;
+and they lived at the bottom of a well.</p>
+<p class="story">...</p>
+"""
+from bs4 import BeautifulSoup
+
+
+def baseUse():
+    soup = BeautifulSoup(html, 'lxml')
+    print(soup.title)          # <title>The Dormouse's story</title>
+    print(type(soup.title))    # <class 'bs4.element.Tag'>
+    print(soup.title.string)   # The Dormouse's story
+    print(soup.head)           # <head><title>The Dormouse's story</title></head>
+    print(soup.p)              # <p class="title" name="dromouse"><b>The Dormouse's story</b></p>
+    print(soup.p.name)           # tag name: p
+    print(soup.p.attrs)          # attributes: {'class': ['title'], 'name': 'dromouse'}
+    print(soup.p.attrs['name'])  # attribute value: dromouse
+    print(soup.p['name'])        # attribute value: dromouse
+    print(soup.body.p['name'])   # nested selection: dromouse
+
+    print("==========================")
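Side note, a minimal sketch (not part of the committed file; `get()` and `get_text()` are standard bs4 `Tag` methods): attributes can also be read with `get()`, which returns `None` instead of raising `KeyError` when the attribute is missing, and `get_text()` gathers all descendant text.

```python
# Sketch: safer attribute access on the same module-level `html` document.
from bs4 import BeautifulSoup

soup = BeautifulSoup(html, 'lxml')
print(soup.p.get('name'))      # dromouse
print(soup.p.get('missing'))   # None, no KeyError
print(soup.p.get_text())       # The Dormouse's story
```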
+
+
+def child():
+    html = """
+    <html>
+        <head>
+            <title>The Dormouse's story</title>
+        </head>
+        <body>
+            <p class="story">
+                Once upon a time there were three little sisters; and their names were
+                <a href="http://example.com/elsie" class="sister" id="link1">
+                    <span>Elsie</span>
+                </a>
+                <a href="http://example.com/lacie" class="sister" id="link2">Lacie</a>
+                and
+                <a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>
+                and they lived at the bottom of a well.
+            </p>
+            <p class="story">...</p>
+    """
+    soup = BeautifulSoup(html, 'lxml')
+    # direct children
+    for i, child in enumerate(soup.p.children):
+        print(i, child)
+    print("===============================")
+    # all descendants
+    for i, child in enumerate(soup.p.descendants):
+        print(i, child)
+    print("===============================")
+
+
+def parent():
+    soup = BeautifulSoup(html, 'lxml')
+    # direct parent
+    print(soup.a.parent)
+    print("===============================")
+    # ancestor nodes
+    print(type(soup.a.parents))
+    print(list(enumerate(soup.a.parents)))
+    print("=============================")
+
+
+def brother():
+    html = """
+    <html>
+        <body>
+            <p class="story">
+                Once upon a time there were three little sisters; and their names were
+                <a href="http://example.com/elsie" class="sister" id="link1">
+                    <span>Elsie</span>
+                </a>
+                Hello
+                <a href="http://example.com/lacie" class="sister" id="link2">Lacie</a>
+                and
+                <a href="http://example.com/tillie" class="sister" id="link3">Tillie</a>
+                and they lived at the bottom of a well.
+            </p>
+    """
+    # sibling nodes
+    soup = BeautifulSoup(html, 'lxml')
+    print('Next Sibling', soup.a.next_sibling)
+    print('Prev Sibling', soup.a.previous_sibling)
+    print('Next Siblings', list(enumerate(soup.a.next_siblings)))
+    print('Prev Siblings', list(enumerate(soup.a.previous_siblings)))
+
+
+# find every node that matches the given condition
+def findAll():
+    html = '''
+    <div class="panel">
+        <div class="panel-heading">
+            <h4>Hello</h4>
+        </div>
+        <div class="panel-body">
+            <ul class="list" id="list-1">
+                <li class="element">Foo</li>
+                <li class="element">Bar</li>
+                <li class="element">Jay</li>
+            </ul>
+            <ul class="list list-small" id="list-2">
+                <li class="element">Foo</li>
+                <li class="element">Bar</li>
+            </ul>
+        </div>
+    </div>
+    '''
+    soup = BeautifulSoup(html, 'lxml')
+    print(soup.find_all(name='ul'))
+    print(type(soup.find_all(name='ul')[0]))
+
+    for ul in soup.find_all(name='ul'):
+        print(ul.find_all(name='li'))
+
+    for ul in soup.find_all(name='ul'):
+        print(ul.find_all(name='li'))
+        for li in ul.find_all(name='li'):
+            print(li.string)
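`find_all()` also accepts a callable filter and a `limit` keyword; a minimal sketch, assuming `html` holds the same panel markup used in `findAll()` above (not part of the committed file):

```python
# Sketch: find_all() with a callable filter and a result limit, both standard bs4 features.
from bs4 import BeautifulSoup

soup = BeautifulSoup(html, 'lxml')
# every <li> whose class list contains "element", capped at two results
lis = soup.find_all(lambda tag: tag.name == 'li' and 'element' in tag.get('class', []), limit=2)
print([li.string for li in lis])   # ['Foo', 'Bar']
```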
+
+
+# find nodes whose attributes match
+def attrs():
+    html = '''
+    <div class="panel">
+        <div class="panel-heading">
+            <h4>Hello</h4>
+        </div>
+        <div class="panel-body">
+            <ul class="list" id="list-1" name="elements">
+                <li class="element">Foo</li>
+                <li class="element">Bar</li>
+                <li class="element">Jay</li>
+            </ul>
+            <ul class="list list-small" id="list-2">
+                <li class="element">Foo</li>
+                <li class="element">Bar</li>
+            </ul>
+        </div>
+    </div>
+    '''
+
+    soup = BeautifulSoup(html, 'lxml')
+    print(soup.find_all(attrs={'id': 'list-1'}))
+    print(soup.find_all(attrs={'name': 'elements'}))
+
+    # common attributes can be passed directly instead of through attrs
+    soup = BeautifulSoup(html, 'lxml')
+    print(soup.find_all(id='list-1'))
+    print(soup.find_all(class_='element'))
+    import re
+    print(soup.find_all(string=re.compile('Foo')))  # string is equivalent to text, i.e. the node's text content
+
+
+# return the first matching element
+def find():
+    html = '''
+    <div class="panel">
+        <div class="panel-heading">
+            <h4>Hello</h4>
+        </div>
+        <div class="panel-body">
+            <ul class="list" id="list-1">
+                <li class="element">Foo</li>
+                <li class="element">Bar</li>
+                <li class="element">Jay</li>
+            </ul>
+            <ul class="list list-small" id="list-2">
+                <li class="element">Foo</li>
+                <li class="element">Bar</li>
+            </ul>
+        </div>
+    </div>
+    '''
+    soup = BeautifulSoup(html, 'lxml')
+    print(soup.find(name='ul'))
+    print(type(soup.find(name='ul')))
+    print(soup.find(class_='list'))
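The same first-match idea extends in other directions through `find_parent()`, `find_next_sibling()` and related bs4 methods; a minimal sketch, again assuming the panel markup from `find()` above (not part of the committed file):

```python
# Sketch: directional variants of find(); all are standard bs4 Tag methods.
from bs4 import BeautifulSoup

soup = BeautifulSoup(html, 'lxml')
first_li = soup.find(class_='element')
print(first_li.find_next_sibling())   # the following <li class="element">Bar</li>
print(first_li.find_parent('ul'))     # the enclosing <ul id="list-1">
```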
+
+
+# CSS selectors
+def cssSelect():
+    html = '''
+    <div class="panel">
+        <div class="panel-heading">
+            <h4>Hello</h4>
+        </div>
+        <div class="panel-body">
+            <ul class="list" id="list-1">
+                <li class="element">Foo</li>
+                <li class="element">Bar</li>
+                <li class="element">Jay</li>
+            </ul>
+            <ul class="list list-small" id="list-2">
+                <li class="element">Foo</li>
+                <li class="element">Bar</li>
+            </ul>
+        </div>
+    </div>
+    '''
+
+    soup = BeautifulSoup(html, 'lxml')
+    print(soup.select('.panel .panel-heading'))
+    print(soup.select('ul li'))
+    print(soup.select('#list-2 .element'))
+    print(type(soup.select('ul')[0]))
+
+    # nested selection
+    soup = BeautifulSoup(html, 'lxml')
+    for ul in soup.select('ul'):
+        print(ul.select('li'))
+
+    # get attributes
+    soup = BeautifulSoup(html, 'lxml')
+    for ul in soup.select('ul'):
+        print(ul['id'])
+        print(ul.attrs['id'])
+
+    # get text
+    soup = BeautifulSoup(html, 'lxml')
+    for li in soup.select('li'):
+        print('Get Text:', li.get_text())
+        print('String:', li.string)
+
+
+
+if __name__ == '__main__':
+    cssSelect()
\ No newline at end of file
diff --git a/Spider/Chapter03_网页数据的提取/BeautifulSoup库/__init__.py b/Spider/Chapter03_网页数据的提取/BeautifulSoup库/__init__.py
new file mode 100644
index 0000000..4cced9b
--- /dev/null
+++ b/Spider/Chapter03_网页数据的提取/BeautifulSoup库/__init__.py
@@ -0,0 +1,8 @@
+#-*- encoding:utf-8 -*-
+
+'''
+@Author : dingjiawen
+@Date : 2023/11/8 16:07
+@Usage :
+@Desc :
+'''
\ No newline at end of file
diff --git a/Spider/Chapter03_网页数据的提取/Pyquery库/__init__.py b/Spider/Chapter03_网页数据的提取/Pyquery库/__init__.py
new file mode 100644
index 0000000..2460833
--- /dev/null
+++ b/Spider/Chapter03_网页数据的提取/Pyquery库/__init__.py
@@ -0,0 +1,8 @@
+#-*- encoding:utf-8 -*-
+
+'''
+@Author : dingjiawen
+@Date : 2023/11/8 16:54
+@Usage :
+@Desc :
+'''
\ No newline at end of file
diff --git a/Spider/Chapter03_网页数据的提取/Pyquery库/pyqueryLearning.py b/Spider/Chapter03_网页数据的提取/Pyquery库/pyqueryLearning.py
new file mode 100644
index 0000000..af946f6
--- /dev/null
+++ b/Spider/Chapter03_网页数据的提取/Pyquery库/pyqueryLearning.py
@@ -0,0 +1,329 @@
+# -*- encoding:utf-8 -*-
+
+'''
+@Author : dingjiawen
+@Date : 2023/11/8 16:54
+@Usage :
+@Desc : PyQuery practice. Reference: https://github.com/Python3WebSpider/PyQueryTest
+'''
+from pyquery import PyQuery as pq
+
+
+# initialize from a string
+def stringBase():
+    html = '''
+    <div id="container">
+        <ul class="list">
+             <li class="item-0">first item</li>
+             <li class="item-1"><a href="link2.html">second item</a></li>
+             <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
+             <li class="item-1 active"><a href="link4.html">fourth item</a></li>
+             <li class="item-0"><a href="link5.html">fifth item</a></li>
+         </ul>
+     </div>
+    '''
+
+    doc = pq(html)
+    print(doc('li'))
+
+
+# initialize from a URL
+def URLBase():
+    doc = pq(url='https://cuiqingcai.com')
+    print(doc('title'))
+
+    # the code above is equivalent to:
+    # doc = pq(requests.get('https://cuiqingcai.com').text)
+    # print(doc('title'))
+
+
+# initialize from a file
+def fileBase():
+    doc = pq(filename='demo.html')
+    print(doc('li'))
+
+
+# basic CSS selectors
+def cssSelect():
+    html = '''
+    <div id="container">
+        <ul class="list">
+             <li class="item-0">first item</li>
+             <li class="item-1"><a href="link2.html">second item</a></li>
+             <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
+             <li class="item-1 active"><a href="link4.html">fourth item</a></li>
+             <li class="item-0"><a href="link5.html">fifth item</a></li>
+         </ul>
+     </div>
+    '''
+    doc = pq(html)
+    print(doc('#container .list li'))
+    print(type(doc('#container .list li')))
+
+    # iterate over the matched nodes
+    for item in doc('#container .list li').items():
+        print(item.text())
+
+
+# find child nodes
+def child():
+    html = '''
+    <div id="container">
+        <ul class="list">
+             <li class="item-0">first item</li>
+             <li class="item-1"><a href="link2.html">second item</a></li>
+             <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
+             <li class="item-1 active"><a href="link4.html">fourth item</a></li>
+             <li class="item-0"><a href="link5.html">fifth item</a></li>
+         </ul>
+     </div>
+    '''
+    doc = pq(html)
+    items = doc('.list')
+    print(type(items))
+    print(items)
+    # find() searches all descendants
+    lis = items.find('li')
+    print(type(lis))
+    print(lis)
+
+    # children() only returns direct children
+    lis = items.children()
+    print(type(lis))
+    print(lis)
+
+    # children() also accepts a CSS selector
+    lis = items.children('.active')
+    print(lis)
+
+
+def parent():
+    html = '''
+    <div class="wrap">
+        <div id="container">
+            <ul class="list">
+                 <li class="item-0">first item</li>
+                 <li class="item-1"><a href="link2.html">second item</a></li>
+                 <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
+                 <li class="item-1 active"><a href="link4.html">fourth item</a></li>
+                 <li class="item-0"><a href="link5.html">fifth item</a></li>
+             </ul>
+         </div>
+     </div>
+    '''
+    from pyquery import PyQuery as pq
+    doc = pq(html)
+    items = doc('.list')
+    container = items.parent()
+    print(type(container))
+    print(container)
+
+    from pyquery import PyQuery as pq
+    doc = pq(html)
+    items = doc('.list')
+    parents = items.parents()
+    print(type(parents))
+    print(parents)
+
+    parent = items.parents('.wrap')
+    print(parent)
+
+    from pyquery import PyQuery as pq
+    doc = pq(html)
+    li = doc('.list .item-0.active')
+    print(li.siblings())
+
+
+def brother():
+    html = '''
+    <div class="wrap">
+        <div id="container">
+            <ul class="list">
+                 <li class="item-0">first item</li>
+                 <li class="item-1"><a href="link2.html">second item</a></li>
+                 <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
+                 <li class="item-1 active"><a href="link4.html">fourth item</a></li>
+                 <li class="item-0"><a href="link5.html">fifth item</a></li>
+             </ul>
+         </div>
+     </div>
+    '''
+    from pyquery import PyQuery as pq
+    doc = pq(html)
+    li = doc('.list .item-0.active')
+    print(li.siblings('.active'))
+
+    from pyquery import PyQuery as pq
+    doc = pq(html)
+    li = doc('.item-0.active')
+    print(li)
+    print(str(li))
+
+    from pyquery import PyQuery as pq
+    doc = pq(html)
+    # the selector may match several nodes
+    lis = doc('li').items()
+    print(type(lis))
+    for li in lis:
+        print(li, type(li))
+
+
+def attrs():
+    html = '''
+    <div class="wrap">
+        <div id="container">
+            <ul class="list">
+                 <li class="item-0">first item</li>
+                 <li class="item-1"><a href="link2.html">second item</a></li>
+                 <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
+                 <li class="item-1 active"><a href="link4.html">fourth item</a></li>
+                 <li class="item-0"><a href="link5.html">fifth item</a></li>
+             </ul>
+         </div>
+     </div>
+    '''
+    from pyquery import PyQuery as pq
+    doc = pq(html)
+    a = doc('.item-0.active a')
+    print(a, type(a))
+    print(a.attr('href'))
+
+    a = doc('a')
+    print(a, type(a))
+    print(a.attr('href'))
+    print(a.attr.href)
+
+    from pyquery import PyQuery as pq
+    doc = pq(html)
+    a = doc('a')
+    for item in a.items():
+        # get attribute and text
+        print(item.attr('href'), item.text())
+
+
+def getHTML():
+    html = '''
+    <div class="wrap">
+        <ul class="list">
+             <li class="item-1"><a href="link2.html">second item</a></li>
+             <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
+             <li class="item-1 active"><a href="link4.html">fourth item</a></li>
+             <li class="item-0"><a href="link5.html">fifth item</a></li>
+         </ul>
+     </div>
+    '''
+    from pyquery import PyQuery as pq
+    doc = pq(html)
+    li = doc('li')
+    print(li.html())  # html of the first matched node: <a href="link2.html">second item</a>
+    print(li.text())  # text of all matched nodes: second item third item fourth item fifth item
+    print(type(li.text()))
+
+
+# add or remove a node's class
+def operateNode():
+    html = '''
+    <div class="wrap">
+        <ul class="list">
+             <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
+         </ul>
+     </div>
+    '''
+    from pyquery import PyQuery as pq
+    doc = pq(html)
+    li = doc('.item-0.active')
+    print(li)
+    li.removeClass('active')
+    print(li)
+    li.addClass('active')
+    print(li)
+
+    '''
+    <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
+
+    <li class="item-0"><a href="link3.html"><span class="bold">third item</span></a></li>
+
+    <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
+    '''
+
+
+def operateNodeInformation():
+    html = '''
+    <ul class="list">
+         <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
+    </ul>
+    '''
+    from pyquery import PyQuery as pq
+    doc = pq(html)
+    li = doc('.item-0.active')
+    print(li)
+    li.attr('name', 'link')
+    print(li)
+    li.text('changed item')
+    print(li)
+    li.html('<a href="link.html">changed item</a>')
+    print(li)
+    '''
+    <li class="item-0 active" name="link"><a href="link3.html"><span class="bold">third item</span></a></li>
+    <li class="item-0 active" name="link">changed item</li>
+    <li class="item-0 active" name="link"><a href="link.html">changed item</a></li>
+    '''
+
+
+def removeInformation():
+    html = '''
+    <div class="wrap">
+        Hello, World
+        <p>This is a paragraph.</p>
+     </div>
+    '''
+    from pyquery import PyQuery as pq
+    doc = pq(html)
+    wrap = doc('.wrap')
+    print(wrap.text())
+    '''
+    Hello, World
+    This is a paragraph.
+    '''
+    wrap.find('p').remove()
+    print(wrap.text())
+    '''
+    Hello, World
+    '''
+
+
+# pseudo-class selectors
+def fakeCSSSelect():
+    html = '''
+    <div class="wrap">
+        <ul class="list">
+             <li class="item-0">first item</li>
+             <li class="item-1"><a href="link2.html">second item</a></li>
+             <li class="item-0 active"><a href="link3.html"><span class="bold">third item</span></a></li>
+             <li class="item-1 active"><a href="link4.html">fourth item</a></li>
+             <li class="item-0"><a href="link5.html">fifth item</a></li>
+         </ul>
+     </div>
+    '''
+    from pyquery import PyQuery as pq
+    doc = pq(html)
+    li = doc('li:first-child')
+    print(li)
+    li = doc('li:last-child')
+    print(li)
+    li = doc('li:nth-child(2)')
+    print(li)
+    li = doc('li:gt(2)')
+    print(li)
+    li = doc('li:nth-child(2n)')
+    print(li)
+    li = doc('li:contains(second)')
+    print(li)
+
+
+
+if __name__ == '__main__':
+    fakeCSSSelect()
diff --git a/Spider/Chapter03_网页数据的提取/XPath库/XpathLearning.py b/Spider/Chapter03_网页数据的提取/XPath库/XpathLearning.py
new file mode 100644
index 0000000..fc4c031
--- /dev/null
+++ b/Spider/Chapter03_网页数据的提取/XPath库/XpathLearning.py
@@ -0,0 +1,195 @@
+# -*- encoding:utf-8 -*-
+
+'''
+@Author : dingjiawen
+@Date : 2023/11/8 15:15
+@Usage :
+@Desc :
+'''
+
+from lxml import etree
+
+'''
+Basic XPath rules:
+
+    1) nodename : select all child nodes of the named node
+    2) /        : select direct children of the current node
+    3) //       : select all descendants of the current node
+    4) .        : select the current node
+    5) ..       : select the parent of the current node
+    6) @        : select an attribute
+
+Example:
+//title[@lang='eng'] selects every title node whose lang attribute equals eng
+'''
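A minimal sketch exercising the rules above, one per line (the one-line markup and variable names here are made up for illustration and are not part of the committed files):

```python
# Sketch: each listed XPath rule applied once with lxml.etree.
from lxml import etree

doc = etree.HTML('<div><a href="x.html" lang="eng">hi</a></div>')
print(doc.xpath('//a'))                       # // : every <a> descendant
print(doc.xpath('//div/a'))                   # /  : direct <a> children of <div>
print(doc.xpath('//a/..'))                    # .. : parent of the matched <a>
print(doc.xpath('//a/@href'))                 # @  : attribute value -> ['x.html']
print(doc.xpath('//a[@lang="eng"]/text()'))   # attribute predicate -> ['hi']
```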
+
+
+def htmlByString():
+    text = '''
+    <div>
+        <ul>
+             <li class="item-0"><a href="link1.html">first item</a></li>
+             <li class="item-1"><a href="link2.html">second item</a></li>
+             <li class="item-inactive"><a href="link3.html">third item</a></li>
+             <li class="item-1"><a href="link4.html">fourth item</a></li>
+             <li class="item-0"><a href="link5.html">fifth item</a>
+         </ul>
+     </div>
+    '''
+    html = etree.HTML(text)
+    result = etree.tostring(html)
+    print(result.decode('utf-8'))
+
+
+def htmlByFile():
+    html = etree.parse('./test.html', etree.HTMLParser())
+    result = etree.tostring(html)
+    print(result.decode('utf-8'))
+
+
+def allNode():
+    html = etree.parse('./test.html', etree.HTMLParser())
+    # match every node in the document
+    result = html.xpath('//*')
+    print(result)
+    print(result[0])
+
+    # match every li node
+    result = html.xpath('//li')
+    print(result)
+    print(result[0])
+
+
+# matching child nodes
+def childNode():
+    html = etree.parse('./test.html', etree.HTMLParser())
+
+    # match the direct a children of every li
+    result = html.xpath('//li/a')
+    print(result)
+    print(result[0])
+
+    # match every a descendant of ul, however deeply nested
+    result = html.xpath('//ul//a')
+    print(result)
+    print(result[0])
+
+
+# matching parent nodes
+def fatherNode():
+    html = etree.parse('./test.html', etree.HTMLParser())
+
+    # class attribute of the parent of the a node whose href is link4.html
+    result = html.xpath('//a[@href="link4.html"]/../@class')
+    print(result)
+    # the same parent can also be reached through the parent:: axis
+    result = html.xpath('//a[@href="link4.html"]/parent::*/@class')
+    print(result)
+
+
+# getting text
+def textGet():
+    html = etree.parse('./test.html', etree.HTMLParser())
+
+    # text of the direct a children of li nodes whose class is item-0
+    result = html.xpath('//li[@class="item-0"]/a/text()')
+    print(result)  # ['first item', 'fifth item']
+
+    # text of all descendants of li nodes whose class is item-0
+    result = html.xpath('//li[@class="item-0"]//text()')
+    print(result)  # ['first item', 'fifth item', '\r\n ']
+
+
+# getting attributes
+def fieldGet():
+    html = etree.parse('./test.html', etree.HTMLParser())
+
+    # href attribute of the direct a children of li nodes
+    result = html.xpath('//li/a/@href')
+    print(result)  # ['link1.html', 'link2.html', 'link3.html', 'link4.html', 'link5.html']
+
+
+# matching a multi-valued attribute
+def fieldsGet():
+    text = '''
+    <li class="li li-first"><a href="link.html">first item</a></li>
+    '''
+    html = etree.HTML(text)
+    result = html.xpath('//li[@class="li"]/a/text()')
+    print(result)  # [] - no match, the class attribute has several values
+
+    result = html.xpath('//li[contains(@class, "li")]/a/text()')
+    print(result)  # ['first item'] - contains() matches it
+
+
+# matching on several attributes
+def fieldssGet():
+    text = '''
+    <li class="li li-first" name="item"><a href="link.html">first item</a></li>
+    '''
+    html = etree.HTML(text)
+    # several attribute conditions are joined with "and"
+    result = html.xpath('//li[contains(@class, "li") and @name="item"]/a/text()')
+    print(result)
+
+
+# selecting by position
+def orderGet():
+    text = '''
+    <div>
+        <ul>
+             <li class="item-0"><a href="link1.html">first item</a></li>
+             <li class="item-1"><a href="link2.html">second item</a></li>
+             <li class="item-inactive"><a href="link3.html">third item</a></li>
+             <li class="item-1"><a href="link4.html">fourth item</a></li>
+             <li class="item-0"><a href="link5.html">fifth item</a>
+         </ul>
+     </div>
+    '''
+    html = etree.HTML(text)
+    result = html.xpath('//li[1]/a/text()')
+    print(result)  # ['first item']
+    result = html.xpath('//li[last()]/a/text()')
+    print(result)  # ['fifth item']
+    result = html.xpath('//li[position()<3]/a/text()')
+    print(result)  # ['first item', 'second item']
+    result = html.xpath('//li[last()-2]/a/text()')
+    print(result)  # ['third item']
+
+
+def nodeSelect():
+    text = '''
+    <div>
+        <ul>
+             <li class="item-0"><a href="link1.html">first item</a></li>
+             <li class="item-1"><a href="link2.html">second item</a></li>
+             <li class="item-inactive"><a href="link3.html">third item</a></li>
+             <li class="item-1"><a href="link4.html">fourth item</a></li>
+             <li class="item-0"><a href="link5.html">fifth item</a>
+         </ul>
+     </div>
+    '''
+    html = etree.HTML(text)
+    result = html.xpath('//li[1]/ancestor::*')
+    print(result)
+    # ancestor:: selects ancestor nodes
+    result = html.xpath('//li[1]/ancestor::div')
+    print(result)
+    # attribute:: selects all attributes
+    result = html.xpath('//li[1]/attribute::*')
+    print(result)
+    # child:: selects child nodes
+    result = html.xpath('//li[1]/child::a[@href="link1.html"]')
+    print(result)
+    # descendant:: selects descendant nodes
+    result = html.xpath('//li[1]/descendant::span')
+    print(result)
+    # following:: selects every node after the current one
+    result = html.xpath('//li[1]/following::*[2]')
+    print(result)
+    # following-sibling:: selects the following siblings
+    result = html.xpath('//li[1]/following-sibling::*')
+    print(result)
+
+
+if __name__ == '__main__':
+    nodeSelect()
diff --git a/Spider/Chapter03_网页数据的提取/XPath库/__init__.py b/Spider/Chapter03_网页数据的提取/XPath库/__init__.py
new file mode 100644
index 0000000..d92f042
--- /dev/null
+++ b/Spider/Chapter03_网页数据的提取/XPath库/__init__.py
@@ -0,0 +1,8 @@
+#-*- encoding:utf-8 -*-
+
+'''
+@Author : dingjiawen
+@Date : 2023/11/8 15:15
+@Usage :
+@Desc :
+'''
\ No newline at end of file
diff --git a/Spider/Chapter03_网页数据的提取/XPath库/test.html b/Spider/Chapter03_网页数据的提取/XPath库/test.html
new file mode 100644
index 0000000..cb77f50
--- /dev/null
+++ b/Spider/Chapter03_网页数据的提取/XPath库/test.html
@@ -0,0 +1,9 @@
+<div>
+    <ul>
+         <li class="item-0"><a href="link1.html">first item</a></li>
+         <li class="item-1"><a href="link2.html">second item</a></li>
+         <li class="item-inactive"><a href="link3.html">third item</a></li>
+         <li class="item-1"><a href="link4.html">fourth item</a></li>
+         <li class="item-0"><a href="link5.html">fifth item</a>
+     </ul>
+ </div>
    \ No newline at end of file diff --git a/Spider/Chapter03_网页数据的提取/__init__.py b/Spider/Chapter03_网页数据的提取/__init__.py new file mode 100644 index 0000000..2e0ad3a --- /dev/null +++ b/Spider/Chapter03_网页数据的提取/__init__.py @@ -0,0 +1,8 @@ +#-*- encoding:utf-8 -*- + +''' +@Author : dingjiawen +@Date : 2023/11/8 15:12 +@Usage : +@Desc : +''' \ No newline at end of file diff --git a/TensorFlow_eaxmple/Model_train_test/condition_monitoring/dataETL/NewtonInsert.py b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/dataETL/NewtonInsert.py new file mode 100644 index 0000000..82c4854 --- /dev/null +++ b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/dataETL/NewtonInsert.py @@ -0,0 +1,153 @@ +# _*_ coding: UTF-8 _*_ + + +''' +@Author : dingjiawen +@Date : 2022/7/11 12:55 +@Usage : +@Desc : +''' + +import numpy as np +import pandas as pd +import time +# 只计算了该程序运行CPU的时间 +import timeit + +# cat_sale = pd.read_excel('data/catering_sale.xls') +path = "G:\data\SCADA数据\jb4q_8.csv" +cat_sale = pd.read_csv(path) +# cat_sale.drop('日期', axis=1, inplace=True) + +# 过滤异常值,并置为空值 +# cat_sale['销量'][(cat_sale['销量'] < 400) | (cat_sale['销量'] > 5000)] = np.NAN +# 将0值变成NAN 通过双中括号进行索引任意位置 +# print(df['realtime'][1]) +cat_sale[:][cat_sale[:] == 0] = np.nan # 在索引比较的时候,要转换成同一类型,使用astype + +# 分别定义求插商与求w的函数 +''' +:param x:差值前后的索引值 +:param y:差值前后的数值 +''' +def cal_f(x, y): + """ + 计算插商 + """ + f0 = np.zeros((len(x), len(y))) # 定义一个存储插商的数组 + for k in range(len(y) + 1): # 遍历列 + for i in range(k, len(x)): # 遍历行 + if k == 0: + f0[i, k] = y[i] + else: + f0[i, k] = (f0[i, k - 1] - f0[i - 1, k - 1]) / (x[i] - x[i - 1]) + # print('差商表', '\n', f0) + return f0 + + +''' +:param x:差值前后的索引值 +:param y:差值前后的数值 +:param x_j:需要差值的索引 +''' +def newton(x, y, x_j): + """ + 牛顿差值多项式 + """ + f0 = cal_f(x, y) # 计算插商 + f0 = f0.diagonal() # 插商对角线 + # 与w相乘 + f1 = 0 + for i in range(len(f0)): + s = 1 + k = 0 + while k < i: + s = s * (x_j - x[k]) + k += 1 + f1 = f1 + f0[i] * s + return f1 + + +# 自定义列向量插值函数,获取需差值的前后几个数 +''' +:param s:整个差值的序列 +:param n:需要差值的索引 +:param x_j:需要差值的索引 +:param is_fast:是否需要快速差值(无论前后是否是零值均采用);反之则一直找到不为0值的进行计算 +:param k:取前后多少个数 +''' +def ployinterp_columns(s, n, x_j, is_fast: bool = False, k=3): + X = [] + Y = [] + if is_fast: + # 如果最前面的值不够k个 + if n < k: + a = list(range(0, n)) + list(range(n + 1, n + k + 1)) + y = s[list(range(0, n)) + list(range(n + 1, n + k + 1))] + # 如果最后面的值不够k个 + elif n > len(s) - k - 1: + y = s[list(range(n - k, n)) + list(range(n + 1, len(s)))] + # 前后均有k个 + else: + y = s[list(range(n - k, n)) + list(range(n + 1, n + k + 1))] # 取空值处的前后5个数 + y = y[y.notnull()] # 剔除空值 + X = y.index + Y = list(y) + else: + # 先取序列前后各k个不为空的值 + index = n - 1 + while len(X) < k and index >= 0: + if not np.isnan(s[index]): + Y.append(s[index]) + X.append(index) + index -= 1 + index = n + 1 + X.reverse() + Y.reverse() + + while len(X) < 2 * k and index <= len(s): + if not np.isnan(s[index]): + Y.append(s[index]) + X.append(index) + index += 1 + # print(X) + # print(Y) + + return newton(X, Y, x_j) # 插值并返回插值结果 + + +def execute(): + cat_sale[:][cat_sale[:] == 0] = np.nan # 在索引比较的时候,要转换成同一类型,使用astype + for i in cat_sale.columns: + temp = cat_sale[i].isnull() + if temp[:][temp[:] == True].__len__() > 0: + print("{0}列处理前空行数:{1}".format(i, cat_sale[i].isnull().sum())) + for j in range(len(cat_sale)): + if (cat_sale[i].isnull())[j]: + x_j = cat_sale.index[j] + cat_sale.loc[j,i] = ployinterp_columns(cat_sale[i], j, x_j) + print('第{0}行牛顿插值为{1}'.format(j, cat_sale.loc[j, i])) + print("{0}列处理后空行数:{1}".format(i, 
cat_sale[i].isnull().sum())) + print("========================================") + print(cat_sale) + cat_sale.to_csv("G:\data\SCADA数据\jb4q_8_dealed.csv") + # cat_sale.to_excel('saless.xls') + + +def test(): + cat_sale[:][cat_sale[:] == 0] = np.nan # 在索引比较的时候,要转换成同一类型,使用astype + for j in range(len(cat_sale['num_gearbox_sumptemp'])): + if (cat_sale['num_gearbox_sumptemp'].isnull())[j]: + x_j = cat_sale.index[j] + cat_sale.loc[j,'num_gearbox_sumptemp'] = ployinterp_columns(cat_sale['num_gearbox_sumptemp'], j, x_j,is_fast=True) + # print('第{0}行牛顿插值为{1}'.format(j, cat_sale.loc[j,'num_gearbox_sumptemp'])) + + +if __name__ == '__main__': + start = timeit.default_timer() + # execute() + test() + end = timeit.default_timer() + print('Running time: %s Seconds' % (end - start)) + # 返回值是浮点数 + diff --git a/TensorFlow_eaxmple/Model_train_test/condition_monitoring/dataETL/__init__.py b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/dataETL/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/TensorFlow_eaxmple/Model_train_test/condition_monitoring/dataETL/lagrangeInsert.py b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/dataETL/lagrangeInsert.py new file mode 100644 index 0000000..3591cdf --- /dev/null +++ b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/dataETL/lagrangeInsert.py @@ -0,0 +1,96 @@ +# _*_ coding: UTF-8 _*_ + + +''' +@Author : dingjiawen +@Date : 2022/7/11 11:43 +@Usage : +@Desc : +''' + + +import numpy as np +import pandas as pd + + +# 拉格朗日插值算法 +def LagrangeInterpolation(slices, x, k=5): + # slices(series) :the defining points + # k :the number of defining points of Lagrange poly 前后各k个值 + # slices index :the corresponding value on each defining point + # x :the point whose value we are interested + # print(slices[x]) + # print(np.isnan(slices[x])) + result = 0 # later to save final result + X = [] + Y = [] + # 先取序列前后各k个不为空的值 + index = x - 1 + while len(X) < k and index >= 0: + if not np.isnan(slices[index]): + Y.append(slices[index]) + X.append(index) + index -= 1 + index = x + 1 + X.reverse() + Y.reverse() + + while len(X) < 2 * k and index <= len(slices): + if not np.isnan(slices[index]): + Y.append(slices[index]) + X.append(index) + index += 1 + # print(X) + # print(Y) + + for j in range(len(X)): + # result_l 基函数 + result_l = 1 + for i in range(len(X)): + if i != j: + result_l = result_l * (x - X[i]) / (X[j] - X[i]) + # 取值 slices[j] + result = result + slices[j] * result_l + + return result + + + + + + +if __name__ == '__main__': + path = "G:\data\SCADA数据\jb4q_8.csv" + + df = pd.read_csv(path) + columns = df.columns + print(df.columns) + + # 将0值变成NAN 通过双中括号进行索引任意位置 + # print(df['realtime'][1]) + df[:][df[:] == 0] = np.nan # 在索引比较的时候,要转换成同一类型,使用astype + + # TODO 测试单点插值 + print(df['num_gearbox_sumptemp'].isnull()) + # print("插值为:", LagrangeInterpolation(df['num_gearbox_sumptemp'], 47, 2)) + + # TODO 单列测试插值 + print("之前的空值数量:", df['num_gearbox_sumptemp'].isnull().sum()) + for j in range(len(df)): + if (df['num_gearbox_sumptemp'].isnull())[j]: + s = df['num_gearbox_sumptemp'] + df.loc[j, 'num_gearbox_sumptemp'] = LagrangeInterpolation(s, j, 5) + print("插值之后的空值数量:", df['num_gearbox_sumptemp'].isnull().sum()) + + # # TODO 整体处理 + print("之前的空值数量:", df.isnull().sum()) + for i in columns: + temp = df[i].isnull() + if temp[:][temp[:] == True].__len__() > 0: + for j in range(len(df)): + if (df[i].isnull())[j]: + s = df[columns[i]] + df.loc[j, i] = LagrangeInterpolation(s, j, 3) + + print("插值之后的空值数量:",df.isnull().sum()) + 
df.to_csv("G:\实验室/2022项目中期\数据治理算法\jb4q_8_lagrange.csv") diff --git a/TensorFlow_eaxmple/Model_train_test/condition_monitoring/dataETL/scada_data_process_for_JBYQ_YSD.py b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/dataETL/scada_data_process_for_JBYQ_YSD.py new file mode 100644 index 0000000..68755dc --- /dev/null +++ b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/dataETL/scada_data_process_for_JBYQ_YSD.py @@ -0,0 +1,149 @@ +# -*- coding: utf-8 -*- +""" +Created on Sun Jun 7 09:23:31 2020 + +@author: AlbertHu +""" + +# -*- coding: utf-8 -*- +""" +Created on Fri Jun 5 21:33:46 2020 + +@author: AlbertHu +""" + +# -*- coding: utf-8 -*- +""" +Created on Fri Jun 5 10:40:27 2020 + +@author: AlbertHu +""" + +import os +import time +import numpy as np +import pandas as pd +import datetime + +def findallfiles(cmsfilesfatherpath): #返回父目录包括子目录下所有文件的地址 + cmsfilepaths = [] + files = os.listdir(cmsfilesfatherpath) + for fi in files: + fi_d = os.path.join(cmsfilesfatherpath, fi) + if os.path.isdir(fi_d): + # files.extend(findcmsfiles(fi_d)) + pass + else: + cmsfilepaths.append(fi_d) + return cmsfilepaths +def findIndexOfExceptPoint(data): + indexList2D = [] + indexList1 = [] + indexList2 = [] + indexList3 = [] + indexList4 = [] + indexList5 = [] + print("开始清洗") + for i in data.index: + if i % 10000 == 0: + print("已处理了{}组数据".format(i)) + #条件1 + if data[' 瞬时风速'][i] < 3.5 and data[' 1#叶片变桨角度'][i] > 89: + indexList1.append(i) + elif data[' 瞬时风速'][i] >= 3.5 and data[' 瞬时风速'][i] <= 10 and data[' 1#叶片变桨角度'][i] > 0.5: + indexList1.append(i) + elif data[' 瞬时风速'][i] >= 11 and data[' 瞬时风速'][i] <= 25 and (data[' 有功功率'][i] < 1800 and data[' 1#叶片变桨角度'][i] > 1.5): + indexList1.append(i) + elif data[' 瞬时风速'][i] > 25 and data[' 有功功率'][i] >0: + indexList1.append(i) + else: + pass + #条件2 + if abs(data[' 齿轮箱高速轴前端温度'][i])>200 or abs(data[' 齿轮箱高速轴后端温度'][i])>200 or abs(data[' 齿轮箱冷却水温'][i])>200 or abs(data[' 齿轮箱进口油温'][i])>200 or abs(data[' 齿轮箱油池温度'][i])>200 or abs(data[' 环境温度'][i]>200): + indexList2.append(i) + else: + pass + #条件3 #条件6 + if data[' 齿轮箱高速轴前端温度'][i] > 80 or data[' 齿轮箱高速轴后端温度'][i] > 80 or abs(data[' 齿轮箱高速轴前端温度'][i] - data[' 齿轮箱高速轴后端温度'][i]) > 20: + indexList3.append(i) + else: + pass + #条件4 + if data[' 有功功率'][i] > 100 and data[' 齿轮箱进口压力'][i] <= 0: + indexList4.append(i) + else: + pass + #条件5 + if abs(data[' 齿轮箱进口压力'][i] - data[' 齿轮箱泵出口压力'][i]) > 5: + indexList5.append(i) + else: + pass + indexList2D = [indexList1,indexList2,indexList3,indexList4,indexList5] + return indexList2D +# #条件6 +# if data[' 齿轮箱高速轴前端温度'][i] > 80 or data[' 齿轮箱高速轴后端温度'][i]) > 80: + + + + +fathpath = r'D:\1.SCADA_风电数据\靖边二期2019_已处理' +allfilepaths = findallfiles(fathpath) +testpath = allfilepaths[0] +#allfilepaths = [r'F:\scada_ewma本地数据2(重要)\data\DataResult(靖边二期2019)\风机7.csv'] + +#testpath=r'F:\scada_ewma本地数据2(重要)\data\DataResult(粤水电达坂城2020.1月-5月)\风机1.csv' +for testpath in allfilepaths: + data = pd.read_csv(testpath,encoding='gbk',parse_dates = ['时间']) + data.columns + + indexList2D = findIndexOfExceptPoint(data) + + savePath = r'./cleanScada/JB2Q615/风机{}'.format(data['风机号'][1]) + if not os.path.exists(savePath): + os.makedirs(savePath) + file = open(savePath + '/IndexOfExceptPoint.txt','w') + a = 1 + for List in indexList2D: + for i in List: + file.write(str(i)+',') + try: + data.drop([i],inplace=True) + except: + continue + file.write('第{}组\n'.format(a)) + a += 1 + file.close() + + data.to_csv(savePath+'.csv',encoding='gbk') + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/TensorFlow_eaxmple/Model_train_test/condition_monitoring/data_deal/baseETL.py b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/data_deal/baseETL.py new file mode 100644 index 0000000..402886e --- /dev/null +++ b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/data_deal/baseETL.py @@ -0,0 +1,67 @@ +# _*_ coding: UTF-8 _*_ + + +''' +@Author : dingjiawen +@Date : 2022/7/7 10:29 +@Usage : 对SCADA数据进行基础的清洗工作 +@Desc : +''' + +import tensorflow as tf +import pandas as pd +import numpy as np +import os +import time +from condition_monitoring.lib.IOBase import ioLib + +''' +超参数设置 +''' +# 需处理文件的父目录 +fatherPath = "G:\data\SCADA数据\华能三塘湖" +# 处理好文件的父目录 +fatherDealedPath = "G:\data\SCADA数据\华能三塘湖\dealed" + +baseUseCols = ["时间", "风机号", "发电机转矩", "发电机无功功率", "发电机转速", "发电机有功功率", "发电机绕组最高温度", "齿轮箱油池温度", "齿轮箱进口油温", "齿轮箱进口压力", + "齿轮箱油泵出口压力", "齿轮箱冷却水温度", "有功功率", "60s平均有功功率", "10min平均有功功率", "10s平均有功功率", "10s平均无功功率", "无功功率", "瞬时风速", + "机舱温度"] + +baseWinds = [] + +# 列出父目录下所有文件 +def listFile(fatherPath = fatherPath): + filepaths = [] + files = os.listdir(fatherPath) + for file in files: + fi_d = os.path.join(fatherPath, file) + if os.path.isdir(fi_d): + pass + # files.extend(findcmsfiles(fi_d)) + else: + filepaths.append(fi_d) + + return filepaths + + +def dropNa(filePath): + data = pd.read_csv(filePath, low_memory=False, encoding='gbk', usecols=baseUseCols, parse_dates=['时间']) + print(data) + data.dropna(axis=0, how='any', inplace=True) + print(data) + data.append() + ioLib.saveCSV(data=data, savePath=fatherDealedPath) + + + +def separateByWindNum(data): + indexLists = [] + windList1 = [] + windList2 = [] + + + +if __name__ == '__main__': + filePath = "G:\data\SCADA数据\华能三塘湖/1华能三塘湖20180730-20180803.csv" + + diff --git a/TensorFlow_eaxmple/Model_train_test/condition_monitoring/data_deal/loadData.py b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/data_deal/loadData.py new file mode 100644 index 0000000..f110cb1 --- /dev/null +++ b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/data_deal/loadData.py @@ -0,0 +1,228 @@ +import pandas as pd +import numpy as np +import tensorflow as tf +import csv +import os +import matplotlib.pyplot as plt +import seaborn as sns + +'''设置数据源文件路径''' +# source_path = r'G:\data\SCADA数据\jb4q_8.csv' +source_path = "G:\data\SCADA数据\jb4q_8_delete_total_zero.csv" + +'''修改后的数据源存储路径''' +save_path = r'G:\data\SCADA数据\jb4q_8_delete_total_zero.csv' + +'''需要的列''' + + +# baseUseCols = ["num_gearbox_sumptemp","num_gearbox_inletoiltemp","num_gearbox_inletpress","num_gearbox_coolingwatertemp"] + +# target_path = r'G:\data\SCADA数据\华能三塘湖/dealed/后十万2018.01.16.csv' +# target_folder = r'G:\data\SCADA数据\华能三塘湖/dealed' + + +# 生成文件夹 +def folderGenerate(folder_name): + if not os.path.exists(folder_name): + os.makedirs(folder_name) + + +# 皮尔逊相关系数 +def cal_correlation_coefficient(data, label): + print("计算皮尔逊相关系数") + print(data) + print(data.shape) + pd_data = pd.DataFrame(data) + person = pd_data.corr() + print(person) + # 画热点图heatmap + # cmap = sns.heatmap(person, annot=True, xticklabels=label, yticklabels=label) + # plt.figure(1, figsize=(6.0, 2.68)) + # plt.subplots_adjust(left=0.1, right=0.94, bottom=0.2, top=0.9, wspace=None, + # hspace=None) + # plt.tight_layout() + # font1 = {'family': 'Times New Roman', 'weight': 'normal', 'size': 10} # 设置坐标标签的字体大小,字体 + # font2 = {'family': 'Times New Roman', 'weight': 'normal', 'size': 15} # 设置坐标标签的字体大小,字体 + # plt.xlabel("X", size=10,fontdict=font1) + # plt.ylabel("Y", size=10,fontdict=font1) + # plt.title("Heatmap of correlation 
coefficient matrix", size=20,fontdict=font1) + # + # # 调整色带的标签: + # cbar = cmap.collections[0].colorbar + # cbar.ax.tick_params(labelsize=15, labelcolor="black") + # cbar.ax.set_ylabel(ylabel="color scale", color="red", loc="center",fontdict=font2) + # + # plt.show() + return person + + +def get_most_N_correlation_coefficient(person, N=10): + print("获得相关度最高的{}个值".format(N)) + # total_correlation = person[1:, 1:] + abs_correlation = np.abs(person) + one = np.ones(shape=abs_correlation.shape) + two = np.subtract(one, abs_correlation) + rows, cols = two.shape + total_sum = [] + for i in range(cols): + # print(two[i]) + total = np.sum(two[i]) + total_sum.append(total) + + print("total_sum:", total_sum) + # 取最小的N个数,因为是与1减了以后的,越小相关系数越大 + print("arg:",np.argpartition(total_sum, N)) + min = np.argpartition(total_sum, N)[:N] + max = np.argpartition(total_sum, N)[total_sum.__len__() - N:] + print("min:",min) + return min + + +# 过滤或者线性填充 +def findIndexOfExceptPoint(data: pd.DataFrame): + # indexList2D = [] + # indexList = [] + # indexList2 = [] + # indexList3 = [] + # indexList4 = [] + indexList = [] + print("开始清洗") + for i in data.index: + if i % 10000 == 0: + print("已处理了{}条数据".format(i)) + ## 删除绝大多数0 + # if data['num_gearbox_sumptemp'][i] != 0 and (i < 416166 or i > 432766) and ( + # data['num_gearbox_pumpoutletpress'][i] == 0 or data['num_activepower'][i] == 0 or + # data['num_gen_torque'][i] == 0): + # indexList.append(i) + # 删除全部有0 + # if (i < 416166 or i > 432766) and ( + # data['num_gearbox_pumpoutletpress'][i] == 0 or data['num_activepower'][i] == 0 or + # data['num_gen_torque'][i] == 0): + # indexList.append(i) + # 只删除全部0 + if (i < 416166 or i > 432766) and ( + data['num_gearbox_sumptemp'][i] == 0 and data['num_gearbox_inletoiltemp'][i] == 0 and + data['num_gearbox_inletpress'][i] == 0): + indexList.append(i) + else: + pass + + # indexList2D = [indexList1, indexList2, indexList3, indexList4, indexList5] + indexList2D = set(indexList) + print("要移除的index:", indexList2D) + return indexList2D + + +# 根据index移除异常数据 +def removeDataByIndex(indexList, data): + print("开始移除异常index的数据") + a = 1 + data.drop(indexList, inplace=True) + # for i in indexList: + # try: + # data.drop([i], inplace=True) + # except: + # continue + # # print('第{}组\n'.format(a)) + # # a += 1 + return data + + +# 处理数据(移除,重新赋值,或者是其他操作) +def dealData(scada_data: pd.DataFrame): + # 是否保存处理好的数据 + Is_save = True + indexList = findIndexOfExceptPoint(scada_data) + removeDataByIndex(indexList=indexList, data=scada_data) + print("处理后的数据为:") + print(scada_data) + if Is_save: + print("============保存处理好的数据,路径为{}============".format(save_path)) + scada_data.to_csv(save_path, index=False, encoding='gbk') + + return scada_data + + +# 读取数据,转为numpy数组或者tf数组 +def read_data(file_name, isNew: bool = False): + ''' 导入数据 ''' + with open(file_name, 'r') as f: + if isNew: + # scada_data = pd.read_csv(f,low_memory=False, encoding='gbk', usecols=baseUseCols, parse_dates=['时间']) + scada_data = pd.read_csv(f, low_memory=False, encoding='gbk', parse_dates=['realtime']) + print(scada_data) + scada_data = dealData(scada_data=scada_data) + print(scada_data.head) + scada_data = np.array(scada_data) + else: + scada_data = np.loadtxt(f, str, delimiter=",") + label = scada_data[0, 3:] + label=list(['Gs','Gio','Gip','Gp','Gwt','En','Gft','Grt','Gwt','Et','Rs','Ap','Ws','Dw','Ges','Gt','Vx','Vy']) + print("导入数据成功,将数据转为numpy或tf数组...") + needed_data = scada_data[1:, 3:].astype(dtype=np.float) + ## needed_data = tf.cast(needed_data, tf.float32) tensor无法转为pd.DataFrame + 
print(needed_data) + print("转换成功,并返回...") + return needed_data, label + + +def plot_original_data(data): + rows, cols = data.shape + print("开始画图...") + + for i in range(cols): + plt.figure(i) + plt.plot(data[:, i]) + plt.show() + + +def execute(file_name=source_path,N=10): + needed_data, label = read_data(file_name=file_name, isNew=False) + print(needed_data) + print(needed_data.shape) + # plot_original_data(needed_data) + person = cal_correlation_coefficient(needed_data, label) + person = np.array(person) + min = get_most_N_correlation_coefficient(person, N=N) + + for index in min: + if index == min[0]: + total_data = np.expand_dims(needed_data[:, index], axis=-1) + else: + total_data = np.concatenate([total_data, np.expand_dims(needed_data[:, index], axis=-1)], axis=-1) + + return total_data + + +def deal_data(file_name=source_path): + ''' 导入数据 ''' + with open(file_name, 'r') as f: + + # scada_data = pd.read_csv(f,low_memory=False, encoding='gbk', usecols=baseUseCols, parse_dates=['时间']) + scada_data = pd.read_csv(f, low_memory=False, encoding='gbk', parse_dates=['realtime']) + print(scada_data) + scada_data = dealData(scada_data=scada_data) + print(scada_data.head) + scada_data = np.array(scada_data) + + scada_data = np.loadtxt(f, str, delimiter=",") + label = scada_data[0, 3:] + label = list( + ['Gs', 'Gio', 'Gip', 'Gp', 'Gwt', 'En', 'Gft', 'Grt', 'Gwt', 'Et', 'Rs', 'Ap', 'Ws', 'Dw', 'Ges', 'Gt', + 'Vx', 'Vy']) + print("导入数据成功,将数据转为numpy或tf数组...") + needed_data = scada_data[1:, 3:].astype(dtype=np.float) + ## needed_data = tf.cast(needed_data, tf.float32) tensor无法转为pd.DataFrame + print(needed_data) + print("转换成功,并返回...") + return needed_data, label + pass + + +if __name__ == '__main__': + total_data = execute(N=10, file_name=source_path) + # print(total_data) + # print(total_data.shape) + # plot_original_data() diff --git a/TensorFlow_eaxmple/Model_train_test/condition_monitoring/data_deal/loadData_daban.py b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/data_deal/loadData_daban.py new file mode 100644 index 0000000..4a0f33b --- /dev/null +++ b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/data_deal/loadData_daban.py @@ -0,0 +1,207 @@ +import pandas as pd +import numpy as np +import tensorflow as tf +import csv +import os +import matplotlib.pyplot as plt +import seaborn as sns + +'''设置数据源文件路径''' +# source_path = r'G:\data\SCADA数据\jb4q_8.csv' +source_path = "G:\data\SCADA数据\jb4q_8_delete_total_zero.csv" + +'''修改后的数据源存储路径''' +save_path = r'G:\data\SCADA数据\jb4q_8_delete_total_zero.csv' + +'''需要的列''' + + +# baseUseCols = ["num_gearbox_sumptemp","num_gearbox_inletoiltemp","num_gearbox_inletpress","num_gearbox_coolingwatertemp"] + +# target_path = r'G:\data\SCADA数据\华能三塘湖/dealed/后十万2018.01.16.csv' +# target_folder = r'G:\data\SCADA数据\华能三塘湖/dealed' + +#96748 107116 + + +# 生成文件夹 +def folderGenerate(folder_name): + if not os.path.exists(folder_name): + os.makedirs(folder_name) + + +# 皮尔逊相关系数 +def cal_correlation_coefficient(data, label): + print("计算皮尔逊相关系数") + pd_data = pd.DataFrame(data) + person = pd_data.corr() + print(person) + # 画热点图heatmap + # cmap = sns.heatmap(person, annot=True, xticklabels=label, yticklabels=label) + # plt.figure(1, figsize=(6.0, 2.68)) + # plt.subplots_adjust(left=0.1, right=0.94, bottom=0.2, top=0.9, wspace=None, + # hspace=None) + # plt.tight_layout() + # font1 = {'family': 'Times New Roman', 'weight': 'normal', 'size': 10} # 设置坐标标签的字体大小,字体 + # font2 = {'family': 'Times New Roman', 'weight': 'normal', 'size': 15} # 设置坐标标签的字体大小,字体 + # 
plt.xlabel("X", size=10,fontdict=font1) + # plt.ylabel("Y", size=10,fontdict=font1) + # plt.title("Heatmap of correlation coefficient matrix", size=20,fontdict=font1) + # + # # 调整色带的标签: + # cbar = cmap.collections[0].colorbar + # cbar.ax.tick_params(labelsize=15, labelcolor="black") + # cbar.ax.set_ylabel(ylabel="color scale", color="red", loc="center",fontdict=font2) + # + # plt.show() + return person + + +def get_most_N_correlation_coefficient(person, N=10): + print("获得相关度最高的{}个值".format(N)) + # total_correlation = person[1:, 1:] + abs_correlation = np.abs(person) + one = np.ones(shape=abs_correlation.shape) + two = np.subtract(one, abs_correlation) + rows, cols = two.shape + total_sum = [] + for i in range(cols): + # print(two[i]) + total = np.sum(two[i]) + total_sum.append(total) + + print("total_sum:", total_sum) + # 取最小的N个数,因为是与1减了以后的,越小相关系数越大 + print("arg:",np.argpartition(total_sum, N)) + min = np.argpartition(total_sum, N)[:N] + max = np.argpartition(total_sum, N)[total_sum.__len__() - N:] + print("min:",min) + return min + + +# 过滤或者线性填充 +def findIndexOfExceptPoint(data: pd.DataFrame): + # indexList2D = [] + # indexList = [] + # indexList2 = [] + # indexList3 = [] + # indexList4 = [] + indexList = [] + print("开始清洗") + for i in data.index: + if i % 10000 == 0: + print("已处理了{}条数据".format(i)) + ## 删除绝大多数0 + # if data['num_gearbox_sumptemp'][i] != 0 and (i < 416166 or i > 432766) and ( + # data['num_gearbox_pumpoutletpress'][i] == 0 or data['num_activepower'][i] == 0 or + # data['num_gen_torque'][i] == 0): + # indexList.append(i) + # 删除全部有0 + # if (i < 416166 or i > 432766) and ( + # data['num_gearbox_pumpoutletpress'][i] == 0 or data['num_activepower'][i] == 0 or + # data['num_gen_torque'][i] == 0): + # indexList.append(i) + # 只删除全部0 + if (i < 416166 or i > 432766) and ( + data['num_gearbox_sumptemp'][i] == 0 and data['num_gearbox_inletoiltemp'][i] == 0 and + data['num_gearbox_inletpress'][i] == 0): + indexList.append(i) + else: + pass + + # indexList2D = [indexList1, indexList2, indexList3, indexList4, indexList5] + indexList2D = set(indexList) + print("要移除的index:", indexList2D) + return indexList2D + + +# 根据index移除异常数据 +def removeDataByIndex(indexList, data): + print("开始移除异常index的数据") + a = 1 + data.drop(indexList, inplace=True) + # for i in indexList: + # try: + # data.drop([i], inplace=True) + # except: + # continue + # # print('第{}组\n'.format(a)) + # # a += 1 + return data + + +# 处理数据(移除,重新赋值,或者是其他操作) +def dealData(scada_data: pd.DataFrame): + # 是否保存处理好的数据 + Is_save = True + indexList = findIndexOfExceptPoint(scada_data) + removeDataByIndex(indexList=indexList, data=scada_data) + print("处理后的数据为:") + print(scada_data) + if Is_save: + print("============保存处理好的数据,路径为{}============".format(save_path)) + scada_data.to_csv(save_path, index=False, encoding='gbk') + + return scada_data + + +# 读取数据,转为numpy数组或者tf数组 +def read_data(file_name, isNew: bool = False): + ''' 导入数据 ''' + with open(file_name, 'r') as f: + if isNew: + # scada_data = pd.read_csv(f,low_memory=False, encoding='gbk', usecols=baseUseCols, parse_dates=['时间']) + scada_data = pd.read_csv(f, low_memory=False, encoding='gbk', parse_dates=['realtime']) + print(scada_data) + scada_data = dealData(scada_data=scada_data) + print(scada_data.head) + scada_data = np.array(scada_data) + else: + scada_data = np.loadtxt(f, str, delimiter=",") + label = scada_data[0, 4:] + label=list(['Gs','Gio','Gip','Gp','Gwt','En','Gft','Grt','Gwt','Et','Rs','Ap','Ws','Dw','Ges','Gt','Vx','Vy']) + print("导入数据成功,将数据转为numpy或tf数组...") + needed_data = 
scada_data[1:, 4:].astype(dtype=np.float) + ## needed_data = tf.cast(needed_data, tf.float32) tensor无法转为pd.DataFrame + print(needed_data) + print("转换成功,并返回...") + return needed_data, label + + +def plot_original_data(data): + rows, cols = data.shape + print("开始画图...") + + for i in range(cols): + plt.figure(i) + plt.plot(data[:, i]) + plt.show() + + +def execute(file_name=source_path,N=10): + needed_data, label = read_data(file_name=file_name, isNew=False) + print(needed_data) + print(needed_data.shape) + # plot_original_data(needed_data) + person = cal_correlation_coefficient(needed_data, label) + person = np.array(person) + min = get_most_N_correlation_coefficient(person, N=N) + + for index in min: + if index == min[0]: + total_data = np.expand_dims(needed_data[:, index], axis=-1) + else: + total_data = np.concatenate([total_data, np.expand_dims(needed_data[:, index], axis=-1)], axis=-1) + + return total_data + + +if __name__ == '__main__': + # total_data = execute(N=10, file_name=source_path) + # print(total_data) + # print(total_data.shape)7 10 13 + # 15中间有一段差别很大 + file_name='H:\data\SCADA数据\SCADA_已处理_粤水电达坂城2020.1月-5月/风机15.csv' + needed_data, label = read_data(file_name=file_name, isNew=False) + print(needed_data.shape) + plot_original_data(needed_data) diff --git a/TensorFlow_eaxmple/Model_train_test/condition_monitoring/data_deal/plot_raw_data.py b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/data_deal/plot_raw_data.py new file mode 100644 index 0000000..b0258fa --- /dev/null +++ b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/data_deal/plot_raw_data.py @@ -0,0 +1,60 @@ +# -*- coding: utf-8 -*- + +# coding: utf-8 + +''' +@Author : dingjiawen +@Date : 2022/11/2 12:59 +@Usage : 画原始数据 +@Desc : +''' +import pandas as pd +import numpy as np + + + + +source_path = "G:\data\SCADA数据\jb4q_8_delete_total_zero.csv" + +def deal_data(file_name=source_path): + ''' 导入数据 ''' + with open(file_name, 'r') as f: + scada_data = np.loadtxt(f, str, delimiter=",") + label = scada_data[0, 3:] + label = list( + ['Gs', 'Gio', 'Gip', 'Gp', 'Gwt', 'En', 'Gft', 'Grt', 'Gwt', 'Et', 'Rs', 'Ap', 'Ws', 'Dw', 'Ges', 'Gt', + 'Vx', 'Vy']) + print("导入数据成功,将数据转为numpy或tf数组...") + needed_data = scada_data[1:37000, 3:].astype(dtype=np.float) + ## needed_data = tf.cast(needed_data, tf.float32) tensor无法转为pd.DataFrame + print(needed_data) + print("转换成功,并返回...") + return needed_data, label + pass + + +# 归一化 +def normalization(data): + rows, cols = data.shape + print("归一化之前:", data) + print(data.shape) + print("======================") + + # 归一化 + max = np.max(data, axis=0) + max = np.broadcast_to(max, [rows, cols]) + min = np.min(data, axis=0) + min = np.broadcast_to(min, [rows, cols]) + + data = (data - min) / (max - min) + print("归一化之后:", data) + print(data.shape) + + return data + + +if __name__ == '__main__': + needed_data, label=deal_data() + data=normalization(data=needed_data) + np.savetxt('G:\data\SCADA数据/normalization.csv',data,delimiter=',') + print(data.shape) \ No newline at end of file diff --git a/TensorFlow_eaxmple/Model_train_test/condition_monitoring/others_idea/CNN_GRU.py b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/others_idea/CNN_GRU.py new file mode 100644 index 0000000..e2bb0f0 --- /dev/null +++ b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/others_idea/CNN_GRU.py @@ -0,0 +1,262 @@ +import tensorflow as tf +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +from condition_monitoring.data_deal import loadData +from keras.callbacks import 
EarlyStopping +import os +import shutil + +# 孔师兄idea:CNN+GRU + + +'''超参数设置''' +time_stamp = 120 +feature_num = 10 +batch_size = 8 +learning_rate = 0.01 +EPOCH = 101 +model_name = "CNN_GRU" +'''EWMA超参数''' +K = 18 +namuda = 0.01 +'''保存名称''' +save_name = "../model/{0}_timestamp{1}_featureNum{2}_batch_size{3}_Epoch{4}.h5".format(model_name, + time_stamp, feature_num, + batch_size, EPOCH) +'''文件名''' +file_name = "G:\data\SCADA数据\jb4q_8_delete_all_zero.csv" + +def remove(data, time_stamp=time_stamp): + rows, cols = data.shape + print("remove_data.shape:", data.shape) + num = int(rows / time_stamp) + + return data[:num * time_stamp, :] + pass + + +# 不重叠采样 +def get_training_data(data, time_stamp=time_stamp): + removed_data = remove(data=data) + rows, cols = removed_data.shape + # print("removed_data.shape:", data.shape) + # print("removed_data:", removed_data) + train_data = np.reshape(removed_data, [-1, time_stamp, cols]) + # print("train_data:", train_data) + batchs, time_stamp, cols = train_data.shape + + for i in range(1, batchs): + each_label = np.expand_dims(train_data[i, 0, :], axis=0) + if i == 1: + train_label = each_label + else: + train_label = np.concatenate([train_label, each_label], axis=0) + + # print("train_data.shape:", train_data.shape) + # print("train_label.shape", train_label.shape) + return train_data[:-1, :], train_label + + +# 重叠采样 +def get_training_data_overlapping(data,time_stamp=time_stamp): + + rows,cols = data.shape + train_data = np.empty(shape=[rows-time_stamp-1,time_stamp,cols]) + train_label = np.empty(shape=[rows-time_stamp-1,cols]) + for i in range(rows): + if i +time_stamp >= rows: + break + if i + time_stamp < rows - 1: + train_data[i] = data[i:i+time_stamp] + train_label[i] = data[i+time_stamp] + + print("重叠采样以后:") + print("data:",train_data) + print("label:",train_label) + + return train_data,train_label + + + +def condition_monitoring_model(): + input = tf.keras.Input(shape=[time_stamp, feature_num]) + conv1 = tf.keras.layers.Conv1D(filters=256, kernel_size=1)(input) + GRU1 = tf.keras.layers.GRU(128, return_sequences=False)(conv1) + d1 = tf.keras.layers.Dense(300)(GRU1) + output = tf.keras.layers.Dense(10)(d1) + model = tf.keras.Model(inputs=input, outputs=output) + + return model + + +# 归一化 +def normalization(data): + rows, cols = data.shape + print("归一化之前:", data) + print(data.shape) + print("======================") + + # 归一化 + max = np.max(data, axis=0) + max = np.broadcast_to(max, [rows, cols]) + min = np.min(data, axis=0) + min = np.broadcast_to(min, [rows, cols]) + + data = (data - min) / (max - min) + print("归一化之后:", data) + print(data.shape) + + return data + + +# 正则化 +def Regularization(data): + rows, cols = data.shape + print("正则化之前:", data) + print(data.shape) + print("======================") + + # 正则化 + mean = np.mean(data, axis=0) + mean = np.broadcast_to(mean, shape=[rows, cols]) + dst = np.sqrt(np.var(data, axis=0)) + dst = np.broadcast_to(dst, shape=[rows, cols]) + data = (data - mean) / dst + print("正则化之后:", data) + print(data.shape) + + return data + pass + + +def EWMA(data, K=K, namuda=namuda): + # t是啥暂时未知 + t = 0 + mid = np.mean(data, axis=0) + standard = np.sqrt(np.var(data, axis=0)) + UCL = mid + K * standard * np.sqrt(namuda / (2 - namuda) * (1 - (1 - namuda) ** 2 * t)) + LCL = mid - K * standard * np.sqrt(namuda / (2 - namuda) * (1 - (1 - namuda) ** 2 * t)) + return mid, UCL, LCL + pass + + +def get_MSE(data, label, new_model): + predicted_data = new_model.predict(data) + + temp = np.abs(predicted_data - label) + temp1 = (temp - 
np.broadcast_to(np.mean(temp, axis=0), shape=predicted_data.shape)) + temp2 = np.broadcast_to(np.sqrt(np.var(temp, axis=0)), shape=predicted_data.shape) + temp3 = temp1/temp2 + mse = np.sum((temp1 / temp2) ** 2, axis=1) + print("z:", mse) + print(mse.shape) + + # mse=np.mean((predicted_data-label)**2,axis=1) + print("mse", mse) + + dims, = mse.shape + + mean = np.mean(mse) + std = np.sqrt(np.var(mse)) + max = mean + 3 * std + # min = mean-3*std + max = np.broadcast_to(max, shape=[dims, ]) + # min = np.broadcast_to(min,shape=[dims,]) + mean = np.broadcast_to(mean, shape=[dims, ]) + + # plt.plot(max) + # plt.plot(mse) + # plt.plot(mean) + # # plt.plot(min) + # plt.show() + # + # + return mse,mean,max + # pass + + +if __name__ == '__main__': + total_data = loadData.execute(N=feature_num,file_name=file_name) + total_data = normalization(data=total_data) + train_data, train_label = get_training_data_overlapping(total_data[:300455, :]) + + ## TODO training + # model = condition_monitoring_model() + # checkpoint = tf.keras.callbacks.ModelCheckpoint( + # filepath=save_name, + # monitor='val_loss', + # verbose=1, + # save_best_only=True, + # mode='min', + # period=1) + # lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.0001) + # early_stop = EarlyStopping(monitor='val_loss', min_delta=0.0001, patience=30, mode='min', verbose=1) + # model.compile(optimizer=tf.optimizers.Adam(learning_rate=learning_rate), loss=tf.losses.mse) + # model.summary() + # model.fit(train_data, train_label, batch_size=batch_size, epochs=EPOCH, validation_split=0.1, + # callbacks=[checkpoint, lr_scheduler, early_stop]) + + ## TODO testing + print("===============================") + print(total_data.shape) + print("===============================") + test_data, test_label = get_training_data(total_data[:300455, :]) + newModel = tf.keras.models.load_model(save_name) + mse,mean,max = get_MSE(test_data, test_label, new_model=newModel) + print("===============================") + print("mse:",mse) + print(mse.shape) + print("===============================") + + + test_data, test_label = get_training_data(total_data[20000:, :]) + predicted_data = newModel.predict(test_data) + rows, cols = predicted_data.shape + print("=====================================") + print(predicted_data) + print(predicted_data.shape) + print("=====================================") + + temp = np.abs(predicted_data - test_label) + temp1 = (temp - np.broadcast_to(np.mean(temp, axis=0), shape=predicted_data.shape)) + temp2 = np.broadcast_to(np.sqrt(np.var(temp, axis=0)), shape=predicted_data.shape) + temp3 = temp1 / temp2 + mse = np.sum((temp1 / temp2) ** 2, axis=1) + print("====================") + print("new_mse:",mse) + print(mse.shape) + np.savetxt("mse", mse, delimiter=',') + print("===================") + + plt.plot(mse[2000:]) + plt.plot(mean) + plt.plot(max) + plt.show() + + + + + + + data = pd.DataFrame(mse).ewm(span=3).mean() + print(data) + data =np.array(data) + + index,_ = data.shape + + + + for i in range(2396): + if data[i,0] >5: + data[i,0] = data[i-1,:] + print(data) + mean = data[2000:2396,:].mean() + std = data[2000:2396,:].std() + mean=np.broadcast_to(mean,shape=[500,]) + std=np.broadcast_to(std,shape=[500,]) + plt.plot(data[2000:2396,:]) + plt.plot(mean) + plt.plot(mean+3*std) + plt.plot(mean-3*std) + plt.show() diff --git a/TensorFlow_eaxmple/Model_train_test/condition_monitoring/others_idea/__init__.py 
b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/others_idea/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/TensorFlow_eaxmple/Model_train_test/condition_monitoring/self_try/Joint_Monitoring.py b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/self_try/Joint_Monitoring.py new file mode 100644 index 0000000..939628a --- /dev/null +++ b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/self_try/Joint_Monitoring.py @@ -0,0 +1,526 @@ +# -*- coding: utf-8 -*- + +# coding: utf-8 +import tensorflow as tf +import tensorflow.keras +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +from model.DepthwiseCon1D.DepthwiseConv1D import DepthwiseConv1D +from model.Dynamic_channelAttention.Dynamic_channelAttention import DynamicChannelAttention +from condition_monitoring.data_deal import loadData +from model.Joint_Monitoring.Joint_Monitoring2 import Joint_Monitoring + +from model.CommonFunction.CommonFunction import * +from sklearn.model_selection import train_test_split +from tensorflow.keras.models import load_model, save_model + +''' +@Author : dingjiawen +@Date : 2022/7/8 10:29 +@Usage : 尝试将预测和分类两种方式相结合,联合监测 +@Desc :REPVGG+unsampling+GRU进行重构,后面接GDP=全局动态池化+分类器 +随epoch衰减的MSELoss+随epoch增强的crossEntropy +''' + +'''超参数设置''' +time_stamp = 120 +feature_num = 10 +batch_size = 16 +learning_rate = 0.001 +EPOCH = 101 +model_name = "joint" +'''EWMA超参数''' +K = 18 +namuda = 0.01 +'''保存名称''' + +save_name = "../model/weight/{0}_timestamp{1}_feature{2}_Epoch{4}_weight/weight".format(model_name, + time_stamp, + feature_num, + batch_size, + EPOCH) +save_step_two_name = "../model/two_weight/{0}_timestamp{1}_feature{2}_weight/weight".format(model_name, + time_stamp, + feature_num, + batch_size, + EPOCH) + +# save_name = "../model/joint/{0}_timestamp{1}_feature{2}.h5".format(model_name, +# time_stamp, +# feature_num, +# batch_size, +# EPOCH) +# save_step_two_name = "../model/joint_two/{0}_timestamp{1}_feature{2}.h5".format(model_name, +# time_stamp, +# feature_num, +# batch_size, +# EPOCH) +'''文件名''' +file_name = "G:\data\SCADA数据\jb4q_8_delete_all_zero.csv" + +''' +文件说明:jb4q_8_delete_all_zero.csv是删除了除异常以外的所有0值的文件 +文件从0:300454行均是正常值(2019/7.30 00:00:00 - 2019/9/18 11:21:00) +从300455:317052行均是异常值(2019/9/18 11:21:01 - 2019/9/29 23:59:00) +''' +'''文件参数''' +# 最后正常的时间点 +healthy_date = 300454 +# 最后异常的时间点 +unhealthy_date = 317052 +# 异常容忍程度 +unhealthy_patience = 5 + + +def remove(data, time_stamp=time_stamp): + rows, cols = data.shape + print("remove_data.shape:", data.shape) + num = int(rows / time_stamp) + + return data[:num * time_stamp, :] + pass + + +# 不重叠采样 +def get_training_data(data, time_stamp: int = time_stamp): + removed_data = remove(data=data) + rows, cols = removed_data.shape + print("removed_data.shape:", data.shape) + print("removed_data:", removed_data) + train_data = np.reshape(removed_data, [-1, time_stamp, cols]) + print("train_data:", train_data) + batchs, time_stamp, cols = train_data.shape + + for i in range(1, batchs): + each_label = np.expand_dims(train_data[i, 0, :], axis=0) + if i == 1: + train_label = each_label + else: + train_label = np.concatenate([train_label, each_label], axis=0) + + print("train_data.shape:", train_data.shape) + print("train_label.shape", train_label.shape) + return train_data[:-1, :], train_label + + +# 重叠采样 +def get_training_data_overlapping(data, time_stamp: int = time_stamp, is_Healthy: bool = True): + rows, cols = data.shape + train_data = np.empty(shape=[rows - time_stamp - 1, time_stamp, cols]) + train_label 
= np.empty(shape=[rows - time_stamp - 1, cols]) + for i in range(rows): + if i + time_stamp >= rows: + break + if i + time_stamp < rows - 1: + train_data[i] = data[i:i + time_stamp] + train_label[i] = data[i + time_stamp] + + print("重叠采样以后:") + print("data:", train_data) # (300334,120,10) + print("label:", train_label) # (300334,10) + + if is_Healthy: + train_label2 = np.ones(shape=[train_label.shape[0]]) + else: + train_label2 = np.zeros(shape=[train_label.shape[0]]) + + print("label2:", train_label2) + + return train_data, train_label, train_label2 + + +# RepConv重参数化卷积 +def RepConv(input_tensor, k=3): + _, _, output_dim = input_tensor.shape + conv1 = tf.keras.layers.Conv1D(filters=output_dim, kernel_size=k, strides=1, padding='SAME')(input_tensor) + b1 = tf.keras.layers.BatchNormalization()(conv1) + + conv2 = tf.keras.layers.Conv1D(filters=output_dim, kernel_size=1, strides=1, padding='SAME')(input_tensor) + b2 = tf.keras.layers.BatchNormalization()(conv2) + + b3 = tf.keras.layers.BatchNormalization()(input_tensor) + + out = tf.keras.layers.Add()([b1, b2, b3]) + out = tf.nn.relu(out) + return out + + +# RepBlock模块 +def RepBlock(input_tensor, num: int = 3): + for i in range(num): + input_tensor = RepConv(input_tensor) + return input_tensor + + +# GAP 全局平均池化 +def Global_avg_channelAttention(input_tensor): + _, length, channel = input_tensor.shape + DWC1 = DepthwiseConv1D(kernel_size=1, padding='SAME')(input_tensor) + GAP = tf.keras.layers.GlobalAvgPool1D()(DWC1) + c1 = tf.keras.layers.Conv1D(filters=channel, kernel_size=1, padding='SAME')(GAP) + s1 = tf.nn.sigmoid(c1) + output = tf.multiply(input_tensor, s1) + return output + + +# GDP 全局动态池化 +def Global_Dynamic_channelAttention(input_tensor): + _, length, channel = input_tensor.shape + DWC1 = DepthwiseConv1D(kernel_size=1, padding='SAME')(input_tensor) + + # GAP + GAP = tf.keras.layers.GlobalAvgPool1D()(DWC1) + c1 = tf.keras.layers.Conv1D(filters=channel, kernel_size=1, padding='SAME')(GAP) + s1 = tf.nn.sigmoid(c1) + + # GMP + GMP = tf.keras.layers.GlobalMaxPool1D()(DWC1) + c2 = tf.keras.layers.Conv1D(filters=channel, kernel_size=1, padding='SAME')(GMP) + s3 = tf.nn.sigmoid(c2) + + output = tf.multiply(input_tensor, s1) + return output + + +# 归一化 +def normalization(data): + rows, cols = data.shape + print("归一化之前:", data) + print(data.shape) + print("======================") + + # 归一化 + max = np.max(data, axis=0) + max = np.broadcast_to(max, [rows, cols]) + min = np.min(data, axis=0) + min = np.broadcast_to(min, [rows, cols]) + + data = (data - min) / (max - min) + print("归一化之后:", data) + print(data.shape) + + return data + + +# 正则化 +def Regularization(data): + rows, cols = data.shape + print("正则化之前:", data) + print(data.shape) + print("======================") + + # 正则化 + mean = np.mean(data, axis=0) + mean = np.broadcast_to(mean, shape=[rows, cols]) + dst = np.sqrt(np.var(data, axis=0)) + dst = np.broadcast_to(dst, shape=[rows, cols]) + data = (data - mean) / dst + print("正则化之后:", data) + print(data.shape) + + return data + pass + + +def EWMA(data, K=K, namuda=namuda): + # t是啥暂时未知 + t = 0 + mid = np.mean(data, axis=0) + standard = np.sqrt(np.var(data, axis=0)) + UCL = mid + K * standard * np.sqrt(namuda / (2 - namuda) * (1 - (1 - namuda) ** 2 * t)) + LCL = mid - K * standard * np.sqrt(namuda / (2 - namuda) * (1 - (1 - namuda) ** 2 * t)) + return mid, UCL, LCL + pass + + +def get_MSE(data, label, new_model): + predicted_data = new_model.predict(data) + + temp = np.abs(predicted_data - label) + temp1 = (temp - np.broadcast_to(np.mean(temp, 
axis=0), shape=predicted_data.shape)) + temp2 = np.broadcast_to(np.sqrt(np.var(temp, axis=0)), shape=predicted_data.shape) + temp3 = temp1 / temp2 + mse = np.sum((temp1 / temp2) ** 2, axis=1) + print("z:", mse) + print(mse.shape) + + # mse=np.mean((predicted_data-label)**2,axis=1) + print("mse", mse) + + dims, = mse.shape + + mean = np.mean(mse) + std = np.sqrt(np.var(mse)) + max = mean + 3 * std + # min = mean-3*std + max = np.broadcast_to(max, shape=[dims, ]) + # min = np.broadcast_to(min,shape=[dims,]) + mean = np.broadcast_to(mean, shape=[dims, ]) + + # plt.plot(max) + # plt.plot(mse) + # plt.plot(mean) + # # plt.plot(min) + # plt.show() + # + # + return mse, mean, max + # pass + + +def condition_monitoring_model(): + input = tf.keras.Input(shape=[time_stamp, feature_num]) + conv1 = tf.keras.layers.Conv1D(filters=256, kernel_size=1)(input) + GRU1 = tf.keras.layers.GRU(128, return_sequences=False)(conv1) + d1 = tf.keras.layers.Dense(300)(GRU1) + output = tf.keras.layers.Dense(10)(d1) + + model = tf.keras.Model(inputs=input, outputs=output) + + return model + + +# trian_data:(300455,120,10) +# trian_label1:(300455,10) +# trian_label2:(300455,) +def shuffle(train_data, train_label1, train_label2, is_split: bool = False, split_size: float = 0.2): + (train_data, test_data, train_label1, test_label1, train_label2, test_label2) = train_test_split(train_data, + train_label1, + train_label2, + test_size=split_size, + shuffle=True, + random_state=100) + if is_split: + return train_data, train_label1, train_label2, test_data, test_label1, test_label2 + train_data = np.concatenate([train_data, test_data], axis=0) + train_label1 = np.concatenate([train_label1, test_label1], axis=0) + train_label2 = np.concatenate([train_label2, test_label2], axis=0) + # print(train_data.shape) + # print(train_label1.shape) + # print(train_label2.shape) + # print(train_data.shape) + + return train_data, train_label1, train_label2 + pass + + +def split_test_data(healthy_data, healthy_label1, healthy_label2, unhealthy_data, unhealthy_label1, unhealthy_label2, + split_size: float = 0.2): + data = np.concatenate([healthy_data, unhealthy_data], axis=0) + label1 = np.concatenate([healthy_label1, unhealthy_label1], axis=0) + label2 = np.concatenate([healthy_label2, unhealthy_label2], axis=0) + (train_data, test_data, train_label1, test_label1, train_label2, test_label2) = train_test_split(data, + label1, + label2, + test_size=split_size, + shuffle=True, + random_state=100) + + # print(train_data.shape) + # print(train_label1.shape) + # print(train_label2.shape) + # print(train_data.shape) + + return train_data, train_label1, train_label2, test_data, test_label1, test_label2 + + pass + + +# trian_data:(300455,120,10) +# trian_label1:(300455,10) +# trian_label2:(300455,) +def train_step_one(train_data, train_label1, train_label2): + model = Joint_Monitoring() + # # # # TODO 需要运行编译一次,才能打印model.summary() + # model.build(input_shape=(batch_size, filter_num, dims)) + # model.summary() + history_loss = [] + history_val_loss = [] + learning_rate = 1e-3 + for epoch in range(EPOCH): + + print() + print("EPOCH:", epoch, "/", EPOCH, ":") + train_data, train_label1, train_label2 = shuffle(train_data, train_label1, train_label2) + if epoch == 0: + train_data, train_label1, train_label2, val_data, val_label1, val_label2 = shuffle(train_data, train_label1, + train_label2, + is_split=True) + # print() + # print("EPOCH:", epoch, "/", EPOCH, ":") + # 用于让train知道,这是这个epoch中的第几次训练 + z = 0 + # 用于batch_size次再训练 + k = 1 + for data_1, label_1, 
label_2 in zip(train_data, train_label1, train_label2): + size, _, _ = train_data.shape + data_1 = tf.expand_dims(data_1, axis=0) + label_1 = tf.expand_dims(label_1, axis=0) + label_2 = tf.expand_dims(label_2, axis=0) + if batch_size != 1: + if k % batch_size == 1: + data = data_1 + label1 = label_1 + label2 = label_2 + else: + data = tf.concat([data, data_1], axis=0) + label1 = tf.concat([label1, label_1], axis=0) + label2 = tf.concat([label2, label_2], axis=0) + else: + data = data_1 + label1 = label_1 + label2 = label_2 + + if k % batch_size == 0: + # label = tf.expand_dims(label, axis=-1) + loss_value = model.train(input_tensor=data, label1=label1, label2=label2, learning_rate=learning_rate, + is_first_time=True) + print(z * batch_size, "/", size, ":===============>", "loss:", loss_value.numpy()) + k = 0 + z = z + 1 + k = k + 1 + val_loss = model.get_val_loss(val_data=val_data, val_label1=val_label1, val_label2=val_label2, + is_first_time=True) + SaveBestModel(model=model, save_name=save_name, history_loss=history_val_loss, loss_value=val_loss.numpy()) + # SaveBestH5Model(model=model, save_name=save_name, history_loss=history_val_loss, loss_value=val_loss.numpy()) + history_val_loss.append(val_loss) + history_loss.append(loss_value.numpy()) + print('Training loss is :', loss_value.numpy()) + print('Validating loss is :', val_loss.numpy()) + if IsStopTraining(history_loss=history_val_loss, patience=7): + break + if Is_Reduce_learning_rate(history_loss=history_val_loss, patience=3): + if learning_rate >= 1e-4: + learning_rate = learning_rate * 0.1 + pass + + +def train_step_two(step_one_model, step_two_model, train_data, train_label1, train_label2): + # step_two_model = Joint_Monitoring() + # step_two_model.build(input_shape=(batch_size, time_stamp, feature_num)) + # step_two_model.summary() + history_loss = [] + history_val_loss = [] + history_accuracy = [] + learning_rate = 1e-3 + for epoch in range(EPOCH): + print() + print("EPOCH:", epoch, "/", EPOCH, ":") + train_data, train_label1, train_label2 = shuffle(train_data, train_label1, train_label2) + if epoch == 0: + train_data, train_label1, train_label2, val_data, val_label1, val_label2 = shuffle(train_data, train_label1, + train_label2, + is_split=True) + # print() + # print("EPOCH:", epoch, "/", EPOCH, ":") + # 用于让train知道,这是这个epoch中的第几次训练 + z = 0 + # 用于batch_size次再训练 + k = 1 + accuracy_num = 0 + for data_1, label_1, label_2 in zip(train_data, train_label1, train_label2): + size, _, _ = train_data.shape + data_1 = tf.expand_dims(data_1, axis=0) + label_1 = tf.expand_dims(label_1, axis=0) + label_2 = tf.expand_dims(label_2, axis=0) + if batch_size != 1: + if k % batch_size == 1: + data = data_1 + label1 = label_1 + label2 = label_2 + else: + data = tf.concat([data, data_1], axis=0) + label1 = tf.concat([label1, label_1], axis=0) + label2 = tf.concat([label2, label_2], axis=0) + else: + data = data_1 + label1 = label_1 + label2 = label_2 + + if k % batch_size == 0: + # label = tf.expand_dims(label, axis=-1) + output1, output2, output3, _ = step_one_model.call(inputs=data, is_first_time=True) + loss_value, accuracy_value = step_two_model.train(input_tensor=data, label1=label1, label2=label2, + learning_rate=learning_rate, + is_first_time=False, pred_3=output1, pred_4=output2, + pred_5=output3) + accuracy_num += accuracy_value + print(z * batch_size, "/", size, ":===============>", "loss:", loss_value.numpy(), "| accuracy:", + accuracy_num / ((z + 1) * batch_size)) + k = 0 + z = z + 1 + k = k + 1 + + val_loss, val_accuracy = 
step_two_model.get_val_loss(val_data=val_data, val_label1=val_label1, + val_label2=val_label2, + is_first_time=False, step_one_model=step_one_model) + SaveBestModelByAccuracy(model=step_two_model, save_name=save_step_two_name, history_accuracy=history_accuracy, + accuracy_value=val_accuracy) + history_val_loss.append(val_loss) + history_loss.append(loss_value.numpy()) + print('Training loss is : {0} | Training accuracy is : {1}'.format(loss_value.numpy(), + accuracy_num / ((z + 1) * batch_size))) + print('Validating loss is : {0} | Validating accuracy is : {1}'.format(val_loss.numpy(), val_accuracy)) + if IsStopTraining(history_loss=history_val_loss, patience=7): + break + if Is_Reduce_learning_rate(history_loss=history_val_loss, patience=3): + if learning_rate >= 1e-4: + learning_rate = learning_rate * 0.1 + pass + + +def test(step_one_model, step_two_model, test_data, test_label1, test_label2): + history_loss = [] + history_val_loss = [] + + val_loss, val_accuracy = step_two_model.get_val_loss(val_data=test_data, val_label1=test_label1, + val_label2=test_label2, + is_first_time=False, step_one_model=step_one_model) + + history_val_loss.append(val_loss) + print("val_accuracy:", val_accuracy) + print("val_loss:", val_loss) + + +if __name__ == '__main__': + total_data = loadData.execute(N=feature_num, file_name=file_name) + total_data = normalization(data=total_data) + train_data_healthy, train_label1_healthy, train_label2_healthy = get_training_data_overlapping( + total_data[:healthy_date, :], is_Healthy=True) + train_data_unhealthy, train_label1_unhealthy, train_label2_unhealthy = get_training_data_overlapping( + total_data[healthy_date - time_stamp + unhealthy_patience:unhealthy_date, :], + is_Healthy=False) + # TODO 第一步训练 + # 单次测试 + # train_step_one(train_data=train_data_healthy[:32, :, :], train_label1=train_label1_healthy[:32, :],train_label2=train_label2_healthy[:32, ]) + # train_step_one(train_data=train_data_healthy, train_label1=train_label1_healthy,train_label2=train_label2_healthy) + + # 导入第一步已经训练好的模型,一个继续训练,一个只输出结果 + step_one_model = Joint_Monitoring() + step_one_model.load_weights(save_name) + # + # step_two_model = Joint_Monitoring() + # step_two_model.load_weights(save_name) + + # TODO 第二步训练 + ### healthy_data.shape: (300333,120,10) + ### unhealthy_data.shape: (16594,10) + healthy_size, _, _ = train_data_healthy.shape + unhealthy_size, _, _ = train_data_unhealthy.shape + train_data, train_label1, train_label2, test_data, test_label1, test_label2 = split_test_data( + healthy_data=train_data_healthy[healthy_size - 2 * unhealthy_size:, :, :], + healthy_label1=train_label1_healthy[healthy_size - 2 * unhealthy_size:, :], + healthy_label2=train_label2_healthy[healthy_size - 2 * unhealthy_size:, ], unhealthy_data=train_data_unhealthy, + unhealthy_label1=train_label1_unhealthy, unhealthy_label2=train_label2_unhealthy) + # train_step_two(step_one_model=step_one_model, step_two_model=step_two_model, + # train_data=train_data, + # train_label1=train_label1, train_label2=np.expand_dims(train_label2, axis=-1)) + + # TODO 测试测试集 + step_two_model = Joint_Monitoring() + step_two_model.load_weights(save_step_two_name) + test(step_one_model=step_one_model, step_two_model=step_two_model, test_data=test_data, test_label1=test_label1, + test_label2=np.expand_dims(test_label2, axis=-1)) + + pass diff --git a/TensorFlow_eaxmple/Model_train_test/condition_monitoring/self_try/Joint_Monitoring_hard.py b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/self_try/Joint_Monitoring_hard.py new file 
mode 100644 index 0000000..7ab3cf3 --- /dev/null +++ b/TensorFlow_eaxmple/Model_train_test/condition_monitoring/self_try/Joint_Monitoring_hard.py @@ -0,0 +1,576 @@ +# -*- coding: utf-8 -*- + +# coding: utf-8 +import tensorflow as tf +import tensorflow.keras +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +from model.DepthwiseCon1D.DepthwiseConv1D import DepthwiseConv1D +from model.Dynamic_channelAttention.Dynamic_channelAttention import DynamicChannelAttention +from condition_monitoring.data_deal import loadData +from model.Joint_Monitoring.Joint_Monitoring3 import Joint_Monitoring + +from model.CommonFunction.CommonFunction import * +from sklearn.model_selection import train_test_split +from tensorflow.keras.models import load_model, save_model + +''' +@Author : dingjiawen +@Date : 2022/7/8 10:29 +@Usage : 尝试将预测和分类两种方式相结合,联合监测 +@Desc :REPVGG+unsampling+GRU进行重构,后面接GDP=全局动态池化+分类器 +随epoch衰减的MSELoss+随epoch增强的crossEntropy +''' + +'''超参数设置''' +time_stamp = 120 +feature_num = 10 +batch_size = 16 +learning_rate = 0.001 +EPOCH = 101 +model_name = "joint" +'''EWMA超参数''' +K = 18 +namuda = 0.01 +'''保存名称''' + +save_name = "../hard_model/weight/{0}_timestamp{1}_feature{2}_weight_epoch8/weight".format(model_name, + time_stamp, + feature_num, + batch_size, + EPOCH) +save_step_two_name = "../hard_model/two_weight/{0}_timestamp{1}_feature{2}_weight_epoch14/weight".format(model_name, + time_stamp, + feature_num, + batch_size, + EPOCH) + +# save_name = "../model/joint/{0}_timestamp{1}_feature{2}.h5".format(model_name, +# time_stamp, +# feature_num, +# batch_size, +# EPOCH) +# save_step_two_name = "../model/joint_two/{0}_timestamp{1}_feature{2}.h5".format(model_name, +# time_stamp, +# feature_num, +# batch_size, +# EPOCH) +'''文件名''' +file_name = "G:\data\SCADA数据\jb4q_8_delete_total_zero.csv" + +''' +文件说明:jb4q_8_delete_total_zero.csv是删除了只删除了全是0的列的文件 +文件从0:415548行均是正常值(2019/7.30 00:00:00 - 2019/9/18 11:14:00) +从415549:432153行均是异常值(2019/9/18 11:21:01 - 2021/1/18 00:00:00) +''' +'''文件参数''' +# 最后正常的时间点 +healthy_date = 415548 +# 最后异常的时间点 +unhealthy_date = 432153 +# 异常容忍程度 +unhealthy_patience = 5 + + +def remove(data, time_stamp=time_stamp): + rows, cols = data.shape + print("remove_data.shape:", data.shape) + num = int(rows / time_stamp) + + return data[:num * time_stamp, :] + pass + + +# 不重叠采样 +def get_training_data(data, time_stamp: int = time_stamp): + removed_data = remove(data=data) + rows, cols = removed_data.shape + print("removed_data.shape:", data.shape) + print("removed_data:", removed_data) + train_data = np.reshape(removed_data, [-1, time_stamp, cols]) + print("train_data:", train_data) + batchs, time_stamp, cols = train_data.shape + + for i in range(1, batchs): + each_label = np.expand_dims(train_data[i, 0, :], axis=0) + if i == 1: + train_label = each_label + else: + train_label = np.concatenate([train_label, each_label], axis=0) + + print("train_data.shape:", train_data.shape) + print("train_label.shape", train_label.shape) + return train_data[:-1, :], train_label + + +# 重叠采样 +def get_training_data_overlapping(data, time_stamp: int = time_stamp, is_Healthy: bool = True): + rows, cols = data.shape + train_data = np.empty(shape=[rows - time_stamp - 1, time_stamp, cols]) + train_label = np.empty(shape=[rows - time_stamp - 1, cols]) + for i in range(rows): + if i + time_stamp >= rows: + break + if i + time_stamp < rows - 1: + train_data[i] = data[i:i + time_stamp] + train_label[i] = data[i + time_stamp] + + print("重叠采样以后:") + print("data:", train_data) # (300334,120,10) + 
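# Editor's note (hedged sketch, not part of the patch): the index loop above builds the
# overlapping windows one sample at a time. Assuming numpy >= 1.20, the same tensors can
# be produced in vectorised form with sliding_window_view, e.g.:
#   from numpy.lib.stride_tricks import sliding_window_view
#   windows = sliding_window_view(data, time_stamp, axis=0)          # (rows - time_stamp + 1, cols, time_stamp)
#   train_data = windows.transpose(0, 2, 1)[:rows - time_stamp - 1]  # same (N, time_stamp, cols) layout as the loop
#   train_label = data[time_stamp:rows - 1]                          # next-step target for each window
# (sliding_window_view returns a read-only view; call .copy() if a writable array is needed.)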
print("label:", train_label) # (300334,10) + + if is_Healthy: + train_label2 = np.ones(shape=[train_label.shape[0]]) + else: + train_label2 = np.zeros(shape=[train_label.shape[0]]) + + print("label2:", train_label2) + + return train_data, train_label, train_label2 + + +# RepConv重参数化卷积 +def RepConv(input_tensor, k=3): + _, _, output_dim = input_tensor.shape + conv1 = tf.keras.layers.Conv1D(filters=output_dim, kernel_size=k, strides=1, padding='SAME')(input_tensor) + b1 = tf.keras.layers.BatchNormalization()(conv1) + + conv2 = tf.keras.layers.Conv1D(filters=output_dim, kernel_size=1, strides=1, padding='SAME')(input_tensor) + b2 = tf.keras.layers.BatchNormalization()(conv2) + + b3 = tf.keras.layers.BatchNormalization()(input_tensor) + + out = tf.keras.layers.Add()([b1, b2, b3]) + out = tf.nn.relu(out) + return out + + +# RepBlock模块 +def RepBlock(input_tensor, num: int = 3): + for i in range(num): + input_tensor = RepConv(input_tensor) + return input_tensor + + +# GAP 全局平均池化 +def Global_avg_channelAttention(input_tensor): + _, length, channel = input_tensor.shape + DWC1 = DepthwiseConv1D(kernel_size=1, padding='SAME')(input_tensor) + GAP = tf.keras.layers.GlobalAvgPool1D()(DWC1) + c1 = tf.keras.layers.Conv1D(filters=channel, kernel_size=1, padding='SAME')(GAP) + s1 = tf.nn.sigmoid(c1) + output = tf.multiply(input_tensor, s1) + return output + + +# GDP 全局动态池化 +def Global_Dynamic_channelAttention(input_tensor): + _, length, channel = input_tensor.shape + DWC1 = DepthwiseConv1D(kernel_size=1, padding='SAME')(input_tensor) + + # GAP + GAP = tf.keras.layers.GlobalAvgPool1D()(DWC1) + c1 = tf.keras.layers.Conv1D(filters=channel, kernel_size=1, padding='SAME')(GAP) + s1 = tf.nn.sigmoid(c1) + + # GMP + GMP = tf.keras.layers.GlobalMaxPool1D()(DWC1) + c2 = tf.keras.layers.Conv1D(filters=channel, kernel_size=1, padding='SAME')(GMP) + s3 = tf.nn.sigmoid(c2) + + output = tf.multiply(input_tensor, s1) + return output + + +# 归一化 +def normalization(data): + rows, cols = data.shape + print("归一化之前:", data) + print(data.shape) + print("======================") + + # 归一化 + max = np.max(data, axis=0) + max = np.broadcast_to(max, [rows, cols]) + min = np.min(data, axis=0) + min = np.broadcast_to(min, [rows, cols]) + + data = (data - min) / (max - min) + print("归一化之后:", data) + print(data.shape) + + return data + + +# 正则化 +def Regularization(data): + rows, cols = data.shape + print("正则化之前:", data) + print(data.shape) + print("======================") + + # 正则化 + mean = np.mean(data, axis=0) + mean = np.broadcast_to(mean, shape=[rows, cols]) + dst = np.sqrt(np.var(data, axis=0)) + dst = np.broadcast_to(dst, shape=[rows, cols]) + data = (data - mean) / dst + print("正则化之后:", data) + print(data.shape) + + return data + pass + + +def EWMA(data, K=K, namuda=namuda): + # t是啥暂时未知 + t = 0 + mid = np.mean(data, axis=0) + standard = np.sqrt(np.var(data, axis=0)) + UCL = mid + K * standard * np.sqrt(namuda / (2 - namuda) * (1 - (1 - namuda) ** 2 * t)) + LCL = mid - K * standard * np.sqrt(namuda / (2 - namuda) * (1 - (1 - namuda) ** 2 * t)) + return mid, UCL, LCL + pass + + +def get_MSE(data, label, new_model): + predicted_data = new_model.predict(data) + + temp = np.abs(predicted_data - label) + temp1 = (temp - np.broadcast_to(np.mean(temp, axis=0), shape=predicted_data.shape)) + temp2 = np.broadcast_to(np.sqrt(np.var(temp, axis=0)), shape=predicted_data.shape) + temp3 = temp1 / temp2 + mse = np.sum((temp1 / temp2) ** 2, axis=1) + print("z:", mse) + print(mse.shape) + + # mse=np.mean((predicted_data-label)**2,axis=1) + 
print("mse", mse) + + dims, = mse.shape + + mean = np.mean(mse) + std = np.sqrt(np.var(mse)) + max = mean + 3 * std + # min = mean-3*std + max = np.broadcast_to(max, shape=[dims, ]) + # min = np.broadcast_to(min,shape=[dims,]) + mean = np.broadcast_to(mean, shape=[dims, ]) + + # plt.plot(max) + # plt.plot(mse) + # plt.plot(mean) + # # plt.plot(min) + # plt.show() + # + # + return mse, mean, max + # pass + + +def condition_monitoring_model(): + input = tf.keras.Input(shape=[time_stamp, feature_num]) + conv1 = tf.keras.layers.Conv1D(filters=256, kernel_size=1)(input) + GRU1 = tf.keras.layers.GRU(128, return_sequences=False)(conv1) + d1 = tf.keras.layers.Dense(300)(GRU1) + output = tf.keras.layers.Dense(10)(d1) + + model = tf.keras.Model(inputs=input, outputs=output) + + return model + + +# trian_data:(300455,120,10) +# trian_label1:(300455,10) +# trian_label2:(300455,) +def shuffle(train_data, train_label1, train_label2, is_split: bool = False, split_size: float = 0.2): + (train_data, test_data, train_label1, test_label1, train_label2, test_label2) = train_test_split(train_data, + train_label1, + train_label2, + test_size=split_size, + shuffle=True, + random_state=100) + if is_split: + return train_data, train_label1, train_label2, test_data, test_label1, test_label2 + train_data = np.concatenate([train_data, test_data], axis=0) + train_label1 = np.concatenate([train_label1, test_label1], axis=0) + train_label2 = np.concatenate([train_label2, test_label2], axis=0) + # print(train_data.shape) + # print(train_label1.shape) + # print(train_label2.shape) + # print(train_data.shape) + + return train_data, train_label1, train_label2 + pass + + +def split_test_data(healthy_data, healthy_label1, healthy_label2, unhealthy_data, unhealthy_label1, unhealthy_label2, + split_size: float = 0.2, shuffle: bool = True): + data = np.concatenate([healthy_data, unhealthy_data], axis=0) + label1 = np.concatenate([healthy_label1, unhealthy_label1], axis=0) + label2 = np.concatenate([healthy_label2, unhealthy_label2], axis=0) + (train_data, test_data, train_label1, test_label1, train_label2, test_label2) = train_test_split(data, + label1, + label2, + test_size=split_size, + shuffle=shuffle, + random_state=100) + + # print(train_data.shape) + # print(train_label1.shape) + # print(train_label2.shape) + # print(train_data.shape) + + return train_data, train_label1, train_label2, test_data, test_label1, test_label2 + + pass + + +# trian_data:(300455,120,10) +# trian_label1:(300455,10) +# trian_label2:(300455,) +def train_step_one(train_data, train_label1, train_label2): + model = Joint_Monitoring() + # # # # TODO 需要运行编译一次,才能打印model.summary() + # model.build(input_shape=(batch_size, filter_num, dims)) + # model.summary() + history_loss = [] + history_val_loss = [] + learning_rate = 1e-3 + for epoch in range(EPOCH): + + print() + print("EPOCH:", epoch, "/", EPOCH, ":") + train_data, train_label1, train_label2 = shuffle(train_data, train_label1, train_label2) + if epoch == 0: + train_data, train_label1, train_label2, val_data, val_label1, val_label2 = shuffle(train_data, train_label1, + train_label2, + is_split=True) + # print() + # print("EPOCH:", epoch, "/", EPOCH, ":") + # 用于让train知道,这是这个epoch中的第几次训练 + z = 0 + # 用于batch_size次再训练 + k = 1 + for data_1, label_1, label_2 in zip(train_data, train_label1, train_label2): + size, _, _ = train_data.shape + data_1 = tf.expand_dims(data_1, axis=0) + label_1 = tf.expand_dims(label_1, axis=0) + label_2 = tf.expand_dims(label_2, axis=0) + if batch_size != 1: + if k % batch_size == 
1: + data = data_1 + label1 = label_1 + label2 = label_2 + else: + data = tf.concat([data, data_1], axis=0) + label1 = tf.concat([label1, label_1], axis=0) + label2 = tf.concat([label2, label_2], axis=0) + else: + data = data_1 + label1 = label_1 + label2 = label_2 + + if k % batch_size == 0: + # label = tf.expand_dims(label, axis=-1) + loss_value, accuracy_value = model.train(input_tensor=data, label1=label1, label2=label2, + learning_rate=learning_rate, + is_first_time=True) + print(z * batch_size, "/", size, ":===============>", "loss:", loss_value.numpy()) + k = 0 + z = z + 1 + k = k + 1 + val_loss, val_accuracy = model.get_val_loss(val_data=val_data, val_label1=val_label1, val_label2=val_label2, + is_first_time=True) + SaveBestModel(model=model, save_name=save_name, history_loss=history_val_loss, loss_value=val_loss.numpy()) + # SaveBestH5Model(model=model, save_name=save_name, history_loss=history_val_loss, loss_value=val_loss.numpy()) + history_val_loss.append(val_loss) + history_loss.append(loss_value.numpy()) + print('Training loss is :', loss_value.numpy()) + print('Validating loss is :', val_loss.numpy()) + if IsStopTraining(history_loss=history_val_loss, patience=7): + break + if Is_Reduce_learning_rate(history_loss=history_val_loss, patience=3): + if learning_rate >= 1e-4: + learning_rate = learning_rate * 0.1 + pass + + +def train_step_two(step_one_model, step_two_model, train_data, train_label1, train_label2): + # step_two_model = Joint_Monitoring() + # step_two_model.build(input_shape=(batch_size, time_stamp, feature_num)) + # step_two_model.summary() + history_loss = [] + history_val_loss = [] + history_accuracy = [] + learning_rate = 1e-3 + for epoch in range(EPOCH): + print() + print("EPOCH:", epoch, "/", EPOCH, ":") + train_data, train_label1, train_label2 = shuffle(train_data, train_label1, train_label2) + if epoch == 0: + train_data, train_label1, train_label2, val_data, val_label1, val_label2 = shuffle(train_data, train_label1, + train_label2, + is_split=True) + # print() + # print("EPOCH:", epoch, "/", EPOCH, ":") + # 用于让train知道,这是这个epoch中的第几次训练 + z = 0 + # 用于batch_size次再训练 + k = 1 + accuracy_num = 0 + for data_1, label_1, label_2 in zip(train_data, train_label1, train_label2): + size, _, _ = train_data.shape + data_1 = tf.expand_dims(data_1, axis=0) + label_1 = tf.expand_dims(label_1, axis=0) + label_2 = tf.expand_dims(label_2, axis=0) + if batch_size != 1: + if k % batch_size == 1: + data = data_1 + label1 = label_1 + label2 = label_2 + else: + data = tf.concat([data, data_1], axis=0) + label1 = tf.concat([label1, label_1], axis=0) + label2 = tf.concat([label2, label_2], axis=0) + else: + data = data_1 + label1 = label_1 + label2 = label_2 + + if k % batch_size == 0: + # label = tf.expand_dims(label, axis=-1) + output1, output2, output3, _ = step_one_model.call(inputs=data, is_first_time=True) + loss_value, accuracy_value = step_two_model.train(input_tensor=data, label1=label1, label2=label2, + learning_rate=learning_rate, + is_first_time=False, pred_3=output1, pred_4=output2, + pred_5=output3) + accuracy_num += accuracy_value + print(z * batch_size, "/", size, ":===============>", "loss:", loss_value.numpy(), "| accuracy:", + accuracy_num / ((z + 1) * batch_size)) + k = 0 + z = z + 1 + k = k + 1 + + val_loss, val_accuracy = step_two_model.get_val_loss(val_data=val_data, val_label1=val_label1, + val_label2=val_label2, + is_first_time=False, step_one_model=step_one_model) + SaveBestModelByAccuracy(model=step_two_model, save_name=save_step_two_name, 
history_accuracy=history_accuracy, + accuracy_value=val_accuracy) + history_val_loss.append(val_loss) + history_loss.append(loss_value.numpy()) + history_accuracy.append(val_accuracy) + print('Training loss is : {0} | Training accuracy is : {1}'.format(loss_value.numpy(), + accuracy_num / ((z + 1) * batch_size))) + print('Validating loss is : {0} | Validating accuracy is : {1}'.format(val_loss.numpy(), val_accuracy)) + if IsStopTraining(history_loss=history_val_loss, patience=7): + break + if Is_Reduce_learning_rate(history_loss=history_val_loss, patience=3): + if learning_rate >= 1e-4: + learning_rate = learning_rate * 0.1 + pass + + +def test(step_one_model, step_two_model, test_data, test_label1, test_label2): + history_loss = [] + history_val_loss = [] + + val_loss, val_accuracy = step_two_model.get_val_loss(val_data=test_data, val_label1=test_label1, + val_label2=test_label2, + is_first_time=False, step_one_model=step_one_model) + + history_val_loss.append(val_loss) + print("val_accuracy:", val_accuracy) + print("val_loss:", val_loss) + + +def showResult(step_two_model: Joint_Monitoring, test_data, isPlot: bool = False): + # 获取模型的所有参数的个数 + # step_two_model.count_params() + total_result = [] + size, length, dims = test_data.shape + for epoch in range(0, size - batch_size + 1, batch_size): + each_test_data = test_data[epoch:epoch + batch_size, :, :] + _, _, _, output4 = step_two_model.call(each_test_data, is_first_time=False) + total_result.append(output4) + total_result = np.reshape(total_result, [total_result.__len__(), -1]) + total_result = np.reshape(total_result, [-1, ]) + if isPlot: + plt.scatter(list(range(total_result.shape[0])), total_result, c='black', s=10) + # 画出 y=1 这条水平线 + plt.axhline(0.5, c='red', label='Failure threshold') + # 箭头指向上面的水平线 + # plt.arrow(35000, 0.9, 33000, 0.75, head_width=0.02, head_length=0.1, shape="full", fc='red', ec='red', + # alpha=0.9, overhang=0.5) + # plt.text(35000, 0.9, "Truth Fault", fontsize=10, color='black', verticalalignment='top') + plt.axvline(test_data.shape[0] * 2 / 3, c='blue', ls='-.') + plt.xlabel("time") + plt.ylabel("confience") + plt.text(total_result.shape[0] * 4 / 5, 0.6, "Fault", fontsize=10, color='black', verticalalignment='top', + horizontalalignment='center', + bbox={'facecolor': 'grey', + 'pad': 10}) + plt.text(total_result.shape[0] * 1 / 3, 0.4, "Norm", fontsize=10, color='black', verticalalignment='top', + horizontalalignment='center', + bbox={'facecolor': 'grey', + 'pad': 10}) + plt.grid() + # plt.ylim(0, 1) + # plt.xlim(-50, 1300) + # plt.legend("", loc='upper left') + plt.show() + return total_result + + +if __name__ == '__main__': + total_data = loadData.execute(N=feature_num, file_name=file_name) + total_data = normalization(data=total_data) + train_data_healthy, train_label1_healthy, train_label2_healthy = get_training_data_overlapping( + total_data[:healthy_date, :], is_Healthy=True) + train_data_unhealthy, train_label1_unhealthy, train_label2_unhealthy = get_training_data_overlapping( + total_data[healthy_date - time_stamp + unhealthy_patience:unhealthy_date, :], + is_Healthy=False) + #### TODO 第一步训练 + # 单次测试 + # train_step_one(train_data=train_data_healthy[:32, :, :], train_label1=train_label1_healthy[:32, :],train_label2=train_label2_healthy[:32, ]) + # train_step_one(train_data=train_data_healthy, train_label1=train_label1_healthy, train_label2=train_label2_healthy) + + # 导入第一步已经训练好的模型,一个继续训练,一个只输出结果 + # step_one_model = Joint_Monitoring() + # step_one_model.load_weights(save_name) + # + # step_two_model = 
Joint_Monitoring() + # step_two_model.load_weights(save_name) + + #### TODO 第二步训练 + ### healthy_data.shape: (300333,120,10) + ### unhealthy_data.shape: (16594,10) + healthy_size, _, _ = train_data_healthy.shape + unhealthy_size, _, _ = train_data_unhealthy.shape + # train_data, train_label1, train_label2, test_data, test_label1, test_label2 = split_test_data( + # healthy_data=train_data_healthy[healthy_size - 2 * unhealthy_size:, :, :], + # healthy_label1=train_label1_healthy[healthy_size - 2 * unhealthy_size:, :], + # healthy_label2=train_label2_healthy[healthy_size - 2 * unhealthy_size:, ], unhealthy_data=train_data_unhealthy, + # unhealthy_label1=train_label1_unhealthy, unhealthy_label2=train_label2_unhealthy) + # train_step_two(step_one_model=step_one_model, step_two_model=step_two_model, + # train_data=train_data, + # train_label1=train_label1, train_label2=np.expand_dims(train_label2, axis=-1)) + + ### TODO 测试测试集 + step_one_model = Joint_Monitoring() + step_one_model.load_weights(save_name) + step_two_model = Joint_Monitoring() + step_two_model.load_weights(save_step_two_name) + # test(step_one_model=step_one_model, step_two_model=step_two_model, test_data=test_data, test_label1=test_label1, + # test_label2=np.expand_dims(test_label2, axis=-1)) + + ###TODO 展示全部的结果 + all_data, _, _ = get_training_data_overlapping( + total_data[healthy_size - 2 * unhealthy_size:unhealthy_date, :], is_Healthy=True) + # all_data = np.concatenate([]) + # 单次测试 + # showResult(step_two_model, test_data=all_data[:32], isPlot=True) + showResult(step_two_model, test_data=all_data, isPlot=True) + + pass
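Editor's note on EWMA(): because t is hard-coded to 0, the factor (1 - (1 - namuda) ** 2 * t)
evaluates to 1, so the function returns the steady-state limits mid ± K * std * sqrt(namuda / (2 - namuda))
rather than time-varying ones; note also that the expression parses as ((1 - namuda) ** 2) * t,
whereas the textbook EWMA limit uses (1 - namuda) ** (2 * t). The sketch below is a minimal,
hedged illustration of the usual time-varying EWMA control chart, reusing the K and namuda
hyperparameters defined above (editorial illustration only, not part of the patch):

import numpy as np

def ewma_chart(x, K=18, namuda=0.01):
    # EWMA statistic z_t = namuda * x_t + (1 - namuda) * z_{t-1}, started at the series mean.
    x = np.asarray(x, dtype=float)
    mid = x.mean()
    sigma = x.std()
    z = np.empty_like(x)
    z_prev = mid
    for t, xt in enumerate(x):
        z_prev = namuda * xt + (1 - namuda) * z_prev
        z[t] = z_prev
    # Time-varying control limits that widen towards the steady-state value as t grows.
    t_idx = np.arange(1, x.size + 1)
    width = K * sigma * np.sqrt(namuda / (2 - namuda) * (1 - (1 - namuda) ** (2 * t_idx)))
    return z, mid + width, mid - width

Feeding the per-sample health score from get_MSE() into ewma_chart() would give a smoothed
residual plus limits that could replace the fixed mean + 3 * std threshold used there.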