20231108 web-scraping study updates, plus code updates for some thesis experiments

This commit is contained in:
kevinding1125 2023-12-06 21:38:59 +08:00
parent 2c3e6c25a8
commit 84b1c260f3
178 changed files with 19756 additions and 95 deletions

View File

@ -2,7 +2,7 @@
''' '''
@Author : dingjiawen @Author : dingjiawen
@Date : 2023/11/9 21:34 @Date : 2023/12/6 14:03
@Usage : @Usage :
@Desc : @Desc :
''' '''

View File

@ -0,0 +1,50 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 14:39
@Usage :
@Desc : Save data as JSON
'''
import json
json_str = '''
[{
"name": "Bob",
"gender": "male",
"birthday": "1992-10-18"
}, {
"name": "Selina",
"gender": "female",
"birthday": "1995-10-18"
}]
'''
print(type(json_str))
data = json.loads(json_str)
print(data)
print(type(data))
import json
data = [{
'name': 'Bob',
'gender': 'male',
'birthday': '1992-10-18'
}]
with open('data.json', 'w', encoding='utf-8') as file:
file.write(json.dumps(data))
with open('data.json', 'w', encoding='utf-8') as file:
# indent=2 writes the JSON with indentation
file.write(json.dumps(data, indent=2))
data = [{
'name': '张三',
'gender': 'male',
'birthday': '1992-10-18'
}]
with open('data.json', 'w', encoding='utf-8') as file:
# indent=2 writes indented JSON; ensure_ascii=False keeps non-ASCII (e.g. Chinese) characters instead of escaping them
file.write(json.dumps(data, indent=2, ensure_ascii=False))
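For completeness, a minimal sketch of reading the saved file back with json.load; it only assumes the data.json written above exists in the working directory:
import json

# read data.json back into Python objects
with open('data.json', 'r', encoding='utf-8') as file:
    loaded = json.load(file)
print(loaded)
print(loaded[0]['name'])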

View File

@ -0,0 +1,33 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 15:03
@Usage :
@Desc :
'''
import pymysql
data = {
'id': '20120001',
'name': 'Bob',
'age': 20
}
# Build the INSERT statement dynamically from the dict
table = 'students'
keys = ', '.join(data.keys())
values = ', '.join(['%s'] * len(data))
db = pymysql.connect(host='localhost', user='root',
password=None, port=3306, db='spiders')
cursor = db.cursor()
sql = 'INSERT INTO {table}({keys}) VALUES ({values})'.format(
table=table, keys=keys, values=values)
try:
if cursor.execute(sql, tuple(data.values())):
print('Successful')
db.commit()
except Exception as e:
print('Failed', e)
db.rollback()
db.close()
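The same dict-driven construction extends to de-duplicated writes. A minimal sketch, assuming the students table uses id as its primary key and reusing the connection settings above:
import pymysql

data = {'id': '20120001', 'name': 'Bob', 'age': 21}
table = 'students'
keys = ', '.join(data.keys())
values = ', '.join(['%s'] * len(data))

db = pymysql.connect(host='localhost', user='root', password=None, port=3306, db='spiders')
cursor = db.cursor()
# INSERT ... ON DUPLICATE KEY UPDATE: insert a new row, or update the existing one
# when the primary key already exists
sql = f'INSERT INTO {table}({keys}) VALUES ({values}) ON DUPLICATE KEY UPDATE '
sql += ', '.join([f'{key} = %s' for key in data])
try:
    cursor.execute(sql, tuple(data.values()) * 2)
    db.commit()
    print('Successful')
except Exception as e:
    print('Failed', e)
    db.rollback()
db.close()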

View File

@ -0,0 +1,38 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 14:08
@Usage :
@Desc : Save as plain text
'''
import requests
from pyquery import PyQuery as pq
import re
url = 'https://ssr1.scrape.center/'
html = requests.get(url).text
doc = pq(html)
items = doc('.el-card').items()
file = open('movies.txt', 'w', encoding='utf-8')
for item in items:
# movie name
name = item.find('a > h2').text()
file.write(f'名称: {name}\n')
# categories
categories = [item.text() for item in item.find('.categories button span').items()]
file.write(f'类别: {categories}\n')
# release date
published_at = item.find('.info:contains(上映)').text()
published_at = re.search(r'(\d{4}-\d{2}-\d{2})', published_at).group(1) \
if published_at and re.search(r'\d{4}-\d{2}-\d{2}', published_at) else None
file.write(f'上映时间: {published_at}\n')
# score
score = item.find('p.score').text()
file.write(f'评分: {score}\n')
file.write(f'{"=" * 50}\n')
file.close()
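The same fields could also be written as structured CSV instead of free text. A minimal sketch with the standard csv module; the row shown is illustrative sample data, not scraped output:
import csv

with open('movies.csv', 'w', encoding='utf-8', newline='') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=['name', 'categories', 'published_at', 'score'])
    writer.writeheader()
    # in the real loop, writer.writerow would be called once per movie item
    writer.writerow({
        'name': '霸王别姬',
        'categories': '剧情,爱情',
        'published_at': '1993-07-26',
        'score': '9.5'
    })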

View File

@ -0,0 +1,67 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 16:01
@Usage :
@Desc :
'''
import requests
import logging
import json
from os import makedirs
from os.path import exists
logging.basicConfig(level=logging.INFO,
format='%(asctime)s - %(levelname)s: %(message)s')
INDEX_URL = 'https://spa1.scrape.center/api/movie/?limit={limit}&offset={offset}'
DETAIL_URL = 'https://spa1.scrape.center/api/movie/{id}'
LIMIT = 10
TOTAL_PAGE = 10
RESULTS_DIR = 'results'
exists(RESULTS_DIR) or makedirs(RESULTS_DIR)
def scrape_api(url):
logging.info('scraping %s...', url)
try:
response = requests.get(url)
if response.status_code == 200:
return response.json()
logging.error('get invalid status code %s while scraping %s',
response.status_code, url)
except requests.RequestException:
logging.error('error occurred while scraping %s', url, exc_info=True)
def scrape_index(page):
url = INDEX_URL.format(limit=LIMIT, offset=LIMIT * (page - 1))
return scrape_api(url)
def scrape_detail(id):
url = DETAIL_URL.format(id=id)
return scrape_api(url)
def save_data(data):
name = data.get('name')
data_path = f'{RESULTS_DIR}/{name}.json'
json.dump(data, open(data_path, 'w', encoding='utf-8'),
ensure_ascii=False, indent=2)
def main():
for page in range(1, TOTAL_PAGE + 1):
index_data = scrape_index(page)
for item in index_data.get('results'):
id = item.get('id')
detail_data = scrape_detail(id)
logging.info('detail data %s', detail_data)
save_data(detail_data)
if __name__ == '__main__':
main()
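If the API occasionally returns transient errors, a small retry wrapper can be layered on top of the request. A minimal sketch; RETRY_TIMES, the timeout and the sleep interval are illustrative choices, not part of the original script:
import time
import logging
import requests

RETRY_TIMES = 3

def scrape_api_with_retry(url):
    # retry a few times on non-200 responses or request exceptions
    for attempt in range(1, RETRY_TIMES + 1):
        try:
            response = requests.get(url, timeout=10)
            if response.status_code == 200:
                return response.json()
            logging.warning('status %s on attempt %s for %s', response.status_code, attempt, url)
        except requests.RequestException:
            logging.warning('request failed on attempt %s for %s', attempt, url, exc_info=True)
        time.sleep(1)
    logging.error('giving up on %s after %s attempts', url, RETRY_TIMES)
    return None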

View File

@ -2,7 +2,7 @@
''' '''
@Author : dingjiawen @Author : dingjiawen
@Date : 2023/11/9 21:34 @Date : 2023/12/6 15:58
@Usage : @Usage :
@Desc : @Desc :
''' '''

View File

@ -0,0 +1,8 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 16:19
@Usage :
@Desc :
'''

View File

@ -0,0 +1,28 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 16:57
@Usage : Basic usage of the aiohttp library
@Desc :
@Reference: https://github.dev/Python3WebSpider/AsyncTest demo12
'''
import aiohttp
import asyncio
async def fetch(session, url):
async with session.get(url) as response:
return await response.text(), response.status
async def main():
async with aiohttp.ClientSession() as session:
html, status = await fetch(session, 'https://cuiqingcai.com')
print(f'html: {html[:100]}...')
print(f'status: {status}')
if __name__ == '__main__':
asyncio.run(main())

View File

@ -0,0 +1,8 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 17:02
@Usage :
@Desc :
'''

View File

@ -0,0 +1,86 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 19:14
@Usage :
@Desc :
'''
import asyncio
import aiohttp
import logging
from motor.motor_asyncio import AsyncIOMotorClient
logging.basicConfig(level=logging.INFO,
format='%(asctime)s - %(levelname)s: %(message)s')
INDEX_URL = 'https://spa5.scrape.center/api/book/?limit=18&offset={offset}'
DETAIL_URL = 'https://spa5.scrape.center/api/book/{id}'
PAGE_SIZE = 18
PAGE_NUMBER = 1
CONCURRENCY = 5
session = None
MONGO_CONNECTION_STRING = 'mongodb://localhost:27017'
MONGO_DB_NAME = 'books'
MONGO_COLLECTION_NAME = 'books'
client = AsyncIOMotorClient(MONGO_CONNECTION_STRING)
db = client[MONGO_DB_NAME]
collection = db[MONGO_COLLECTION_NAME]
semaphore = asyncio.Semaphore(CONCURRENCY)
async def scrape_api(url):
async with semaphore:
try:
logging.info('scraping %s', url)
async with session.get(url) as response:
return await response.json()
except aiohttp.ClientError:
logging.error('error occurred while scraping %s', url, exc_info=True)
async def scrape_index(page):
url = INDEX_URL.format(offset=PAGE_SIZE * (page - 1))
return await scrape_api(url)
async def scrape_detail(id):
url = DETAIL_URL.format(id=id)
data = await scrape_api(url)
await save_data(data)
async def save_data(data):
logging.info('saving data %s', data)
if data:
return await collection.update_one({
'id': data.get('id')
}, {
'$set': data
}, upsert=True)
async def main():
# index tasks
global session
session = aiohttp.ClientSession()
scrape_index_tasks = [asyncio.ensure_future(scrape_index(page)) for page in range(1, PAGE_NUMBER + 1)]
results = await asyncio.gather(*scrape_index_tasks)
# detail tasks
print('results', results)
ids = []
for index_data in results:
if not index_data: continue
for item in index_data.get('results'):
ids.append(item.get('id'))
scrape_detail_tasks = [asyncio.ensure_future(scrape_detail(id)) for id in ids]
await asyncio.wait(scrape_detail_tasks)
await session.close()
if __name__ == '__main__':
asyncio.run(main())
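The session is created and closed by hand here because it has to outlive all the tasks. An alternative sketch of main() that scopes the same global session with async with, so it is closed even if a task raises; it relies on the scrape_index/scrape_detail functions defined above:
async def main_with_context():
    global session
    async with aiohttp.ClientSession() as session:
        index_tasks = [asyncio.ensure_future(scrape_index(page))
                       for page in range(1, PAGE_NUMBER + 1)]
        results = await asyncio.gather(*index_tasks)
        ids = [item.get('id')
               for index_data in results if index_data
               for item in index_data.get('results')]
        detail_tasks = [asyncio.ensure_future(scrape_detail(id)) for id in ids]
        await asyncio.gather(*detail_tasks)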

View File

@ -0,0 +1,64 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 16:57
@Usage : Basic usage of the aiohttp library
@Desc :
@Reference: https://github.dev/Python3WebSpider/AsyncTest demo12
'''
import aiohttp
import asyncio
async def fetch(session, url):
async with session.get(url) as response:
return await response.text(), response.status
async def main():
async with aiohttp.ClientSession() as session:
html, status = await fetch(session, 'https://cuiqingcai.com')
print(f'html: {html[:100]}...')
print(f'status: {status}')
# Passing query parameters in the URL
async def main1():
params = {'name': 'germey', 'age': 25}
async with aiohttp.ClientSession() as session:
async with session.get('https://httpbin.org/get', params=params) as response:
print(await response.text())
'''
The session also supports the other HTTP request types:
session.post('https://httpbin.org/post', data=b'data')
session.put('https://httpbin.org/put', data=b'data')
session.delete('https://httpbin.org/delete')
session.head('https://httpbin.org/get')
session.options('https://httpbin.org/get')
session.patch('https://httpbin.org/patch', data=b'data')
'''
# The returned response object
async def main2():
data = {'name': 'germey', 'age': 25}
# Some response members need await and some do not. The rule: if the member returns a coroutine
# (an async-defined method such as text()/read()/json()), it must be awaited; see the aiohttp API reference at https://docs.aiohttp.org/en/stable/client_reference.html
async with aiohttp.ClientSession() as session:
async with session.post('https://httpbin.org/post', data=data) as response:
print('status:', response.status)
print('headers:', response.headers)
print('body:', await response.text())
print('bytes:', await response.read())
print('json:', await response.json())
# Timeout configuration
async def main3():
timeout = aiohttp.ClientTimeout(total=0.1)
async with aiohttp.ClientSession(timeout=timeout) as session:
async with session.get('https://httpbin.org/get') as response:
print('status:', response.status)
if __name__ == '__main__':
asyncio.run(main2())
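Besides timeouts, aiohttp can also bound the number of simultaneous connections at the session level. A minimal sketch with TCPConnector; the limit of 5 is an arbitrary illustration:
# limit the connection pool so at most 5 requests are in flight at once
async def main4():
    connector = aiohttp.TCPConnector(limit=5)
    async with aiohttp.ClientSession(connector=connector) as session:
        async with session.get('https://httpbin.org/get') as response:
            print('status:', response.status)

# asyncio.run(main4())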

View File

@ -0,0 +1,42 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 16:57
@Usage : Concurrency limiting with a semaphore, so too many simultaneous requests do not overwhelm the target site
@Desc :
@Reference: https://github.dev/Python3WebSpider/AsyncTest
'''
import aiohttp
import asyncio
CONCURRENCY = 5
URL = 'https://www.baidu.com/'
semaphore = asyncio.Semaphore(CONCURRENCY)
session = None
async def scrape_api():
async with semaphore:
print('scraping', URL)
async with session.get(URL) as response:
# await asyncio.sleep(1)
return await response.text()
async def main():
global session
session = aiohttp.ClientSession()
scrape_index_tasks = [asyncio.ensure_future(scrape_api()) for _ in range(10000)]
# gather already waits for every task to finish, so a second asyncio.wait call is redundant
await asyncio.gather(*scrape_index_tasks)
await session.close()
if __name__ == '__main__':
# asyncio.run(main())
asyncio.get_event_loop().run_until_complete(main())

View File

@ -0,0 +1,27 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 16:34
@Usage : Running multiple coroutine tasks
@Desc :
@Reference: https://github.dev/Python3WebSpider/AsyncTest
'''
import asyncio
import requests
async def request():
url = 'https://www.baidu.com'
status = requests.get(url)
return status
tasks = [asyncio.ensure_future(request()) for _ in range(5)]
print('Tasks:', tasks)
loop = asyncio.get_event_loop()
# The five tasks run one after another, because requests.get is a blocking call
loop.run_until_complete(asyncio.wait(tasks))
for task in tasks:
print('Task Result:', task.result())

View File

@ -0,0 +1,33 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 16:34
@Usage : Multiple coroutine tasks, meant to show the advantage of coroutines
@Desc :
@Reference: https://github.dev/Python3WebSpider/AsyncTest demo8_1, demo9_1 and demo10
'''
import asyncio
import requests
import time
start = time.time()
# Run one at a time, each request takes at least 5 seconds
async def request():
url = 'https://httpbin.org/delay/5'
print('Waiting for', url)
# With or without await, requests.get blocks the event loop, so there is no real asynchrony here; aiohttp is needed for that
response = requests.get(url)
print('Get response from', url, 'response', response)
tasks = [asyncio.ensure_future(request()) for _ in range(10)]
loop = asyncio.get_event_loop()
loop.run_until_complete(asyncio.wait(tasks))
end = time.time()
print('Cost time:', end - start)
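If a blocking library such as requests has to stay, the call can at least be pushed onto a thread pool so the event loop itself is not blocked. A minimal sketch using run_in_executor; it is an alternative to the demo above, not part of it:
async def request_in_executor():
    url = 'https://httpbin.org/delay/5'
    loop = asyncio.get_event_loop()
    # run the blocking requests.get in the default thread-pool executor
    response = await loop.run_in_executor(None, requests.get, url)
    print('Get response from', url, 'response', response)

# executor_tasks = [asyncio.ensure_future(request_in_executor()) for _ in range(10)]
# asyncio.get_event_loop().run_until_complete(asyncio.wait(executor_tasks))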

View File

@ -0,0 +1,40 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 16:34
@Usage : Multiple coroutine tasks showing the advantage of coroutines, this time with aiohttp
@Desc :
@Reference: https://github.dev/Python3WebSpider/AsyncTest demo11
'''
import asyncio
import aiohttp
import time
start = time.time()
async def get(url):
session = aiohttp.ClientSession()
response = await session.get(url)
await response.text()
await session.close()
return response
async def request():
url = 'https://httpbin.org/delay/5'
print('Waiting for', url)
response = await get(url)
print('Get response from', url, 'response', response)
tasks = [asyncio.ensure_future(request()) for _ in range(100)]
loop = asyncio.get_event_loop()
loop.run_until_complete(asyncio.wait(tasks))
end = time.time()
print('Cost time:', end - start)
# Cost time: 7.670234203338623

View File

@ -0,0 +1,8 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 17:02
@Usage :
@Desc :
'''

View File

@ -0,0 +1,29 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 16:20
@Usage : The asyncio library, which enables the async and await keywords
@Desc : Asynchronous crawler test - defining a coroutine
@Reference: https://github.dev/Python3WebSpider/AsyncTest
'''
import asyncio
async def execute(x):
print('Number:', x)
return x
# Calling the async function returns a coroutine object; nothing runs yet
coroutine = execute(1)
print('Coroutine:', coroutine)
print('After calling execute')
loop = asyncio.get_event_loop()
task = loop.create_task(coroutine)
print('Task:', task)
loop.run_until_complete(task)
print('Task:', task)
print('After calling loop')
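On Python 3.7+ the explicit event-loop handling above can be replaced by asyncio.run, which creates a loop, runs the coroutine and closes the loop in one call; a minimal equivalent sketch:
result = asyncio.run(execute(2))
print('Result:', result)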

View File

@ -0,0 +1,38 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 16:20
@Usage : The asyncio library, which enables the async and await keywords
@Desc : Asynchronous crawler test - defining a coroutine and binding a callback to a task
@Reference: https://github.dev/Python3WebSpider/AsyncTest
'''
import asyncio
import requests
async def request():
url = 'https://www.baidu.com'
status = requests.get(url)
return status
def callback(task):
print('Status:', task.result())
coroutine = request()
task = asyncio.ensure_future(coroutine)
# Bind a callback that runs once the task completes
task.add_done_callback(callback)
print('Task:', task)
loop = asyncio.get_event_loop()
loop.run_until_complete(task)
print('Task:', task)
# Reading task.result() after the task finishes achieves a similar effect without a callback
loop = asyncio.get_event_loop()
loop.run_until_complete(task)
print('Task:', task)
print('Task Result:', task.result())

View File

@ -0,0 +1,8 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 19:46
@Usage :
@Desc :
'''

View File

@ -0,0 +1,8 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 19:53
@Usage :
@Desc :
'''

View File

@ -0,0 +1,30 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 19:53
@Usage :
@Desc : Basic usage of Selenium
@Reference: https://github.dev/Python3WebSpider/SeleniumTest
'''
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
browser = webdriver.Chrome()
try:
browser.get('https://www.baidu.com')
# input = browser.find_element_by_id('kw')  # legacy API; Selenium 4.0+ uses the call below
input = browser.find_element(By.ID, 'kw')
input.send_keys('Python')
input.send_keys(Keys.ENTER)
wait = WebDriverWait(browser, 10)
wait.until(EC.presence_of_element_located((By.ID, 'content_left')))
print(browser.current_url)
print(browser.get_cookies())
print(browser.page_source)
finally:
browser.close()

View File

@ -0,0 +1,18 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 21:11
@Usage : Working with cookies
@Desc : Getting, adding and deleting cookies
'''
from selenium import webdriver
browser = webdriver.Chrome()
browser.get('https://www.zhihu.com/explore')
print(browser.get_cookies())
browser.add_cookie({'name': 'name', 'domain': 'www.zhihu.com', 'value': 'germey'})
print(browser.get_cookies())
browser.delete_all_cookies()
print(browser.get_cookies())

View File

@ -0,0 +1,22 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 21:14
@Usage : Tab management
@Desc : Pages can be opened in separate browser tabs and switched between
'''
import time
from selenium import webdriver
browser = webdriver.Chrome()
browser.get('https://www.baidu.com')
# Open a new tab
browser.execute_script('window.open()')
print(browser.window_handles)
browser.switch_to.window(browser.window_handles[1])
browser.get('https://www.taobao.com')
time.sleep(1)
browser.switch_to.window(browser.window_handles[0])
browser.get('https://python.org')

View File

@ -0,0 +1,26 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 21:17
@Usage : Exception handling
@Desc : Locating a node can fail with an exception, which can be caught and handled
'''
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException, NoSuchElementException
browser = webdriver.Chrome()
try:
browser.get('https://www.baidu.com')
except TimeoutException:
print('Time out')
try:
browser.find_element(By.ID, 'hello')
except NoSuchElementException:
print('No Such Element')
finally:
browser.close()

View File

@ -0,0 +1,33 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 21:20
@Usage : Avoiding Selenium detection
@Desc : Many sites now detect Selenium and block the page outright once an automated browser is found.
The basic check is whether the window.navigator object of the current browser window contains a webdriver property:
in a normal browser it is undefined, but once Selenium is used the webdriver property is set on window.navigator.
https://antispider1.scrape.center/ uses exactly this check
'''
from selenium import webdriver
from selenium.webdriver import ChromeOptions
option = ChromeOptions()
option.add_experimental_option('excludeSwitches', ['enable-automation'])
option.add_experimental_option('useAutomationExtension', False)
browser = webdriver.Chrome(options=option)
# Ineffective: this only runs after the page has finished loading, while the check already happens before the page renders
browser.execute_script('Object.defineProperty(navigator, "webdriver", {get: () => undefined})')
browser.get('https://antispider1.scrape.center/')
# Use CDP (Chrome DevTools Protocol) instead: execute the JavaScript that clears webdriver as soon as every new document starts loading
option = ChromeOptions()
option.add_experimental_option('excludeSwitches', ['enable-automation'])
option.add_experimental_option('useAutomationExtension', False)
browser = webdriver.Chrome(options=option)
browser.execute_cdp_cmd('Page.addScriptToEvaluateOnNewDocument', {
'source': 'Object.defineProperty(navigator, "webdriver", {get: () => undefined})'
})
browser.get('https://antispider1.scrape.cuiqingcai.com/')

View File

@ -0,0 +1,20 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 21:31
@Usage : Headless mode
@Desc : The previous examples always pop up a visible browser window when they run;
Chrome now supports a headless mode with no window
'''
from selenium import webdriver
from selenium.webdriver import ChromeOptions
option = ChromeOptions()
option.add_argument('--headless')
browser = webdriver.Chrome(options=option)
browser.set_window_size(1366, 768)
browser.get('https://www.baidu.com')
browser.get_screenshot_as_file('preview.png')

View File

@ -0,0 +1,22 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 19:59
@Usage :
@Desc : Visiting pages and locating nodes with Selenium
'''
from selenium import webdriver
from selenium.webdriver.common.by import By
browser = webdriver.Chrome()
browser.get('https://www.taobao.com')
input_first = browser.find_element(By.ID, 'q')
input_second = browser.find_element(By.CSS_SELECTOR, '#q')
input_third = browser.find_element(By.XPATH, '//*[@id="q"]')
print(input_first, input_second, input_third)
# Multiple nodes
lis = browser.find_elements(By.CSS_SELECTOR,'.service-bd li')
print(lis)
browser.close()

View File

@ -0,0 +1,22 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 20:04
@Usage :
@Desc : Node interaction with Selenium - driving the browser to perform actions
'''
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
browser = webdriver.Chrome()
browser.get('https://www.taobao.com')
input = browser.find_element(By.ID, 'q')
input.send_keys('iPhone') # type text
time.sleep(1)
input.clear() # clear the text
input.send_keys('iPad')
button = browser.find_element(By.CLASS_NAME, 'btn-search')
button.click() # click the search button

View File

@ -0,0 +1,23 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 20:08
@Usage :
@Desc : Selenium action chains - executing a sequence of actions back to back
'''
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
browser = webdriver.Chrome()
url = 'http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
browser.get(url)
browser.switch_to.frame('iframeResult')
source = browser.find_element(By.CSS_SELECTOR, '#draggable')
target = browser.find_element(By.CSS_SELECTOR, '#droppable')
actions = ActionChains(browser)
# Simulate pressing the mouse on the source, dragging and releasing on the target
actions.drag_and_drop(source, target)
actions.perform() # actually execute the queued actions

View File

@ -0,0 +1,20 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 20:14
@Usage :
@Desc : Running JavaScript with Selenium; operations that have no Selenium API can be implemented by executing JavaScript directly
'''
from selenium import webdriver
import time
browser = webdriver.Chrome()
browser.get('https://www.zhihu.com/explore')
# browser.get('https://www.taobao.com')
# Scroll to the bottom of the page
browser.execute_script('window.scrollTo(0, document.body.scrollHeight)')
# Pop up an alert box
browser.execute_script('alert("To Bottom")')
time.sleep(5)
browser.close()

View File

@ -0,0 +1,27 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 20:20
@Usage :
@Desc : Getting node information
'''
from selenium import webdriver
from selenium.webdriver.common.by import By
browser = webdriver.Chrome()
url = 'https://spa2.scrape.center/'
browser.get(url)
logo = browser.find_element(By.CLASS_NAME, 'logo-image')
print(logo)
# Get an attribute
print(logo.get_attribute('src'))
# Get the text content
title = browser.find_element(By.CLASS_NAME, 'logo-title')
print(title.text)
# Get the id, location, tag name and size
print(title.id)
print(title.location)
print(title.tag_name)
print(title.size)

View File

@ -0,0 +1,26 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 20:31
@Usage : Switching frames
@Desc : Pages can contain iframe nodes, which are effectively child pages embedded in the page.
After opening a page Selenium operates in the parent frame by default, so switch_to.frame is needed to move into an iframe
'''
import time
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
browser = webdriver.Chrome()
url = 'http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable'
browser.get(url)
browser.switch_to.frame('iframeResult')
try:
logo = browser.find_element(By.CLASS_NAME, 'logo')
except NoSuchElementException:
print('NO LOGO')
browser.switch_to.parent_frame()
logo = browser.find_element(By.CLASS_NAME, 'logo')
print(logo)
print(logo.text)

View File

@ -0,0 +1,31 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 20:38
@Usage : Waits
@Desc : The get method returns as soon as the page framework has finished loading,
so when get completes the browser may not yet have a fully loaded page;
when necessary we therefore tell the browser to wait for a while
'''
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
browser = webdriver.Chrome()
# Implicit wait: not ideal, since it only sets a fixed timeout while page load time depends on network conditions
browser.implicitly_wait(10)
browser.get('https://spa2.scrape.center/')
input = browser.find_element(By.CLASS_NAME, 'logo-image')
print(input)
# Explicit wait: specify the node to look for and the maximum time to wait
browser.get('https://www.taobao.com/')
wait = WebDriverWait(browser, 10)
# presence_of_element_located waits until the node is present in the DOM
input = wait.until(EC.presence_of_element_located((By.ID, 'q')))
# element_to_be_clickable waits until the button is clickable
button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, '.btn-search')))
print(input, button)

View File

@ -0,0 +1,21 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/12/6 20:38
@Usage : Simulating the browser's back and forward navigation
@Desc :
'''
import time
from selenium import webdriver
browser = webdriver.Chrome()
browser.get('https://www.baidu.com/')
browser.get('https://www.taobao.com/')
browser.get('https://www.python.org/')
# go back
browser.back()
time.sleep(1)
# go forward
browser.forward()
browser.close()

View File

@ -6,6 +6,8 @@ import matplotlib.pyplot as plt
# 数据导入 # 数据导入
data = np.load("../data/HI_DATA/HI_data.npy") data = np.load("../data/HI_DATA/HI_data.npy")
print(data.shape) # (2803, 2560) print(data.shape) # (2803, 2560)
plt.plot(data[0,:])
plt.show()
(samples, dims) = data.shape (samples, dims) = data.shape
# # 24个指标建立 # # 24个指标建立

View File

@ -2,6 +2,7 @@ import tensorflow as tf
import numpy as np import numpy as np
import pandas as pd import pandas as pd
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from keras.callbacks import EarlyStopping
# 数据读取 # 数据读取
# train_data = np.load("Select_data.npy") # train_data = np.load("Select_data.npy")
@ -17,8 +18,14 @@ for index in indexs:
Selected_data = np.vstack([Selected_data, feature_data[:, index]]) Selected_data = np.vstack([Selected_data, feature_data[:, index]])
z += 1 z += 1
Selected_data = np.transpose(Selected_data, [1, 0]) # (2803,9) Selected_data = np.transpose(Selected_data, [1, 0]) # (2803,9)
plt.plot(Selected_data)
plt.show()
train_data=Selected_data[1500:-1,:] train_data=Selected_data[1500:-1,:]
print(train_data.shape) print(train_data.shape)
plt.plot(train_data)
plt.show()
# 建立模型 # 建立模型
@ -51,12 +58,25 @@ class model():
if __name__ == '__main__': if __name__ == '__main__':
model = model(input_shape=6).getModel(model_Type='ae') model = model(input_shape=6).getModel(model_Type='ae')
model.compile(optimizer=tf.optimizers.Adam(0.001), loss=tf.losses.mse, metrics=['acc']) model.compile(optimizer=tf.optimizers.Adam(0.001), loss=tf.losses.mse, metrics=['acc'])
model.summary() # model.summary()
history = model.fit(train_data, train_data, epochs=300, batch_size=100) checkpoint = tf.keras.callbacks.ModelCheckpoint(
filepath="AE_model.h5",
monitor='val_loss',
verbose=2,
save_best_only=True,
mode='min')
lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=10, min_lr=0.0001)
model.compile(optimizer=tf.optimizers.SGD(), loss=tf.losses.mse)
# model.compile(optimizer=tf.optimizers.SGD(learning_rate=0.001), loss=FTMSE())
model.summary()
early_stop = EarlyStopping(monitor='val_loss', min_delta=0.0001, patience=20, mode='min', verbose=1)
history = model.fit(train_data, train_data, epochs=1000, batch_size=100)
HI_merge_data = tf.keras.models.Model(inputs=model.input, outputs=model.get_layer('mid').output).predict(train_data) HI_merge_data = tf.keras.models.Model(inputs=model.input, outputs=model.get_layer('mid').output).predict(train_data)
print(HI_merge_data) print(HI_merge_data)
acc = np.array(history.history.get('acc')) acc = np.array(history.history.get('acc'))
# if acc[299] > 0.9: # if acc[299] > 0.9:
np.save("HI_merge_data.npy", HI_merge_data) np.save("HI_merge_data1.npy", HI_merge_data)
model.save("AE_model.h5") model.save("AE_model.h5")

View File

@ -3,8 +3,10 @@ import numpy as np
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
#数据导入 #数据导入
HI_merge_data=np.load("HI_merge_data.npy") # HI_merge_data=np.load("HI_merge_data.npy")
HI_merge_data=HI_merge_data[0:1250,1] HI_merge_data=np.loadtxt("smallVHI.csv",delimiter=",")
print(HI_merge_data.shape)
# HI_merge_data=HI_merge_data[0:1250,0]
print(HI_merge_data.shape) print(HI_merge_data.shape)
print(HI_merge_data) print(HI_merge_data)
plt.plot(HI_merge_data) plt.plot(HI_merge_data)

View File

@ -12,11 +12,11 @@
[1] https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.fftpack.dct.html [1] https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.fftpack.dct.html
''' '''
import numpy as np import numpy as np
import cv2 import cv2
from scipy.fftpack import dct, idct from scipy.fftpack import dct, idct
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import tensorflow as tf
array = np.array([-0.029078494757, array = np.array([-0.029078494757,
-0.33095228672, -0.33095228672,
@ -40,7 +40,6 @@ amp = np.abs(np.fft.fft(array) / len(array))
print(amp) print(amp)
print(dct_array) print(dct_array)
plt.subplot(4, 1, 1) plt.subplot(4, 1, 1)
plt.plot(array) plt.plot(array)
plt.subplot(4, 1, 2) plt.subplot(4, 1, 2)
@ -50,3 +49,49 @@ plt.plot(dct_array_t)
plt.subplot(4, 1, 4) plt.subplot(4, 1, 4)
plt.plot(amp) plt.plot(amp)
plt.show() plt.show()
def test1(x):
C_temp = np.zeros(x.shape)
dst = np.zeros(x.shape)
n = len(x)
N = n
C_temp[:] = 1 * np.sqrt(1 / N)
for j in range(n):
C_temp[j] = np.cos(np.pi * (2 * j + 1) / (2 * N)
) * np.sqrt(2 / N)
#
dst = np.dot(C_temp, x)
dst = np.dot(dst, np.transpose(C_temp))
# dst1 = np.log(abs(dst)) # 进行log处理
return dst
def test(x):
from numpy.fft import rfft as fft
N = len(x)
y = np.empty(shape=[2 * N])
y[:N] = x
y[N:] = x[::-1]
Y = fft(y)[:N]
z = []
for (k, y) in zip(range(len(Y)), Y):
z.append(y * np.exp(-1j * np.pi * k / (2 * N)))
z= np.array(z)
return z.real
if __name__ == '__main__':
a = tf.random.normal(shape=(3, 522, 1), mean=2, stddev=1, dtype=tf.float32) # create a random normal tensor (mean=2, stddev=1)
np.save("a.npy",a)
b = tf.signal.dct(a)
np.save("b.npy",b)
print(b)

View File

@ -160,13 +160,12 @@ def predictContinueByOne(newModel, train_data, predict_num=50):
for each_predict in range(predict_num): for each_predict in range(predict_num):
# predicted_data.shape : (1,10) 取最后一条 # predicted_data.shape : (1,10) 取最后一条
predicted_data = newModel.predict(each_predict_data) # (batch_size,filer_num,1) predicted_data = newModel.predict(each_predict_data) # (batch_size,filer_num,1)
predicted_data = np.expand_dims(predicted_data[:, -1], axis=-1) predicted_data = predicted_data[:, -1]
predicted_list[each_predict] = predicted_data predicted_list[each_predict] = predicted_data
# (1,1) => (10,1)l # (1,1) => (10,1)l
temp1 = np.transpose(np.concatenate([each_predict_data[:, 1:, -1], predicted_data], axis=1), [1, 0]) temp1 = np.transpose(np.concatenate([each_predict_data[:, 1:, -1], predicted_data], axis=1), [1, 0])
each_predict_data = np.expand_dims( each_predict_data = np.concatenate([each_predict_data[:, :, 1:], np.expand_dims(temp1,axis=0)], axis=-1)
np.concatenate([np.squeeze(each_predict_data[:, :, 1:], axis=0), temp1], axis=1), axis=0)
return predicted_list return predicted_list
@ -175,7 +174,7 @@ def predictContinueByOne(newModel, train_data, predict_num=50):
def predictByEveryData(trained_model: tf.keras.Model, predict_data): def predictByEveryData(trained_model: tf.keras.Model, predict_data):
# shape:(1180,10) 取每一次的最后一个点就是从头到尾预测的 # shape:(1180,10) 取每一次的最后一个点就是从头到尾预测的
predicted_data = trained_model.predict(predict_data) predicted_data = trained_model.predict(predict_data)
predicted_data = np.expand_dims(predicted_data[:, -1], axis=-1) predicted_data = predicted_data[:, -1] # 1180,1,1
predicted_data = np.concatenate([np.expand_dims(total_data[:hidden_num + feature, ], axis=1), predicted_data], predicted_data = np.concatenate([np.expand_dims(total_data[:hidden_num + feature, ], axis=1), predicted_data],
axis=0) axis=0)

View File

@ -22,14 +22,14 @@ from pylab import *
''' '''
超参数设置: 超参数设置:
''' '''
hidden_num = 40 # LSTM细胞个数 hidden_num = 10 # LSTM细胞个数
feature = 10 # 一个点的维度 feature = 10 # 一个点的维度
batch_size = 32 batch_size = 32
EPOCH = 1000 EPOCH = 1000
unit = 512 # LSTM的维度 unit = 512 # LSTM的维度
predict_num = 50 # 预测个数 predict_num = 50 # 预测个数
model_name = "dctLSTM" model_name = "dctLSTM"
save_name = r"self_{0}_hidden{1}_unit{2}_feature{3}_predict{4}.h5".format(model_name, hidden_num, unit, feature, save_name = r"self_{0}_hidden{1}_unit{2}_feature{3}_predict{4}_0.0657_0.207.h5".format(model_name, hidden_num, unit, feature,
predict_num) predict_num)
@ -204,18 +204,18 @@ if __name__ == '__main__':
train_label_single, train_label_single,
predict_num=predict_num) predict_num=predict_num)
# # # #### TODO 训练 # # # #### TODO 训练
# model = predict_model(hidden_num, feature) model = predict_model(hidden_num, feature)
# checkpoint = tf.keras.callbacks.ModelCheckpoint( checkpoint = tf.keras.callbacks.ModelCheckpoint(
# filepath=save_name, filepath=save_name,
# monitor='val_loss', monitor='val_loss',
# verbose=2, verbose=2,
# save_best_only=True, save_best_only=True,
# mode='min') mode='min')
# lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=10, min_lr=0.0001) lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=10, min_lr=0.0001)
#
# model.compile(optimizer=tf.optimizers.SGD(), loss=tf.losses.mse) model.compile(optimizer=tf.optimizers.SGD(), loss=tf.losses.mse)
#
# model.summary() model.summary()
# early_stop = EarlyStopping(monitor='val_loss', min_delta=0.0001, patience=20, mode='min', verbose=1) # early_stop = EarlyStopping(monitor='val_loss', min_delta=0.0001, patience=20, mode='min', verbose=1)
# #
# history = model.fit(train_data, train_label_single, epochs=EPOCH, validation_data=(val_data, val_label_single), # history = model.fit(train_data, train_label_single, epochs=EPOCH, validation_data=(val_data, val_label_single),
@ -225,7 +225,7 @@ if __name__ == '__main__':
#### TODO 测试 #### TODO 测试
trained_model = tf.keras.models.load_model(save_name, custom_objects={'AttentionEmbedLSTMLayer': LSTMLayer}) trained_model = tf.keras.models.load_model(save_name, custom_objects={'AttentionEmbedLSTMLayer': LSTMLayer})
model.summary()
# 使用已知的点进行预测 # 使用已知的点进行预测
print("开始预测") print("开始预测")
predicted_data = predictByEveryData(trained_model, train_data) predicted_data = predictByEveryData(trained_model, train_data)

View File

@ -17,6 +17,9 @@ import timeit
# cat_sale = pd.read_excel('data/catering_sale.xls') # cat_sale = pd.read_excel('data/catering_sale.xls')
path = "G:\data\SCADA数据\jb4q_8.csv" path = "G:\data\SCADA数据\jb4q_8.csv"
cat_sale = pd.read_csv(path) cat_sale = pd.read_csv(path)
print(cat_sale)
cat_sale = cat_sale.iloc[:20000, :]
print(cat_sale)
# cat_sale.drop('日期', axis=1, inplace=True) # cat_sale.drop('日期', axis=1, inplace=True)
# 过滤异常值,并置为空值 # 过滤异常值,并置为空值
@ -30,6 +33,8 @@ cat_sale[:][cat_sale[:] == 0] = np.nan # 在索引比较的时候,要转换
:param x:差值前后的索引值 :param x:差值前后的索引值
:param y:差值前后的数值 :param y:差值前后的数值
''' '''
def cal_f(x, y): def cal_f(x, y):
""" """
计算插商 计算插商
@ -50,6 +55,8 @@ def cal_f(x, y):
:param y:差值前后的数值 :param y:差值前后的数值
:param x_j:需要差值的索引 :param x_j:需要差值的索引
''' '''
def newton(x, y, x_j): def newton(x, y, x_j):
""" """
牛顿差值多项式 牛顿差值多项式
@ -76,6 +83,8 @@ def newton(x, y, x_j):
:param is_fast:是否需要快速差值(无论前后是否是零值均采用);反之则一直找到不为0值的进行计算 :param is_fast:是否需要快速差值(无论前后是否是零值均采用);反之则一直找到不为0值的进行计算
:param k:取前后多少个数 :param k:取前后多少个数
''' '''
def ployinterp_columns(s, n, x_j, is_fast: bool = False, k=3): def ployinterp_columns(s, n, x_j, is_fast: bool = False, k=3):
X = [] X = []
Y = [] Y = []
@ -135,19 +144,52 @@ def execute():
def test(): def test():
before_data = pd.DataFrame()
cat_sale[:][cat_sale[:] == 0] = np.nan # 在索引比较的时候要转换成同一类型使用astype cat_sale[:][cat_sale[:] == 0] = np.nan # 在索引比较的时候要转换成同一类型使用astype
for j in range(len(cat_sale['num_gearbox_sumptemp'])): for j in range(len(cat_sale['num_gearbox_pumpoutletpress'])):
if (cat_sale['num_gearbox_sumptemp'].isnull())[j]: if (cat_sale['num_gearbox_pumpoutletpress'].isnull())[j]:
x_j = cat_sale.index[j] x_j = cat_sale.index[j]
cat_sale.loc[j,'num_gearbox_sumptemp'] = ployinterp_columns(cat_sale['num_gearbox_sumptemp'], j, x_j,is_fast=True) # cat_sale.loc[j,'num_gearbox_pumpoutletpress'] = ployinterp_columns(cat_sale['num_gearbox_pumpoutletpress'], j, x_j,is_fast=True)
a = ployinterp_columns(cat_sale['num_gearbox_pumpoutletpress'], j, x_j, is_fast=True)
if a > 10 or a <= 3:
a = 5
before_data.loc[j, 'num_gearbox_pumpoutletpress'] = a
# print('第{0}行牛顿插值为{1}'.format(j, cat_sale.loc[j,'num_gearbox_sumptemp'])) # print('第{0}行牛顿插值为{1}'.format(j, cat_sale.loc[j,'num_gearbox_sumptemp']))
else:
a = cat_sale.loc[j, 'num_gearbox_pumpoutletpress']
if a > 10 or a <= 3:
a = 5
before_data.loc[j, 'num_gearbox_pumpoutletpress'] = a
# cat_sale[:][cat_sale[:] == np.nan] = 0
return before_data['num_gearbox_pumpoutletpress'], cat_sale['num_gearbox_pumpoutletpress']
def plot(x_axis, original_data, restored_data):
import matplotlib.pyplot as plt
# 绘制原始数据和修复后的数据对比图
plt.figure(figsize=(3.2, 2.0))
plt.subplots_adjust(left=0.18, right=0.94, bottom=0.25, top=0.85, wspace=None, hspace=None)
plt.tight_layout()
plt.plot(x_axis, original_data, label='Original Data', color='blue')
plt.plot(x_axis, restored_data, label='Restored Data', color='red')
font = {'family': 'Times New Roman', 'weight': 'normal', 'size': 10}
plt.xlabel('Points', font)
plt.ylabel("Value", font)
plt.title('Original Data vs Restored Data', font)
plt.legend(loc=2, prop=font, handlelength=0.45, frameon=True, facecolor='w')
plt.savefig('NewtonInsert.png',dpi=600)
plt.show()
if __name__ == '__main__': if __name__ == '__main__':
start = timeit.default_timer() start = timeit.default_timer()
# execute() # execute()
test() restored_data, original_data = test()
for i in range(len(original_data)):
a = original_data[i]
if np.isnan(a):
original_data[i] = 0
plot([i for i in range(len(original_data))], original_data, restored_data)
end = timeit.default_timer() end = timeit.default_timer()
print('Running time: %s Seconds' % (end - start)) print('Running time: %s Seconds' % (end - start))
# 返回值是浮点数 # 返回值是浮点数

View File

@ -11,38 +11,28 @@
import numpy as np import numpy as np
import pandas as pd import pandas as pd
path = "G:\data\SCADA数据\jb4q_8.csv"
cat_sale = pd.read_csv(path)
print(cat_sale)
cat_sale = cat_sale.iloc[:20000, :]
print(cat_sale)
# cat_sale.drop('日期', axis=1, inplace=True)
# 过滤异常值,并置为空值
# cat_sale['销量'][(cat_sale['销量'] < 400) | (cat_sale['销量'] > 5000)] = np.NAN
# 将0值变成NAN 通过双中括号进行索引任意位置
# print(df['realtime'][1])
cat_sale[:][cat_sale[:] == 0] = np.nan # 在索引比较的时候要转换成同一类型使用astype
# 拉格朗日插值算法 # 拉格朗日插值算法
def LagrangeInterpolation(slices, x, k=5): def LagrangeInterpolation(X,Y, x):
# slices(series) :the defining points # slices(series) :the defining points
# k :the number of defining points of Lagrange poly 前后各k个值 # k :the number of defining points of Lagrange poly 前后各k个值
# slices index :the corresponding value on each defining point # slices index :the corresponding value on each defining point
# x :the point whose value we are interested # x :the point whose value we are interested
# print(slices[x]) # print(slices[x])
# print(np.isnan(slices[x])) # print(np.isnan(slices[x]))
result = 0 # later to save final result result = 0
X = []
Y = []
# 先取序列前后各k个不为空的值
index = x - 1
while len(X) < k and index >= 0:
if not np.isnan(slices[index]):
Y.append(slices[index])
X.append(index)
index -= 1
index = x + 1
X.reverse()
Y.reverse()
while len(X) < 2 * k and index <= len(slices):
if not np.isnan(slices[index]):
Y.append(slices[index])
X.append(index)
index += 1
# print(X)
# print(Y)
for j in range(len(X)): for j in range(len(X)):
# result_l 基函数 # result_l 基函数
result_l = 1 result_l = 1
@ -50,16 +40,100 @@ def LagrangeInterpolation(slices, x, k=5):
if i != j: if i != j:
result_l = result_l * (x - X[i]) / (X[j] - X[i]) result_l = result_l * (x - X[i]) / (X[j] - X[i])
# 取值 slices[j] # 取值 slices[j]
result = result + slices[j] * result_l result = result + Y[j] * result_l
return result return result
def ployinterp_columns(s, n, x_j, is_fast: bool = False, k=3):
X = []
Y = []
if is_fast:
# 如果最前面的值不够k个
if n < k or n > len(s) - k - 1:
y = s[[i for i in range(max(n - k, 0), n)] + [i for i in range(n + 1, min(n + k + 1, len(s)))]]
# 前后均有k个
else:
y = s[[i for i in range(n - k, n)] + [i for i in range(n + 1, n + k + 1)]] # 取空值处的前后5个数
y = y[y.notnull()] # 剔除空值
X = y.index
Y = list(y)
else:
# 先取序列前后各k个不为空的值
index = n - 1
while len(X) < k and index >= 0:
if not np.isnan(s[index]):
Y.append(s[index])
X.append(index)
index -= 1
index = n + 1
X.reverse()
Y.reverse()
while len(X) < 2 * k and index <= len(s):
if not np.isnan(s[index]):
Y.append(s[index])
X.append(index)
index += 1
# print(X)
# print(Y)
return LagrangeInterpolation(X,Y, x_j) # 插值并返回插值结果
def test():
before_data = pd.DataFrame()
cat_sale[:][cat_sale[:] == 0] = np.nan # 在索引比较的时候要转换成同一类型使用astype
for j in range(len(cat_sale['num_gearbox_pumpoutletpress'])):
if (cat_sale['num_gearbox_pumpoutletpress'].isnull())[j]:
x_j = cat_sale.index[j]
# cat_sale.loc[j,'num_gearbox_pumpoutletpress'] = ployinterp_columns(cat_sale['num_gearbox_pumpoutletpress'], j, x_j,is_fast=True)
a = ployinterp_columns(cat_sale['num_gearbox_pumpoutletpress'], j, x_j, is_fast=True)
if a > 10 or a <= 3:
a = 5
before_data.loc[j, 'num_gearbox_pumpoutletpress'] = a
# print('第{0}行牛顿插值为{1}'.format(j, cat_sale.loc[j,'num_gearbox_sumptemp']))
else:
a = cat_sale.loc[j, 'num_gearbox_pumpoutletpress']
if a > 10 or a <= 3:
a = 5
before_data.loc[j, 'num_gearbox_pumpoutletpress'] = a
# cat_sale[:][cat_sale[:] == np.nan] = 0
return before_data['num_gearbox_pumpoutletpress'], cat_sale['num_gearbox_pumpoutletpress']
def plot(x_axis, original_data, restored_data):
import matplotlib.pyplot as plt
# 绘制原始数据和修复后的数据对比图
plt.figure(figsize=(3.2, 2.0))
plt.subplots_adjust(left=0.18, right=0.94, bottom=0.25, top=0.85, wspace=None, hspace=None)
plt.tight_layout()
plt.plot(x_axis, original_data, label='Original Data', color='blue')
plt.plot(x_axis, restored_data, label='Restored Data', color='red')
font = {'family': 'Times New Roman', 'weight': 'normal', 'size': 10}
plt.xlabel('Points', font)
plt.ylabel("Value", font)
plt.title('Original Data vs Restored Data', font)
plt.legend(loc=2, prop=font, handlelength=0.45, frameon=True, facecolor='w')
plt.savefig('LagrangeInsert.png',dpi=600)
plt.show()
if __name__ == '__main__': if __name__ == '__main__':
restored_data, original_data = test()
for i in range(len(original_data)):
a = original_data[i]
if np.isnan(a):
original_data[i] = 0
plot([i for i in range(len(original_data))], original_data, restored_data)
path = "G:\data\SCADA数据\jb4q_8.csv" path = "G:\data\SCADA数据\jb4q_8.csv"
df = pd.read_csv(path) df = pd.read_csv(path)

View File

@ -48,15 +48,15 @@ class Transformer(Model):
# 接下来如果传入了representation_size就构建一个全连接层激活函数为tanh # 接下来如果传入了representation_size就构建一个全连接层激活函数为tanh
# 如果没有传入的话,就不做任何操作 # 如果没有传入的话,就不做任何操作
if representation_size: # if representation_size:
self.has_logits = True # self.has_logits = True
self.pre_logits = layers.Dense(representation_size, activation="tanh", name="pre_logits") # self.pre_logits = layers.Dense(representation_size, activation="tanh", name="pre_logits")
else: # else:
self.has_logits = False # self.has_logits = False
self.pre_logits = layers.Activation("linear") # self.pre_logits = layers.Activation("linear")
# 定义最后一个全连接层节点个数就是我们的分类个数num_classes # 定义最后一个全连接层节点个数就是我们的分类个数num_classes
self.head = layers.Dense(num_classes, name="head", kernel_initializer=initializers.Zeros()) # self.head = layers.Dense(num_classes, name="head", kernel_initializer=initializers.Zeros())
def get_config(self): def get_config(self):
# 自定义层里面的属性 # 自定义层里面的属性
@ -87,10 +87,10 @@ class Transformer(Model):
x = self.norm(x) x = self.norm(x)
# 这里是提取class_toke的输出然后用切片的方式而刚刚是将class_toke拼接在最前面的 # 这里是提取class_toke的输出然后用切片的方式而刚刚是将class_toke拼接在最前面的
# 所以这里用切片的方式去取class_toke的输出并将它传递给pre_logits # 所以这里用切片的方式去取class_toke的输出并将它传递给pre_logits
x = self.pre_logits(x[:, 0]) # x = self.pre_logits(x[:, 0])
# 最后传递给head # # 最后传递给head
x = self.head(x) # x = self.head(x)
# 为什么只用class_toke对应的输出而不用每一个patches对应的输出呢 # # 为什么只用class_toke对应的输出而不用每一个patches对应的输出呢
# 可以参考原文bird 网络 # 可以参考原文bird 网络
return x return x
@ -98,4 +98,5 @@ class Transformer(Model):
if __name__ == '__main__': if __name__ == '__main__':
# 使用方式 # 使用方式
input =tf.Variable(shape=[20, 10, 10])
Transformer(embed_dim=10, depth=8, num_heads=1, num_classes=10) Transformer(embed_dim=10, depth=8, num_heads=1, num_classes=10)

View File

@ -35,11 +35,10 @@ K = 18
namuda = 0.01 namuda = 0.01
'''保存名称''' '''保存名称'''
save_name = "../model/joint/{0}_timestamp{1}_feature{2}.h5".format(model_name, save_name = "../model/weight/{0}_timestamp{1}_feature{2}_weight/weight".format(model_name,
time_stamp, time_stamp,
feature_num, feature_num,
batch_size, )
EPOCH)
save_step_two_name = "../model/joint_two/{0}_timestamp{1}_feature{2}.h5".format(model_name, save_step_two_name = "../model/joint_two/{0}_timestamp{1}_feature{2}.h5".format(model_name,
time_stamp, time_stamp,
feature_num, feature_num,
@ -244,29 +243,44 @@ if __name__ == '__main__':
#### TODO 第一步训练 #### TODO 第一步训练
####### TODO 训练 ####### TODO 训练
model = Transformer(embed_dim=10, depth=5, num_heads=1, num_classes=10,representation_size=128) # model = Transformer(embed_dim=10, depth=5, num_heads=1, num_classes=10,representation_size=128)
checkpoint = tf.keras.callbacks.ModelCheckpoint( # checkpoint = tf.keras.callbacks.ModelCheckpoint(
filepath=save_name, # filepath=save_name,
monitor='val_loss', # monitor='val_loss',
verbose=2, # verbose=2,
save_best_only=True, # save_best_only=True,
mode='min') # mode='min')
lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.001) # lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.001)
#
#
# model.build(input_shape=(batch_size, time_stamp, feature_num))
# model.summary()
# early_stop = EarlyStopping(monitor='val_loss', min_delta=0.0001, patience=3, mode='min', verbose=1)
#
# history = model.fit(train_data_healthy[:train_data_healthy.shape[0] // 7, :, :],
# train_label1_healthy[:train_label1_healthy.shape[0] // 7, ], epochs=5,
# batch_size=batch_size * 10, validation_split=0.2, shuffle=True, verbose=1,
# )
#
# model.save_weights(save_name)
model.compile(optimizer=tf.optimizers.Adam(), loss=tf.losses.mse) model = Transformer(embed_dim=10, depth=5, num_heads=1, num_classes=10, representation_size=128)
model.load_weights(save_name)
model.build(input_shape=(batch_size, time_stamp, feature_num)) model.build(input_shape=(batch_size, time_stamp, feature_num))
model.summary() model.summary()
early_stop = EarlyStopping(monitor='val_loss', min_delta=0.0001, patience=3, mode='min', verbose=1)
history = model.fit(train_data_healthy[:train_data_healthy.shape[0] // 7, :, :],
train_label1_healthy[:train_label1_healthy.shape[0] // 7, ], epochs=EPOCH,
batch_size=batch_size * 10, validation_split=0.2, shuffle=True, verbose=1,
callbacks=[checkpoint, lr_scheduler, early_stop])
#
# #
# #
# #### TODO 测试 # #### TODO 测试
# model = tf.keras.models.load_model("E:\self_example\TensorFlow_eaxmple\Model_train_test\condition_monitoring\model\joint/transformer_timestamp120_feature10.h5"
# , custom_objects={'Transformer': Transformer}
# )
trained_data = tf.keras.models.Model(inputs=model.input, outputs=model.get_layer('encoder_norm').output).predict(train_data_healthy, batch_size=32)
print(trained_data)
# #
start = time.time() start = time.time()
# 中间写上代码块 # 中间写上代码块
@ -276,7 +290,7 @@ if __name__ == '__main__':
print("data_size:", train_data_healthy.shape) print("data_size:", train_data_healthy.shape)
print('Running time: %s Seconds' % (end - start)) print('Running time: %s Seconds' % (end - start))
trained_model = tf.keras.models.load_model(save_name, custom_objects={'Block': Block}) # trained_model = tf.keras.models.load_model(save_name, custom_objects={'Block': Block})
# #
# #
# #

View File

@ -0,0 +1,113 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/10/23 10:43
@Usage :
@Desc : Measure the Transformer's parameter count and time complexity; 6 self-attention layers
'''
import tensorflow as tf
from tensorflow.keras import Model, layers, initializers
import numpy as np
from model.SelfAttention.SelfAttention import Block
class Transformer(Model):
# depth is the number of repeated encoder blocks; num_heads is the number of heads in multi-head self-attention
# The MLP block contains a pre_logits part: when training on a larger dataset, pre_logits is a Dense layer followed by tanh;
# on a smaller dataset pre_logits is omitted, and representation_size is the number of units in that Dense layer
# num_classes is the number of output classes
def __init__(self, embed_dim=768,
depth=[], num_heads=12, qkv_bias=True, qk_scale=None,
drop_ratio=0., attn_drop_ratio=0., drop_path_ratio=0.,
representation_size=None, num_classes=1000, name="ViT-B/16"):
super(Transformer, self).__init__(name=name)
self.embed_dim = embed_dim
self.depth = depth
self.num_heads = num_heads
self.qkv_bias = qkv_bias
self.qk_scale = qk_scale
self.drop_ratio = drop_ratio
self.attn_drop_ratio = attn_drop_ratio
self.drop_path_ratio = drop_path_ratio
self.representation_size = representation_size
self.num_classes = num_classes
dpr = np.linspace(0., drop_path_ratio, len(depth)) # stochastic depth decay rule
# Repeat the Block module with a for loop.
# With drop path, drop_path_ratio increases gradually from 0 to the specified value,
# so each Block gets a different ratio, initialised here as an arithmetic sequence via np.linspace
self.blocks = []
self.denses = []
for i,dim in zip(range(len(depth)),depth):
self.blocks.append(
Block(dim=dim, num_heads=num_heads, qkv_bias=qkv_bias,
qk_scale=qk_scale, drop_ratio=drop_ratio, attn_drop_ratio=attn_drop_ratio,
drop_path_ratio=dpr[i], name="encoderblock_{}".format(i))
)
self.denses.append(layers.Dense(dim))
# self.norm = layers.LayerNormalization(epsilon=1e-6, name="encoder_norm")
# If representation_size is given, build a Dense layer with tanh activation;
# otherwise do nothing
# if representation_size:
# self.has_logits = True
# self.pre_logits = layers.Dense(representation_size, activation="tanh", name="pre_logits")
# else:
# self.has_logits = False
# self.pre_logits = layers.Activation("linear")
# The final Dense layer has num_classes units, one per class
# self.head = layers.Dense(num_classes, name="head", kernel_initializer=initializers.Zeros())
def get_config(self):
# Attributes of the custom layer
config = (
{
'embed_dim': self.embed_dim,
'depth': self.depth,
'num_heads': self.num_heads,
'qkv_bias': self.qkv_bias,
'qk_scale': self.qk_scale,
'drop_ratio': self.drop_ratio,
'attn_drop_ratio': self.attn_drop_ratio,
'drop_path_ratio': self.drop_path_ratio,
'representation_size': self.representation_size,
'num_classes': self.num_classes
}
)
base_config = super(Transformer, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
def call(self, inputs, training=None):
# [B, H, W, C] -> [B, num_patches, embed_dim]
x = inputs # [B, 196, 768]
for (block, dense) in zip(self.blocks, self.denses):
x = dense(x)
x = block(x, training=training)
# x = self.norm(x)
# # 这里是提取class_toke的输出然后用切片的方式而刚刚是将class_toke拼接在最前面的
# # 所以这里用切片的方式去取class_toke的输出并将它传递给pre_logits
# x = self.pre_logits(x[:, 0])
# # 最后传递给head
# x = self.head(x)
# 为什么只用class_toke对应的输出而不用每一个patches对应的输出呢
# 可以参考原文bird 网络
return x
if __name__ == '__main__':
# 使用方式
# input =tf.Variable(shape=[20, 10, 10])
# Transformer(embed_dim=10, depth=8, num_heads=1, num_classes=10)
print([i for i in range(5, 1, -1)])

View File

@ -0,0 +1,23 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/16 15:34
@Usage :
@Desc :
'''
import logging
import time
logging.basicConfig(format='%(asctime)s - %(filename)s[line:%(lineno)d] - %(levelname)s: %(message)s',
level=logging.INFO,
filename='E:\self_example\TensorFlow_eaxmple\Model_train_test/temp/test.log',
filemode='a')
i = 0
while True:
logging.info(i)
time.sleep(2)

View File

@ -0,0 +1,5 @@
@echo off
if "%1" == "h" goto begin
mshta vbscript:createobject("wscript.shell").run("%~nx0 h",0)(window.close)&&exit
:begin
D:\ProgramData\Anaconda3\envs\tensorflow\python.exe loggerTest.py

View File

@ -0,0 +1,303 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/10/23 14:23
@Usage :
@Desc :
'''
import tensorflow as tf
import tensorflow.keras
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from condition_monitoring.data_deal import loadData_daban as loadData
# from model.Joint_Monitoring.Joint_Monitoring_banda import Joint_Monitoring
# from model.CommonFunction.CommonFunction import *
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import load_model, save_model
from temp.Transformer import Transformer
from model.SelfAttention.SelfAttention import Block
from keras.callbacks import EarlyStopping
import time
'''Hyperparameter settings'''
time_stamp = 120
feature_num = 10
batch_size = 32
learning_rate = 0.001
EPOCH = 101
model_name = "transformer"
'''EWMA hyperparameters'''
K = 18
namuda = 0.01
'''Save paths'''
save_name = "../model/weight/{0}_timestamp{1}_feature{2}_weight/weight".format(model_name,
time_stamp,
feature_num,
)
save_step_two_name = "../model/joint_two/{0}_timestamp{1}_feature{2}.h5".format(model_name,
time_stamp,
feature_num,
batch_size,
EPOCH)
save_mse_name = r"./compare/mse/JM_banda/{0}_result.csv".format(model_name)
'''Data file'''
file_name = "G:\data\SCADA数据\SCADA_已处理_粤水电达坂城2020.1月-5月\风机15.csv"
'''
File notes: jb4q_8_delete_total_zero.csv only has the all-zero columns removed.
Rows 0:96748 are all normal values (2019/12/30 00:00:00 - 2020/3/11 05:58:00);
rows 96748:107116 are all abnormal values (2020/3/11 05:58:01 - 2021/3/18 11:04:00)
'''
'''File parameters'''
# last normal time point
healthy_date = 96748
# last abnormal time point
unhealthy_date = 107116
# abnormality tolerance
unhealthy_patience = 5
def remove(data, time_stamp=time_stamp):
rows, cols = data.shape
print("remove_data.shape:", data.shape)
num = int(rows / time_stamp)
return data[:num * time_stamp, :]
pass
# Non-overlapping sampling
def get_training_data(data, time_stamp: int = time_stamp):
removed_data = remove(data=data)
rows, cols = removed_data.shape
print("removed_data.shape:", data.shape)
print("removed_data:", removed_data)
train_data = np.reshape(removed_data, [-1, time_stamp, cols])
print("train_data:", train_data)
batchs, time_stamp, cols = train_data.shape
for i in range(1, batchs):
each_label = np.expand_dims(train_data[i, 0, :], axis=0)
if i == 1:
train_label = each_label
else:
train_label = np.concatenate([train_label, each_label], axis=0)
print("train_data.shape:", train_data.shape)
print("train_label.shape", train_label.shape)
return train_data[:-1, :], train_label
# Overlapping sampling
def get_training_data_overlapping(data, time_stamp: int = time_stamp, is_Healthy: bool = True):
rows, cols = data.shape
train_data = np.empty(shape=[rows - time_stamp - 1, time_stamp, cols])
train_label = np.empty(shape=[rows - time_stamp - 1, cols])
for i in range(rows):
if i + time_stamp >= rows:
break
if i + time_stamp < rows - 1:
train_data[i] = data[i:i + time_stamp]
train_label[i] = data[i + time_stamp]
print("重叠采样以后:")
print("data:", train_data) # (300334,120,10)
print("label:", train_label) # (300334,10)
if is_Healthy:
train_label2 = np.ones(shape=[train_label.shape[0]])
else:
train_label2 = np.zeros(shape=[train_label.shape[0]])
print("label2:", train_label2)
return train_data, train_label, train_label2
# Min-max normalization
def normalization(data):
rows, cols = data.shape
print("归一化之前:", data)
print(data.shape)
print("======================")
# 归一化
max = np.max(data, axis=0)
max = np.broadcast_to(max, [rows, cols])
min = np.min(data, axis=0)
min = np.broadcast_to(min, [rows, cols])
data = (data - min) / (max - min)
print("归一化之后:", data)
print(data.shape)
return data
# Standardization (zero mean, unit variance)
def Regularization(data):
rows, cols = data.shape
print("正则化之前:", data)
print(data.shape)
print("======================")
# 正则化
mean = np.mean(data, axis=0)
mean = np.broadcast_to(mean, shape=[rows, cols])
dst = np.sqrt(np.var(data, axis=0))
dst = np.broadcast_to(dst, shape=[rows, cols])
data = (data - mean) / dst
print("正则化之后:", data)
print(data.shape)
return data
pass
def EWMA(data, K=K, namuda=namuda):
# what t represents is not yet clear (kept at 0 for now)
t = 0
mid = np.mean(data, axis=0)
standard = np.sqrt(np.var(data, axis=0))
UCL = mid + K * standard * np.sqrt(namuda / (2 - namuda) * (1 - (1 - namuda) ** 2 * t))
LCL = mid - K * standard * np.sqrt(namuda / (2 - namuda) * (1 - (1 - namuda) ** 2 * t))
return mid, UCL, LCL
pass
def get_MSE(data, label, new_model):
predicted_data = new_model.predict(data)
temp = np.abs(predicted_data - label)
temp1 = (temp - np.broadcast_to(np.mean(temp, axis=0), shape=predicted_data.shape))
temp2 = np.broadcast_to(np.sqrt(np.var(temp, axis=0)), shape=predicted_data.shape)
temp3 = temp1 / temp2
mse = np.sum((temp1 / temp2) ** 2, axis=1)
print("z:", mse)
print(mse.shape)
# mse=np.mean((predicted_data-label)**2,axis=1)
print("mse", mse)
dims, = mse.shape
mean = np.mean(mse)
std = np.sqrt(np.var(mse))
max = mean + 3 * std
# min = mean-3*std
max = np.broadcast_to(max, shape=[dims, ])
# min = np.broadcast_to(min,shape=[dims,])
mean = np.broadcast_to(mean, shape=[dims, ])
# plt.plot(max)
# plt.plot(mse)
# plt.plot(mean)
# # plt.plot(min)
# plt.show()
#
#
return mse, mean, max
# pass
def condition_monitoring_model():
input = tf.keras.Input(shape=[time_stamp, feature_num])
conv1 = tf.keras.layers.Conv1D(filters=256, kernel_size=1)(input)
GRU1 = tf.keras.layers.GRU(128, return_sequences=False)(conv1)
d1 = tf.keras.layers.Dense(300)(GRU1)
output = tf.keras.layers.Dense(10)(d1)
model = tf.keras.Model(inputs=input, outputs=output)
return model
def test(step_one_model, step_two_model, test_data, test_label1, test_label2):
history_loss = []
history_val_loss = []
val_loss, val_accuracy = step_two_model.get_val_loss(val_data=test_data, val_label1=test_label1,
val_label2=test_label2,
is_first_time=False, step_one_model=step_one_model)
history_val_loss.append(val_loss)
print("val_accuracy:", val_accuracy)
print("val_loss:", val_loss)
if __name__ == '__main__':
total_data = loadData.execute(N=feature_num, file_name=file_name)
total_data = normalization(data=total_data)
train_data_healthy, train_label1_healthy, train_label2_healthy = get_training_data_overlapping(
total_data[:healthy_date, :], is_Healthy=True)
train_data_unhealthy, train_label1_unhealthy, train_label2_unhealthy = get_training_data_overlapping(
total_data[healthy_date - time_stamp + unhealthy_patience:unhealthy_date, :],
is_Healthy=False)
#### TODO step-one training
####### TODO training
# model = Transformer(embed_dim=10, depth=[100,200,300,100,1], num_heads=1, num_classes=1,representation_size=128)
# checkpoint = tf.keras.callbacks.ModelCheckpoint(
# filepath=save_name,
# monitor='val_loss',
# verbose=2,
# save_best_only=True,
# save_weights_only=True,
# mode='min')
# lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.001)
#
#
# model.build(input_shape=(batch_size, time_stamp, feature_num))
# model.summary()
# model.compile(optimizer=tf.optimizers.Adam(learning_rate=learning_rate), loss=tf.losses.mse)
# early_stop = EarlyStopping(monitor='val_loss', min_delta=0.0001, patience=3, mode='min', verbose=1)
#
# history = model.fit(train_data_healthy[:train_data_healthy.shape[0] // 7, :, :],
# train_data_healthy[:train_data_healthy.shape[0] // 7, :, 0], epochs=50,
# batch_size=batch_size * 10, validation_split=0.2, shuffle=True, verbose=1,
# callbacks=[checkpoint,lr_scheduler,early_stop]
# )
# #
# model.save_weights(save_name)
model = Transformer(embed_dim=10, depth=[100,200,300,100,1], num_heads=1, num_classes=1, representation_size=128)
model.load_weights("../model/weight/transformer_timestamp120_feature10_epoch16_weight_0.000087/weight")
# model.build(input_shape=(batch_size, time_stamp, feature_num))
# model.summary()
#
#
    # #### TODO Testing
trained_data = model.predict(train_data_healthy[:train_data_healthy.shape[0] // 7, :, :], batch_size=32)
print(trained_data)
print(trained_data.shape)
plt.plot(trained_data[:,-1,:])
plt.show()
#
start = time.time()
    # timed code block goes here
model.predict(train_data_healthy, batch_size=32)
end = time.time()
print("data_size:", train_data_healthy.shape)
print('Running time: %s Seconds' % (end - start))
# trained_model = tf.keras.models.load_model(save_name, custom_objects={'Block': Block})
#
#
#
    # # Predict using the known points
#
# pass

View File

@ -0,0 +1,8 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/21 16:42
@Usage :
@Desc :
'''

View File

@ -0,0 +1,44 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/21 16:44
@Usage :
@Desc :
'''
import pandas as pd
import numpy as np
import os
root_path = r'G:\data\cmd_predict_data\shaft2/46\shaft2'
L = []
for root, dir, filename in os.walk(root_path):
for file in filename:
if os.path.splitext(file)[1] == '.csv':
L.append(os.path.join(root_path,file))
a = os.path.join(root_path, file)
b = a.split(".")[1].split("_")
z = 0
L.sort(key=lambda x:x.split(".")[1].split("_")[1])
for filename in L:
    print("Read {0} files".format(z + 1))
    # data_single = pd.read_csv(filename, dtype=np.float32, header=None)
    data_single = np.loadtxt(filename, delimiter=',', dtype=str)  # np.str was removed in NumPy 1.24
# data_single = data_single.iloc[0, :].values
if z == 0:
# data_all=data_single
HI_data = data_single
else:
# data_all=np.hstack([data_all,data_single])
HI_data = np.vstack([HI_data, data_single])
z += 1
print(z)
# print(data_all.shape)
print(HI_data.shape)
np.save("data1.npy",HI_data)
# io.savemat("./HI_data.mat", {'HI_data': HI_data})

View File

@ -0,0 +1,24 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/22 20:47
@Usage :
@Desc :
'''
import numpy as np
import matplotlib.pyplot as plt
data = np.load("../data/data.npy")
print(data)
print(data.shape)
# data = np.transpose(data, axes=[1, 0])
data = np.reshape(data[317:517,:], [-1, 1])
plt.plot(data)
plt.show()
# data = np.reshape(data,[-1,8192])
# np.save("HI_data.npy",data)
# print(data.shape)

View File

@ -0,0 +1,155 @@
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Load the data
data = np.load("../data/HI_data.npy")
print(data.shape) # (2803, 2560)
(samples, dims) = data.shape
# # Build the 24 statistical indicators
fs = 25.6 * 1000
T1 = np.mean(data, axis=1)
print(T1.shape)
#
T2 = np.sqrt(np.var(data, axis=1))
T3 = np.mean(np.sqrt(np.abs(data)), axis=1) ** 2
T4 = np.sqrt(np.mean(data ** 2, axis=1))
T5 = np.max(np.abs(data), axis=1)
T6 = np.mean((data - np.broadcast_to(np.expand_dims(T1, axis=1), (samples, dims))) ** 3, axis=1) / (T4 ** 3)
T7 = np.mean((data - np.broadcast_to(np.expand_dims(T1, axis=1), (samples, dims))) ** 4, axis=1) / (T4 ** 4)
T8 = T5 / T4
T9 = T5 / T3
T10 = T4 / np.mean(np.abs(data), axis=1)
T11 = T5 / np.mean(np.abs(data), axis=1)
# Frequency-domain indicators
sk = np.abs(np.fft.rfft(data, axis=1) * 2 / dims)
sk = sk[:, 0:-1] # (2803,1280)
(samples, k) = sk.shape # (2803,1280)
print("data:", data)
print("sk:", sk)
fk = np.empty(shape=[samples, k])
for sample in range(samples):
for i in range(k):
fk[sample][i] = (fs / dims) * (i + 1)
# print(fk)
# print(fk.shape)
# plt.plot(sk[1,:])
# plt.xlim((0,k))
# plt.ylim((0,1.5))
# plt.show()
# print(sk.shape)
F1 = np.mean(sk, axis=1)
F2 = np.var(sk, axis=1) * k / (k - 1)
F3 = np.mean((sk - np.broadcast_to(np.expand_dims(F1, axis=1), (samples, k))) ** 3, axis=1) / (np.sqrt(F2) ** 3)
F4 = np.mean((sk - np.broadcast_to(np.expand_dims(F1, axis=1), (samples, k))) ** 4, axis=1) / (F2 ** 2)
F5 = np.sum(np.multiply(fk, sk), axis=1) / np.sum(sk, axis=1)
F6 = np.sqrt(np.mean(np.multiply((fk - np.broadcast_to(np.expand_dims(F5, axis=1), (samples, k))) ** 2, sk), axis=1))
F7 = np.sqrt(np.sum(np.multiply(fk ** 2, sk), axis=1) / np.sum(sk, axis=1))
F8 = np.sqrt(np.sum(np.multiply(fk ** 4, sk), axis=1) / np.sum(fk ** 2 * sk, axis=1))
F9 = np.sum(np.multiply(fk ** 2, sk), axis=1) / np.sqrt(np.sum(sk, axis=1) * np.sum(np.multiply(fk ** 4, sk), axis=1))
F10 = F6 / F5
F11 = np.mean(np.multiply((fk - np.broadcast_to(np.expand_dims(F5, axis=1), (samples, k))) ** 3, sk), axis=1) / (
F6 ** 3)
F12 = np.mean(np.multiply((fk - np.broadcast_to(np.expand_dims(F5, axis=1), (samples, k))) ** 4, sk), axis=1) / (
F6 ** 4)
F13 = np.mean(np.sqrt(np.abs(fk - np.broadcast_to(np.expand_dims(F5, axis=1), (samples, k)))) * sk, axis=1) / np.sqrt(
F6)
# Min-max normalization
T1 = (T1 - np.min(T1)) / (np.max(T1) - np.min(T1))
T2 = (T2 - np.min(T2)) / (np.max(T2) - np.min(T2))
T3 = (T3 - np.min(T3)) / (np.max(T3) - np.min(T3))
T4 = (T4 - np.min(T4)) / (np.max(T4) - np.min(T4))
T5 = (T5 - np.min(T5)) / (np.max(T5) - np.min(T5))
T6 = (T6 - np.min(T6)) / (np.max(T6) - np.min(T6))
T7 = (T7 - np.min(T7)) / (np.max(T7) - np.min(T7))
T8 = (T8 - np.min(T8)) / (np.max(T8) - np.min(T8))
T9 = (T9 - np.min(T9)) / (np.max(T9) - np.min(T9))
T10 = (T10 - np.min(T10)) / (np.max(T10) - np.min(T10))
T11 = (T11 - np.min(T11)) / (np.max(T11) - np.min(T11))
F1 = (F1 - np.min(F1)) / (np.max(F1) - np.min(F1))
F2 = (F2 - np.min(F2)) / (np.max(F2) - np.min(F2))
F3 = (F3 - np.min(F3)) / (np.max(F3) - np.min(F3))
F4 = (F4 - np.min(F4)) / (np.max(F4) - np.min(F4))
F5 = (F5 - np.min(F5)) / (np.max(F5) - np.min(F5))
F6 = (F6 - np.min(F6)) / (np.max(F6) - np.min(F6))
F7 = (F7 - np.min(F7)) / (np.max(F7) - np.min(F7))
F8 = (F8 - np.min(F8)) / (np.max(F8) - np.min(F8))
F9 = (F9 - np.min(F9)) / (np.max(F9) - np.min(F9))
F10 = (F10 - np.min(F10)) / (np.max(F10) - np.min(F10))
F11 = (F11 - np.min(F11)) / (np.max(F11) - np.min(F11))
F12 = (F12 - np.min(F12)) / (np.max(F12) - np.min(F12))
F13 = (F13 - np.min(F13)) / (np.max(F13) - np.min(F13))
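# A minimal refactoring sketch (assumption, not used below): the 24 repeated min-max lines above
# could be expressed with a single helper such as this hypothetical minmax().
def minmax(x):
    return (x - np.min(x)) / (np.max(x) - np.min(x))
# e.g. T1 = minmax(T1); F1 = minmax(F1); ...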
print(F5)
# plt.plot(F5)
# plt.show()
def plot(data):
l, c = data.shape
for i in range(c):
plt.figure(i + 1)
plt.plot(data[:, i])
plt.show()
if __name__ == '__main__':
T1 = np.expand_dims(T1, axis=1)
T2 = np.expand_dims(T2, axis=1)
T3 = np.expand_dims(T3, axis=1)
T4 = np.expand_dims(T4, axis=1)
T5 = np.expand_dims(T5, axis=1)
T6 = np.expand_dims(T6, axis=1)
T7 = np.expand_dims(T7, axis=1)
T8 = np.expand_dims(T8, axis=1)
T9 = np.expand_dims(T9, axis=1)
T10 = np.expand_dims(T10, axis=1)
T11 = np.expand_dims(T11, axis=1)
F1 = np.expand_dims(F1, axis=1)
F2 = np.expand_dims(F2, axis=1)
F3 = np.expand_dims(F3, axis=1)
F4 = np.expand_dims(F4, axis=1)
F5 = np.expand_dims(F5, axis=1)
F6 = np.expand_dims(F6, axis=1)
F7 = np.expand_dims(F7, axis=1)
F8 = np.expand_dims(F8, axis=1)
F9 = np.expand_dims(F9, axis=1)
F10 = np.expand_dims(F10, axis=1)
F11 = np.expand_dims(F11, axis=1)
F12 = np.expand_dims(F12, axis=1)
F13 = np.expand_dims(F13, axis=1)
feature_data = tf.concat(
[T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13], axis=1)
# plot(feature_data)
np.save('../data/feature_data.npy', feature_data)
print(feature_data.shape)
# print(HI_data.shape)
# np.save("../data/HI_DATA/HIed_data.npy",HI_data)

View File

@ -0,0 +1,98 @@
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Load the data
feature_data = np.load("../data/feature_data.npy")
print(feature_data.shape) # (2803,24)
feature_data = np.transpose(feature_data, [1, 0])
print(feature_data.shape) # (24,2803)
# data.shape:(24,2803)
class HI_select():
def __init__(self, data):
self.data = data
def getScore(self):
score = (self.getTred() + self.getMon() + self.getScale()) / 2
print(score.shape)
return score
def getTred(self):
h = self.data
(features, dims) = h.shape
h_mean = np.mean(h, axis=1)
tk = np.broadcast_to(np.expand_dims(np.arange(dims), axis=0), (features, dims)) # (24,2803)
tk_mean = np.mean(tk, axis=1)
tred = np.abs(np.sum(np.multiply((h - np.broadcast_to(np.expand_dims(h_mean, axis=1), (features, dims))),
(tk - np.broadcast_to(np.expand_dims(tk_mean, axis=1), (features, dims)))),
axis=1)) / np.sqrt(
np.sum((h - np.broadcast_to(np.expand_dims(h_mean, axis=1), (features, dims))) ** 2, axis=1) * np.sum(
(tk - np.broadcast_to(np.expand_dims(tk_mean, axis=1), (features, dims))) ** 2, axis=1))
# print(tred)
tred = np.expand_dims(tred, axis=1)
# print("tred.shape:", tred.shape)
return tred
    # Monotonicity
def getMon(self):
h = self.data
(features, dims) = h.shape
        mon = np.empty(shape=[features, 1])
for feature in range(features):
positive = 0
negative = 0
for dim in range(dims):
if dim + 1 >= dims:
break
if h[feature][dim + 1] - h[feature][dim] > 0:
positive += 1
if h[feature][dim + 1] - h[feature][dim] < 0:
negative += 1
# print("positive:",positive)
# print("negetive:",negative)
mon[feature] = np.abs((positive - negative) / (dims - 1))
# print(mon[feature])
# print(mon)
# print("mon.shape",mon.shape)
return mon
    # Scale similarity (placeholder: not implemented yet, contributes zeros to the score)
    def getScale(self):
        scale = np.zeros(shape=[self.data.shape[0], 1])
        return scale
if __name__ == '__main__':
scores = HI_select(feature_data).getScore()
(feature, score) = scores.shape
scores = np.ravel(scores)
print(scores.shape)
    # Min-max normalization
# scores = (scores - np.min(scores)) / (np.max(scores) - np.min(scores))
    # Score bar chart
plt.bar(range(feature),scores,color=['r','g','b','c','m','y'])
plt.show()
    # Get the indices of the largest scores (the commented call below keeps the top 12)
    # print(scores)
    # indexs = np.argpartition(scores, -12)[-12:]  # [ 1 23 16 9 19 20 2 22 18]; hand-picked [1,2,3,11,20,23]; alternatives [9,16,18]
# print(indexs)
    # # Select the required feature columns
# Selected_data = []
# feature_data = np.transpose(feature_data, [1, 0]) # (2803,24)
# z = 0
# for index in indexs:
# if z == 0:
# Selected_data = feature_data[:, index]
# else:
# Selected_data = np.vstack([Selected_data, feature_data[:, index]])
# z += 1
# Selected_data=np.transpose(Selected_data,[1,0]) #(2803,9)
# # np.save("Select_data.npy",Selected_data)
# print(Selected_data.shape)
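    # A small illustrative addition (assumption, not in the original script): list the indicators
    # ranked by score so the hand-picked indices above can be cross-checked; np.argsort is used
    # here instead of the commented-out argpartition call.
    ranked = np.argsort(scores)[::-1]
    print("indicators ranked by score (best first):", ranked)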

View File

@ -0,0 +1,22 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/23 13:43
@Usage :
@Desc :
'''
import numpy as np
import matplotlib.pyplot as plt
data = np.load("HI_merge_data1.npy")
print(data)
data= data[:,1]
plt.plot(data)
plt.show()

View File

@ -0,0 +1,82 @@
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.callbacks import EarlyStopping
# Load the data
# train_data = np.load("Select_data.npy")
# print(train_data.shape)
feature_data = np.load("../data/feature_data.npy")
print(feature_data.shape) # (2803,24)
indexs=[4,11,16,20,23]
z = 0
for index in indexs:
if z == 0:
Selected_data = feature_data[:, index]
else:
Selected_data = np.vstack([Selected_data, feature_data[:, index]])
z += 1
Selected_data = np.transpose(Selected_data, [1, 0]) # (2803,9)
plt.plot(Selected_data)
plt.show()
train_data=Selected_data[1500:-1,:]
print(train_data.shape)
plt.plot(train_data)
plt.show()
# Build the model
class model():
def __init__(self, input_shape=9):
self.input_shape = input_shape
pass
def getModel(self, model_Type='ae'):
if model_Type == 'ae':
model = self.AE_model()
return model
else:
            raise ValueError("Model type not implemented")
def AE_model(self):
input = tf.keras.Input(shape=self.input_shape)
d1 = tf.keras.layers.Dense(4)(input)
# d2 = tf.keras.layers.Dense(2, activation='relu')(d1)
d3 = tf.keras.layers.Dense(2, name='mid', activation='relu')(d1)
# d4 = tf.keras.layers.Dense(2, activation='relu')(d3)
d5 = tf.keras.layers.Dense(4)(d3)
d6 = tf.keras.layers.Dense(self.input_shape)(d5)
model = tf.keras.Model(inputs=input, outputs=d6)
return model
# Train the AE and fuse the HI indicators
if __name__ == '__main__':
model = model(input_shape=5).getModel(model_Type='ae')
model.compile(optimizer=tf.optimizers.Adam(0.001), loss=tf.losses.mse, metrics=['acc'])
# model.summary()
checkpoint = tf.keras.callbacks.ModelCheckpoint(
filepath="AE_model.h5",
monitor='val_loss',
verbose=2,
save_best_only=True,
mode='min')
lr_scheduler = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=10, min_lr=0.0001)
model.compile(optimizer=tf.optimizers.SGD(), loss=tf.losses.mse)
# model.compile(optimizer=tf.optimizers.SGD(learning_rate=0.001), loss=FTMSE())
model.summary()
early_stop = EarlyStopping(monitor='val_loss', min_delta=0.0001, patience=20, mode='min', verbose=1)
history = model.fit(train_data, train_data, epochs=1000, batch_size=100)
HI_merge_data = tf.keras.models.Model(inputs=model.input, outputs=model.get_layer('mid').output).predict(train_data)
print(HI_merge_data)
acc = np.array(history.history.get('acc'))
# if acc[299] > 0.9:
np.save("HI_merge_data1.npy", HI_merge_data)
model.save("AE_model.h5")

View File

@ -0,0 +1,8 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/21 16:41
@Usage :
@Desc :
'''

View File

@ -0,0 +1,143 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/10 15:57
@Usage :
@Desc :
'''
import os
import shutil
import numpy as np
import pandas as pd
def folderGenerate(folder_name):
if not os.path.exists(folder_name):
os.makedirs(folder_name)
# os.mkdir(folder_name)
# Recursively delete a folder
def folderDelete(folder_name):
if os.path.exists(folder_name):
shutil.rmtree(folder_name)
# Decide whether to save the model this time; history_loss stores the previous loss values
def SaveBestModel(model, save_name, history_loss, loss_value, pattern: str = "min", epoch=0, is_all=False):
weight_folder = save_name[:-4]
if is_all:
weight_folder = weight_folder + '_epoch' + str(epoch) + "_" + str(loss_value)
save_name = weight_folder + save_name[-7:]
    # If history_loss is empty, save directly
    if len(history_loss) == 0:
        folderGenerate(weight_folder)
        model.save_weights(save_name)
        return
    if pattern == "min":
        # Save only if this loss is lower than every previous one
        if np.min(history_loss) > loss_value:
            # Delete the previous checkpoint and save this one
            folderDelete(weight_folder)
            folderGenerate(weight_folder)
            model.save_weights(save_name)
            print("Saving this model")
            return
    elif pattern == "max":
        # Save only if this value is higher than every previous one
        if np.max(history_loss) < loss_value:
            # Delete the previous checkpoint and save this one
            folderDelete(weight_folder)
            folderGenerate(weight_folder)
            model.save_weights(save_name)
            print("Saving this model")
            return
    else:
        raise ValueError("Pattern not implemented")
pass
# Decide whether to save the model this time; history_accuracy stores the previous accuracy values
def SaveBestModelByAccuracy(model, save_name, history_accuracy, accuracy_value):
    weight_folder = save_name[:-7]
    # If history_accuracy is empty, save directly
    if len(history_accuracy) == 0:
        folderGenerate(weight_folder)
        model.save_weights(save_name)
        return
    # Save only if this accuracy is higher than every previous one
    if np.max(history_accuracy) < accuracy_value:
        # Delete the previous checkpoint and save this one
        folderDelete(weight_folder)
        folderGenerate(weight_folder)
        model.save_weights(save_name)
        print("Saving this model")
return
pass
# Decide whether to save the model this time; history_loss stores the previous loss values
def SaveBestH5Model(model, save_name, history_loss, loss_value):
    dirpath = os.path.dirname(save_name)
    folderGenerate(dirpath)
    # If history_loss is empty, save directly
    if len(history_loss) == 0:
        model.save(save_name)
        return
    # Save only if this loss is lower than every previous one
    if np.min(history_loss) > loss_value:
        # Overwrite the previous checkpoint with this one
        model.save(save_name, overwrite=True)
        print("Saving this model")
return
pass
def IsStopTraining(history_loss, patience=5, pattern: str = "min"):
if len(history_loss) <= patience:
return False
    if pattern == "min":
        if history_loss[-(patience + 1)] < min(history_loss[-patience:]):
            print("loss has not improved for", patience, "epochs, stopping training")
            return True
    elif pattern == "max":
        if history_loss[-(patience + 1)] > max(history_loss[-patience:]):
            print("accuracy has not improved for", patience, "epochs, stopping training")
            return True
    else:
        raise ValueError("Pattern not implemented")
return False
def Is_Reduce_learning_rate(history_loss, patience=3, pattern: str = "min"):
if len(history_loss) <= patience:
return False
    if pattern == "min":
        for i in range(patience):
            # -(i + 1) indexes the last `patience` values; -i would wrongly read the first element when i == 0
            if history_loss[-(patience + 1)] > history_loss[-(i + 1)]:
                return False
    elif pattern == "max":
        for i in range(patience):
            if history_loss[-(patience + 1)] < history_loss[-(i + 1)]:
                return False
    else:
        raise ValueError("Pattern not implemented")
    print("loss has not improved for", patience, "epochs, reducing the learning rate")
    return True
if __name__ == '__main__':
history_loss = [0.1, 0.2, 0.3, 0.25, 0.42, 0.12, 0.31]
IsStopTraining(history_loss)
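    # A small illustrative addition (assumption, not in the original script): exercise the
    # learning-rate helper on the same toy history; with patience=3 it compares the last three
    # losses against the reference value three steps earlier.
    print(Is_Reduce_learning_rate(history_loss, patience=3))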

View File

@ -0,0 +1,8 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/10 13:00
@Usage :
@Desc :
'''

View File

@ -0,0 +1,114 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/10 13:00
@Usage :
@Desc : Plug-and-play channel attention blocks
'''
import torch.nn as nn
import math
import numpy as np
import torch
import torch_dct as dct
try:
from torch import irfft
from torch import rfft
except ImportError:
def rfft(x, d):
t = torch.fft.fft(x, dim=(-d))
r = torch.stack((t.real, t.imag), -1)
return r
def irfft(x, d):
t = torch.fft.ifft(torch.complex(x[:, :, 0], x[:, :, 1]), dim=(-d))
return t.real
# def dct(x, norm=None):
# """
# Discrete Cosine Transform, Type II (a.k.a. the DCT)
#
# For the meaning of the parameter `norm`, see:
# https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.fftpack.dct.html
#
# :param x: the input signal
# :param norm: the normalization, None or 'ortho'
# :return: the DCT-II of the signal over the last dimension
# """
# x_shape = x.shape
# N = x_shape[-1]
# x = x.contiguous().view(-1, N)
#
# v = torch.cat([x[:, ::2], x[:, 1::2].flip([1])], dim=1)
#
# # Vc = torch.fft.rfft(v, 1, onesided=False)
# Vc = rfft(v, 1)
#
# k = - torch.arange(N, dtype=x.dtype, device=x.device)[None, :] * np.pi / (2 * N)
# W_r = torch.cos(k)
# W_i = torch.sin(k)
#
# V = Vc[:, :, 0] * W_r - Vc[:, :, 1] * W_i
#
# if norm == 'ortho':
# V[:, 0] /= np.sqrt(N) * 2
# V[:, 1:] /= np.sqrt(N / 2) * 2
#
# V = 2 * V.view(*x_shape)
#
# return V
class dct_channel_block(nn.Module):
def __init__(self, channel):
super(dct_channel_block, self).__init__()
# self.avg_pool = nn.AdaptiveAvgPool1d(1) #innovation
self.fc = nn.Sequential(
nn.Linear(channel, channel * 2, bias=False),
nn.Dropout(p=0.1),
nn.ReLU(inplace=True),
nn.Linear(channel * 2, channel, bias=False),
nn.Sigmoid()
)
# self.dct_norm = nn.LayerNorm([512], eps=1e-6)
self.dct_norm = nn.LayerNorm([channel], eps=1e-6) # for lstm on length-wise
# self.dct_norm = nn.LayerNorm([36], eps=1e-6)#for lstm on length-wise on ill with input =36
def forward(self, x):
b, c = x.size() # (B,C,L) (32,96,512)
# list = []
# for i in range(c):
# freq = dct.dct(x[:, :, i])
# list.append(freq)
#
# stack_dct = torch.stack(list, dim=2)
# change = x.transpose(2, 1)
stack_dct = dct.dct(x,norm='ortho')
# stack_dct = stack_dct.transpose(2, 1)
# stack_dct = torch.tensor(stack_dct)
'''
for traffic mission:f_weight = self.dct_norm(f_weight.permute(0,2,1))#matters for traffic datasets
'''
lr_weight = self.dct_norm(stack_dct)
lr_weight = self.fc(stack_dct)
lr_weight = self.dct_norm(lr_weight)
# print("lr_weight",lr_weight.shape)
return x * lr_weight # result
if __name__ == '__main__':
# input_data = torch.Tensor([[1, 2, 3], [4, 5, 6]]) # [2, 3]
x = torch.rand((32, 10, 64))
print(x.shape)
m = nn.Linear(64, 2)
output = m(x)
print(output.shape) # [2, 2]
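    # A small illustrative addition (assumption, not in the original file): exercise
    # dct_channel_block itself. Its forward() unpacks b, c = x.size(), so a 2-D (batch, channel)
    # input is used here; this relies on torch_dct being available, as imported above.
    block = dct_channel_block(64)
    weighted = block(torch.rand((32, 64)))
    print(weighted.shape)  # expected: torch.Size([32, 64])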

View File

@ -0,0 +1,159 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/10 13:00
@Usage :
@Desc : Plug-and-play channel attention blocks
'''
import torch.nn as nn
import math
import numpy as np
import torch
import torch_dct as dct
try:
from torch import irfft
from torch import rfft
except ImportError:
def rfft(x, d):
t = torch.fft.fft(x, dim=(-d))
r = torch.stack((t.real, t.imag), -1)
return r
def irfft(x, d):
t = torch.fft.ifft(torch.complex(x[:, :, 0], x[:, :, 1]), dim=(-d))
return t.real
# def dct(x, norm=None):
# """
# Discrete Cosine Transform, Type II (a.k.a. the DCT)
#
# For the meaning of the parameter `norm`, see:
# https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.fftpack.dct.html
#
# :param x: the input signal
# :param norm: the normalization, None or 'ortho'
# :return: the DCT-II of the signal over the last dimension
# """
# x_shape = x.shape
# N = x_shape[-1]
# x = x.contiguous().view(-1, N)
#
# v = torch.cat([x[:, ::2], x[:, 1::2].flip([1])], dim=1)
#
# # Vc = torch.fft.rfft(v, 1, onesided=False)
# Vc = rfft(v, 1)
#
# k = - torch.arange(N, dtype=x.dtype, device=x.device)[None, :] * np.pi / (2 * N)
# W_r = torch.cos(k)
# W_i = torch.sin(k)
#
# V = Vc[:, :, 0] * W_r - Vc[:, :, 1] * W_i
#
# if norm == 'ortho':
# V[:, 0] /= np.sqrt(N) * 2
# V[:, 1:] /= np.sqrt(N / 2) * 2
#
# V = 2 * V.view(*x_shape)
#
# return V
class dct_channel_block(nn.Module):
def __init__(self, channel):
super(dct_channel_block, self).__init__()
# self.avg_pool = nn.AdaptiveAvgPool1d(1) #innovation
self.fc = nn.Sequential(
nn.Linear(channel, channel * 2, bias=False),
nn.Dropout(p=0.1),
nn.ReLU(inplace=True),
nn.Linear(channel * 2, channel, bias=False),
nn.Sigmoid()
)
# self.dct_norm = nn.LayerNorm([512], eps=1e-6)
self.dct_norm = nn.LayerNorm([channel], eps=1e-6) # for lstm on length-wise
# self.dct_norm = nn.LayerNorm([36], eps=1e-6)#for lstm on length-wise on ill with input =36
def forward(self, x):
b, t, c = x.size() # (B,C,L) (32,96,512)
# list = []
# for i in range(c):
# freq = dct.dct(x[:, :, i])
# list.append(freq)
#
# stack_dct = torch.stack(list, dim=2)
change = x.transpose(2, 1)
stack_dct = dct.dct(change,norm='ortho')
stack_dct = stack_dct.transpose(2, 1)
# stack_dct = torch.tensor(stack_dct)
'''
for traffic mission:f_weight = self.dct_norm(f_weight.permute(0,2,1))#matters for traffic datasets
'''
lr_weight = self.dct_norm(stack_dct)
lr_weight = self.fc(stack_dct)
lr_weight = self.dct_norm(lr_weight)
# print("lr_weight",lr_weight.shape)
return x * lr_weight # result
class dct_channel_block1(nn.Module):
def __init__(self, channel):
super(dct_channel_block1, self).__init__()
# self.avg_pool = nn.AdaptiveAvgPool1d(1) #innovation
self.fc = nn.Sequential(
nn.Linear(channel, channel * 2, bias=False),
nn.Dropout(p=0.1),
nn.ReLU(inplace=True),
nn.Linear(channel * 2, channel, bias=False),
nn.Sigmoid()
)
# self.dct_norm = nn.LayerNorm([512], eps=1e-6)
self.dct_norm = nn.LayerNorm([96], eps=1e-6) # for lstm on length-wise
# self.dct_norm = nn.LayerNorm([36], eps=1e-6)#for lstm on length-wise on ill with input =36
def forward(self, x):
b, c, l = x.size() # (B,C,L) (32,96,512)
# y = self.avg_pool(x) # (B,C,L) -> (B,C,1)
# y = self.avg_pool(x).view(b, c) # (B,C,L) -> (B,C,1)
# print("y",y.shape
# y = self.fc(y).view(b, c, 96)
list = []
for i in range(c):
freq = dct.dct(x[:, i, :])
# print("freq-shape:",freq.shape)
list.append(freq)
stack_dct = torch.stack(list, dim=1)
stack_dct = torch.tensor(stack_dct)
'''
for traffic mission:f_weight = self.dct_norm(f_weight.permute(0,2,1))#matters for traffic datasets
'''
lr_weight = self.dct_norm(stack_dct)
lr_weight = self.fc(stack_dct)
lr_weight = self.dct_norm(lr_weight)
# print("lr_weight",lr_weight.shape)
return x * lr_weight # result
if __name__ == '__main__':
# input_data = torch.Tensor([[1, 2, 3], [4, 5, 6]]) # [2, 3]
x = torch.rand((8, 7, 96))
dct_model = dct_channel_block1(7)
result = dct_model.forward(x)
print(result)
# print(x.shape)
# m = nn.Linear(64, 2)
# output = m(x)
# print(output.shape) # [2, 2]

View File

@ -0,0 +1,78 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/23 16:32
@Usage :
@Desc :
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
def mae(y_true, y_predict):
return np.mean(np.mean(np.abs(y_true - y_predict), axis=-1))
def mape(y_true, y_predict):
return np.mean(np.mean(np.abs((y_true - y_predict) / y_true), axis=-1))
def score(y_true, y_predict):
Eri = y_predict - y_true
dw = np.log(0.5)
total = []
if len(y_true.shape) > 1:
b, c = y_true.shape
for i in range(b):
cList = []
for j in range(c):
if Eri[i, j] < 0:
cList.append(np.exp(-(Eri[i, j] / 13)) - 1)
else:
cList.append(np.exp((Eri[i, j] / 10)) - 1)
total.append(np.stack(cList))
total = np.stack(total, axis=0)
return np.mean(np.mean(total, axis=-1))
else:
b, = y_true.shape
for i in range(b):
if Eri[i] <= 0:
total.append(np.exp((-dw) * (Eri[i] / 5)))
else:
total.append(np.exp((dw) * (Eri[i] / 20)))
total = np.stack(total, axis=0)
return np.mean(total)
pass
def rmse(y_true, y_predict):
loss = np.sqrt(np.mean(np.mean((y_predict - y_true) ** 2, axis=-1)))
return loss
def getEvaluate(y_true, y_predict):
return [rmse(y_true, y_predict), mae(y_true, y_predict), mape(y_true, y_predict), score(y_true, y_predict)]
if __name__ == '__main__':
# a = torch.log(torch.tensor(0.5))
# print(a)
# x = torch.rand((32,))
# y = torch.rand((32,))
x = np.random.random(size=(32,))
y = np.random.random(size=(32,))
print(mae(x, y))
print(mape(x, y))
print(rmse(x, y))
print(score(x, y))

View File

@ -0,0 +1,8 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/20 16:46
@Usage :
@Desc :
'''

View File

@ -0,0 +1,57 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/15 14:46
@Usage :
@Desc :
'''
import torch
import torch.nn as nn
import math
import numpy as np
import torch.nn.functional as F
from torch.autograd import Function
class ReverseLayerF(Function):
@staticmethod
def forward(ctx, x, alpha):
ctx.alpha = alpha
return x.view_as(x)
@staticmethod
def backward(ctx, grad_output):
output = grad_output.neg() * ctx.alpha
return output, None
class Discriminator(nn.Module):
def __init__(self, input_dim=256, hidden_dim=256):
super(Discriminator, self).__init__()
self.input_dim = input_dim
self.hidden_dim = hidden_dim
self.dis1 = nn.Linear(input_dim, hidden_dim)
self.dis2 = nn.Linear(hidden_dim, 1)
def forward(self, x):
x = F.relu(self.dis1(x))
x = self.dis2(x)
x = torch.sigmoid(x)
return x
def adv(source, target, input_dim=256, hidden_dim=512):
domain_loss = nn.BCELoss()
# !!! Pay attention to .cuda !!!
adv_net = Discriminator(input_dim, hidden_dim)
    domain_src = torch.ones(len(source))  # source-domain labels
    domain_tar = torch.zeros(len(target))  # target-domain labels
domain_src, domain_tar = domain_src.view(domain_src.shape[0], 1), domain_tar.view(domain_tar.shape[0], 1)
reverse_src = ReverseLayerF.apply(source, 1)
reverse_tar = ReverseLayerF.apply(target, 1)
pred_src = adv_net(reverse_src)
pred_tar = adv_net(reverse_tar)
loss_s, loss_t = domain_loss(pred_src, domain_src), domain_loss(pred_tar, domain_tar)
loss = loss_s + loss_t
return loss
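# A minimal smoke test (assumption, not part of the original file): adversarial domain loss
# between two random feature batches of width 256; the shapes are illustrative only.
if __name__ == '__main__':
    src = torch.rand((16, 256))
    tgt = torch.rand((8, 256))
    print(adv(src, tgt, input_dim=256, hidden_dim=512))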

View File

@ -0,0 +1,27 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/15 14:46
@Usage :
@Desc :
'''
import torch
def CORAL(source, target):
d = source.size(1)
ns, nt = source.size(0), target.size(0)
# source covariance
tmp_s = torch.ones((1, ns)) @ source
cs = (source.t() @ source - (tmp_s.t() @ tmp_s) / ns) / (ns - 1)
# target covariance
tmp_t = torch.ones((1, nt)) @ target
ct = (target.t() @ target - (tmp_t.t() @ tmp_t) / nt) / (nt - 1)
# frobenius norm
loss = (cs - ct).pow(2).sum()
loss = loss / (4 * d * d)
return loss
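# A minimal smoke test (assumption, not part of the original file): CORAL distance between two
# random batches that share the same feature dimension.
if __name__ == '__main__':
    src = torch.rand((32, 64))
    tgt = torch.rand((24, 64))
    print(CORAL(src, tgt))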

View File

@ -0,0 +1,15 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/15 14:47
@Usage :
@Desc :
'''
import torch.nn as nn
def cosine(source, target):
source, target = source.mean(0), target.mean(0)
cos = nn.CosineSimilarity(dim=0)
loss = cos(source, target)
return loss.mean()

View File

@ -0,0 +1,40 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/20 15:44
@Usage :
@Desc : FFT / DCT frequency-domain losses
'''
import torch
import torch.nn as nn
import torch_dct as dct
import torch.fft as fft
def fft_mse(source, target):
if len(source.shape) < 2:
length = 1
else:
_, length = source.shape
source = fft.rfft(source)
target = fft.rfft(target)
source = torch.abs(source / length)
target = torch.abs(target / length)
source, target = source.mean(0), target.mean(0)
mse = nn.MSELoss()
loss = mse(source, target)
return loss.mean()
pass
def dct_mse(source, target):
source = dct.dct(source)
target = dct.dct(target)
source, target = source.mean(0), target.mean(0)
mse = nn.MSELoss()
loss = mse(source, target)
return loss.mean()
pass
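# A minimal smoke test (assumption, not part of the original file): compare two random batches
# in the frequency domain with both losses defined above.
if __name__ == '__main__':
    src = torch.rand((32, 128))
    tgt = torch.rand((32, 128))
    print(fft_mse(src, tgt))
    print(dct_mse(src, tgt))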

View File

@ -0,0 +1,28 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/15 14:47
@Usage :
@Desc :
'''
import torch.nn as nn
def kl_div(source, target):
if len(source) < len(target):
target = target[:len(source)]
elif len(source) > len(target):
source = source[:len(target)]
criterion = nn.KLDivLoss(reduction='batchmean')
loss = criterion(source.log(), target)
return loss
def js(source, target):
if len(source) < len(target):
target = target[:len(source)]
elif len(source) > len(target):
source = source[:len(target)]
M = .5 * (source + target)
loss_1, loss_2 = kl_div(source, M), kl_div(target, M)
return .5 * (loss_1 + loss_2)
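# A minimal smoke test (assumption, not part of the original file): kl_div() calls source.log(),
# so both inputs are first normalized into probability distributions with softmax.
if __name__ == '__main__':
    import torch
    src = torch.softmax(torch.rand((32, 10)), dim=-1)
    tgt = torch.softmax(torch.rand((32, 10)), dim=-1)
    print(kl_div(src, tgt))
    print(js(src, tgt))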

View File

@ -0,0 +1,57 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/15 14:47
@Usage :
@Desc :
'''
import torch
import torch.nn as nn
class MMD_loss(nn.Module):
def __init__(self, kernel_type='linear', kernel_mul=2.0, kernel_num=5):
super(MMD_loss, self).__init__()
self.kernel_num = kernel_num
self.kernel_mul = kernel_mul
self.fix_sigma = None
self.kernel_type = kernel_type
def guassian_kernel(self, source, target, kernel_mul=2.0, kernel_num=5, fix_sigma=None):
n_samples = int(source.size()[0]) + int(target.size()[0])
total = torch.cat([source, target], dim=0)
total0 = total.unsqueeze(0).expand(
int(total.size(0)), int(total.size(0)), int(total.size(1)))
total1 = total.unsqueeze(1).expand(
int(total.size(0)), int(total.size(0)), int(total.size(1)))
L2_distance = ((total0-total1)**2).sum(2)
if fix_sigma:
bandwidth = fix_sigma
else:
bandwidth = torch.sum(L2_distance.data) / (n_samples**2-n_samples)
bandwidth /= kernel_mul ** (kernel_num // 2)
bandwidth_list = [bandwidth * (kernel_mul**i)
for i in range(kernel_num)]
kernel_val = [torch.exp(-L2_distance / bandwidth_temp)
for bandwidth_temp in bandwidth_list]
return sum(kernel_val)
def linear_mmd(self, X, Y):
delta = X.mean(axis=0) - Y.mean(axis=0)
loss = delta.dot(delta.T)
return loss
def forward(self, source, target):
if self.kernel_type == 'linear':
return self.linear_mmd(source, target)
elif self.kernel_type == 'rbf':
batch_size = int(source.size()[0])
kernels = self.guassian_kernel(
source, target, kernel_mul=self.kernel_mul, kernel_num=self.kernel_num, fix_sigma=self.fix_sigma)
with torch.no_grad():
XX = torch.mean(kernels[:batch_size, :batch_size])
YY = torch.mean(kernels[batch_size:, batch_size:])
XY = torch.mean(kernels[:batch_size, batch_size:])
YX = torch.mean(kernels[batch_size:, :batch_size])
loss = torch.mean(XX + YY - XY - YX)
return loss
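# A minimal smoke test (assumption, not part of the original file): linear and RBF MMD between
# two random feature batches of the same size and width.
if __name__ == '__main__':
    src = torch.rand((32, 64))
    tgt = torch.rand((32, 64))
    print(MMD_loss(kernel_type='linear')(src, tgt))
    print(MMD_loss(kernel_type='rbf')(src, tgt))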

View File

@ -0,0 +1,38 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/15 14:47
@Usage :
@Desc :
'''
import torch
import torch.nn as nn
import torch.nn.functional as F
class Mine_estimator(nn.Module):
def __init__(self, input_dim=2048, hidden_dim=512):
super(Mine_estimator, self).__init__()
self.mine_model = Mine(input_dim, hidden_dim)
def forward(self, X, Y):
Y_shffle = Y[torch.randperm(len(Y))]
loss_joint = self.mine_model(X, Y)
loss_marginal = self.mine_model(X, Y_shffle)
ret = torch.mean(loss_joint) - \
torch.log(torch.mean(torch.exp(loss_marginal)))
loss = -ret
return loss
class Mine(nn.Module):
def __init__(self, input_dim=2048, hidden_dim=512):
super(Mine, self).__init__()
self.fc1_x = nn.Linear(input_dim, hidden_dim)
self.fc1_y = nn.Linear(input_dim, hidden_dim)
self.fc2 = nn.Linear(hidden_dim, 1)
def forward(self, x, y):
h1 = F.leaky_relu(self.fc1_x(x)+self.fc1_y(y))
h2 = self.fc2(h1)
return h2
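# A minimal smoke test (assumption, not part of the original file): MINE mutual-information
# estimate between two random 2048-d feature batches using the default dimensions.
if __name__ == '__main__':
    X = torch.rand((32, 2048))
    Y = torch.rand((32, 2048))
    print(Mine_estimator()(X, Y))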

View File

@ -0,0 +1,54 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/15 14:48
@Usage :
@Desc :
'''
import torch
import numpy as np
def pairwise_dist(X, Y):
n, d = X.shape
m, _ = Y.shape
assert d == Y.shape[1]
a = X.unsqueeze(1).expand(n, m, d)
b = Y.unsqueeze(0).expand(n, m, d)
return torch.pow(a - b, 2).sum(2)
def pairwise_dist_np(X, Y):
n, d = X.shape
m, _ = Y.shape
assert d == Y.shape[1]
a = np.expand_dims(X, 1)
b = np.expand_dims(Y, 0)
a = np.tile(a, (1, m, 1))
b = np.tile(b, (n, 1, 1))
return np.power(a - b, 2).sum(2)
def pa(X, Y):
XY = np.dot(X, Y.T)
XX = np.sum(np.square(X), axis=1)
XX = np.transpose([XX])
YY = np.sum(np.square(Y), axis=1)
dist = XX + YY - 2 * XY
return dist
if __name__ == '__main__':
import sys
args = sys.argv
data = args[0]
print(data)
# a = torch.arange(1, 7).view(2, 3)
# b = torch.arange(12, 21).view(3, 3)
# print(pairwise_dist(a, b))
# a = np.arange(1, 7).reshape((2, 3))
# b = np.arange(12, 21).reshape((3, 3))
# print(pa(a, b))

View File

@ -0,0 +1,113 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/20 19:30
@Usage :
@Desc :
'''
import matplotlib.pyplot as plt
import time
def plot_prediction(total_data, predicted_data_easy, predicted_data_hard, save_fig_name, predict_num=50):
    font1 = {'family': 'Times New Roman', 'weight': 'normal', 'size': 15}  # axis-label font
    font2 = {'family': 'Times New Roman', 'weight': 'normal', 'size': 15}  # legend font
    timestamp = str(int(time.time()))[-4:]  # last four digits of the timestamp, used in the figure name
    plt.figure(1)
    # global matplotlib configuration
    from matplotlib import rcParams
    config = {
        "font.family": 'Times New Roman',  # font family
        "axes.unicode_minus": False,  # render the minus sign correctly
        "axes.labelsize": 13
    }
    rcParams.update(config)
    # Easy prediction plot
length = len(predicted_data_easy)
plt.scatter(list(range(length)), total_data, c='blue', s=12, label='Actual value')
plt.plot(list(range(length - predict_num)),
predicted_data_easy[:length - predict_num], linewidth=2, color='red',
label='Traning value')
plt.scatter(list(range(length - predict_num, length)), predicted_data_easy[length - predict_num:length], c='black',
s=15, label='Predictive value')
plt.axhline(total_data[-1], linewidth=2, c='green', label='Failure threshold')
# plt.title()
plt.xlabel('Serial number of the fusion feature point', font=font1)
plt.ylabel('Virtual health indicator', font=font1)
plt.legend(loc='upper left', prop=font2)
plt.savefig(save_fig_name + 'easy{0}.png'.format(timestamp))
plt.show()
    # Hard prediction plot
    plt.figure(2)
    # global matplotlib configuration
    config = {
        "font.family": 'Times New Roman',  # font family
        "axes.unicode_minus": False,  # render the minus sign correctly
        "axes.labelsize": 13
    }
    rcParams.update(config)
length = len(predicted_data_easy)
plt.scatter(list(range(length)), total_data, c='blue', s=12, label='Actual value')
plt.plot(list(range(length - predict_num)),
predicted_data_hard[:length - predict_num], linewidth=2, color='red',
label='Traning value')
plt.scatter(list(range(length - predict_num, length)), predicted_data_hard[length - predict_num:length], c='black',
s=15, label='Predictive value')
# plt.title()
plt.xlabel('Serial number of the fusion feature point', font=font1)
plt.ylabel('Virtual health indicator', font=font1)
plt.axhline(total_data[-1], linewidth=2, c='green', label='Failure threshold')
plt.legend(loc='upper left', prop=font2)
plt.savefig(save_fig_name + 'hard{0}.png'.format(timestamp))
plt.show()
def plot_forSelf(total_data, predicted_data_easy, predicted_data_hard):
pic1 = plt.figure(figsize=(8, 6), dpi=200)
    # global matplotlib configuration
    from matplotlib import rcParams
    config = {
        "font.family": 'Times New Roman',  # font family
        "axes.unicode_minus": False,  # render the minus sign correctly
        "axes.labelsize": 13
    }
    rcParams.update(config)
    # Easy prediction subplot
plt.subplot(2, 1, 1)
plt.plot(total_data)
plt.plot(predicted_data_easy)
plt.title('Easy Prediction')
plt.xlabel('time')
plt.ylabel('loss')
# plt.legend(loc='upper right')
    # Hard prediction subplot
    plt.subplot(2, 1, 2)
    plt.plot(total_data)
    plt.plot(predicted_data_hard)
    plt.title('Hard Prediction')
plt.xlabel('time')
plt.ylabel('loss')
# plt.legend(loc='upper right')
# plt.scatter()
plt.show()

View File

@ -0,0 +1,10 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/24 14:15
@Usage :
@Desc : Some test utilities
'''

View File

@ -0,0 +1,8 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/24 15:51
@Usage :
@Desc :
'''

View File

@ -0,0 +1,229 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/15 16:33
@Usage :
@Desc :
'''
import collections
import torch
import os
import pandas as pd
import torch.nn as nn
from tqdm import tqdm
import numpy as np
EPS = 1e-12
class AverageMeter(object):
def __init__(self):
self.reset()
def reset(self):
self.val = 0
self.avg = 0
self.sum = 0
self.count = 0
self.list = []
def update(self, val, n=1):
self.val = val
self.list.append(val)
self.sum += val * n
self.count += n
self.avg = self.sum / self.count
def average_params(params_list):
assert isinstance(params_list, (tuple, list, collections.deque))
n = len(params_list)
if n == 1:
return params_list[0]
new_params = collections.OrderedDict()
keys = None
for i, params in enumerate(params_list):
if keys is None:
keys = params.keys()
for k, v in params.items():
if k not in keys:
raise ValueError('the %d-th model has different params' % i)
if k not in new_params:
new_params[k] = v / n
else:
new_params[k] += v / n
return new_params
def zscore(x):
return (x - x.mean(dim=0, keepdim=True)) / x.std(dim=0, keepdim=True, unbiased=False)
def calc_loss(pred, label):
return torch.mean((zscore(pred) - label) ** 2)
def calc_corr(pred, label):
return (zscore(pred) * zscore(label)).mean()
def test_ic(model_list, data_list, device, verbose=True, ic_type='spearman'):
'''
model_list: [model1, model2, ...]
datalist: [loader1, loader2, ...]
return: unified ic, specific ic (all values), loss
'''
spec_ic = []
loss_test = AverageMeter()
loss_fn = torch.nn.MSELoss()
label_true, label_pred = torch.empty(0).to(device), torch.empty(0).to(device)
for i in range(len(model_list)):
label_spec_true, label_spec_pred = torch.empty(0).to(device), torch.empty(0).to(device)
model_list[i].eval()
with torch.no_grad():
for _, (feature, label_actual, _, _) in enumerate(data_list[i]):
# feature = torch.tensor(feature, dtype=torch.float32, device=device)
label_actual = label_actual.clone().detach().view(-1, 1)
label_actual, mask = handle_nan(label_actual)
label_predict = model_list[i].predict(feature).view(-1, 1)
label_predict = label_predict[mask]
loss = loss_fn(label_actual, label_predict)
loss_test.update(loss.item())
# Concat them for computing IC later
label_true = torch.cat([label_true, label_actual])
label_pred = torch.cat([label_pred, label_predict])
label_spec_true = torch.cat([label_spec_true, label_actual])
label_spec_pred = torch.cat([label_spec_pred, label_predict])
ic = calc_ic(label_spec_true, label_spec_pred, ic_type)
spec_ic.append(ic.item())
unify_ic = calc_ic(label_true, label_pred, ic_type).item()
# spec_ic.append(sum(spec_ic) / len(spec_ic))
loss = loss_test.avg
if verbose:
print('[IC] Unified IC: {:.6f}, specific IC: {}, loss: {:.6f}'.format(unify_ic, spec_ic, loss))
return unify_ic, spec_ic, loss
def test_ic_daily(model_list, data_list, device, verbose=True, ic_type='spearman'):
'''
model_list: [model1, model2, ...]
datalist: [loader1, loader2, ...]
return: unified ic, specific ic (all values + avg), loss
'''
spec_ic = []
loss_test = AverageMeter()
loss_fn = torch.nn.MSELoss()
label_true, label_pred = torch.empty(0).to(device), torch.empty(0).to(device)
for i in range(len(model_list)):
label_spec_true, label_spec_pred = torch.empty(0).to(device), torch.empty(0).to(device)
model_list[i].eval()
with torch.no_grad():
for slc in tqdm(data_list[i].iter_daily(), total=data_list[i].daily_length):
feature, label_actual, _, _ = data_list[i].get(slc)
# for _, (feature, label_actual, _, _) in enumerate(data_list[i]):
# feature = torch.tensor(feature, dtype=torch.float32, device=device)
label_actual = torch.tensor(label_actual, dtype=torch.float32, device=device).view(-1, 1)
label_actual, mask = handle_nan(label_actual)
label_predict = model_list[i].predict(feature).view(-1, 1)
label_predict = label_predict[mask]
loss = loss_fn(label_actual, label_predict)
loss_test.update(loss.item())
# Concat them for computing IC later
label_true = torch.cat([label_true, label_actual])
label_pred = torch.cat([label_pred, label_predict])
label_spec_true = torch.cat([label_spec_true, label_actual])
label_spec_pred = torch.cat([label_spec_pred, label_predict])
ic = calc_ic(label_spec_true, label_spec_pred, ic_type)
spec_ic.append(ic.item())
unify_ic = calc_ic(label_true, label_pred, ic_type).item()
# spec_ic.append(sum(spec_ic) / len(spec_ic))
loss = loss_test.avg
if verbose:
print('[IC] Unified IC: {:.6f}, specific IC: {}, loss: {:.6f}'.format(unify_ic, spec_ic, loss))
return unify_ic, spec_ic, loss
def test_ic_uni(model, data_loader, model_path=None, ic_type='spearman', verbose=False):
if model_path:
model.load_state_dict(torch.load(model_path))
model.eval()
loss_all = []
ic_all = []
for slc in tqdm(data_loader.iter_daily(), total=data_loader.daily_length):
data, label, _, _ = data_loader.get(slc)
with torch.no_grad():
pred = model.predict(data)
mask = ~torch.isnan(label)
pred = pred[mask]
label = label[mask]
loss = torch.mean(torch.log(torch.cosh(pred - label)))
if ic_type == 'spearman':
ic = spearman_corr(pred, label)
elif ic_type == 'pearson':
ic = pearson_corr(pred, label)
loss_all.append(loss.item())
ic_all.append(ic)
loss, ic = np.mean(loss_all), np.mean(ic_all)
if verbose:
print('IC: ', ic)
return loss, ic
def calc_ic(x, y, ic_type='pearson'):
ic = -100
if ic_type == 'pearson':
ic = pearson_corr(x, y)
elif ic_type == 'spearman':
ic = spearman_corr(x, y)
return ic
def create_dir(path):
if not os.path.exists(path):
os.makedirs(path)
def delete_file(path):
if os.path.exists(path):
os.remove(path)
def handle_nan(x):
mask = ~torch.isnan(x)
return x[mask], mask
class Log_Loss(nn.Module):
def __init__(self):
super(Log_Loss, self).__init__()
def forward(self, ytrue, ypred):
delta = ypred - ytrue
return torch.mean(torch.log(torch.cosh(delta)))
def spearman_corr(x, y):
X = pd.Series(x.cpu())
Y = pd.Series(y.cpu())
spearman = X.corr(Y, method='spearman')
return spearman
def spearman_corr2(x, y):
X = pd.Series(x)
Y = pd.Series(y)
spearman = X.corr(Y, method='spearman')
return spearman
def pearson_corr(x, y):
X = pd.Series(x.cpu())
Y = pd.Series(y.cpu())
spearman = X.corr(Y, method='pearson')
return spearman
def dir_exist(dirs):
if not os.path.exists(dirs):
os.makedirs(dirs)

Binary file not shown.

Binary file not shown.

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,8 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/10 10:27
@Usage :
@Desc :
'''

View File

@ -0,0 +1,139 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/10 15:21
@Usage :
@Desc : Build the datasets
'''
import torch
import numpy as np
from torch.utils.data import Dataset, DataLoader
'''Plain Dataset wrapper'''
class Nor_Dataset(Dataset):
def __init__(self, datas, labels=None):
self.datas = torch.tensor(datas)
if labels is not None:
self.labels = torch.tensor(labels)
else:
self.labels = None
def __getitem__(self, index):
data = self.datas[index]
if self.labels is not None:
label = self.labels[index]
return data, label
return data
def __len__(self):
return len(self.datas)
def standardization(data):
mu = np.mean(data, axis=0)
sigma = np.std(data, axis=0)
return (data - mu) / sigma
def normalization(data):
_range = np.max(data) - np.min(data)
return (data - np.min(data)) / _range
# window length (number of LSTM cells), feature dimension, whether to normalize
def getData(filter_num, dims, if_norm: bool = False):
    # Load the data
    HI_merge_data_origin = np.load("../../dataset/HI_merge_data.npy")
    # plt.plot(HI_merge_data[0:1250, 1])
    # Drop the leading points where the degradation trend is not yet visible
    HI_merge_data = HI_merge_data_origin[0:1250, 1]
    # HI_merge_data = np.loadtxt("E:\self_example\pytorch_example\RUL\dataset\smallVHI.csv", delimiter=",")
    # Optional normalization
    if if_norm:
        HI_merge_data = normalization(HI_merge_data)
# plt.plot(HI_merge_data)
# plt.show()
(total_dims,) = HI_merge_data.shape
    # # Split into overlapping windows (sliding-window sampling)
    predict_data = np.empty(shape=[total_dims - filter_num, filter_num])
    # Overlapping sampling yields the time steps and the number of training samples
for dim in range(total_dims - filter_num):
predict_data[dim] = HI_merge_data[dim:dim + filter_num]
train_label = predict_data[dims:, :]
train_label_single = HI_merge_data[dims + filter_num - 1:-1]
    # Sample overlapping windows again to build the feature dimension of each point
    '''train_data.shape: (sample, filter_num) -> (sample, filter_num, dims)'''
    # # Split into overlapping windows (sliding-window sampling)
    train_data = np.empty(shape=[dims, total_dims - filter_num - dims, filter_num])
    for dim in range(dims):
        train_data[dim] = predict_data[dim:total_dims - filter_num - dims + dim, :]
    # Transpose into the desired layout: (dims, sample, filter_num) -> (sample, filter_num, dims)
    train_data = np.transpose(train_data, [1, 2, 0])
    # todo work around the query-serialization problem when saving the model
total_data = HI_merge_data
print("total_data.shape:", total_data.shape)
print("train_data.shape:", train_data.shape) # (20, 1200, 30)
print("train_label.shape:", train_label.shape) # (20, 1200)
print("train_label_single.shape:", train_label_single.shape)
    # all raw data; all training data; all training labels (predict a sequence); all training labels (predict a single point)
return total_data, train_data, train_label, train_label_single
def splitValData(data, label, label_single, predict_num=50):
sample, hidden, feature = data.shape
train_data = data[:sample - predict_num, :, :]
val_data = data[sample - predict_num:, :, :]
train_label = label[:sample - predict_num, :]
val_label = label[sample - predict_num:, :]
train_label_single = label_single[:sample - predict_num, ]
val_label_single = label_single[sample - predict_num:, ]
return train_data, val_data, train_label, val_label, train_label_single, val_label_single
def getTotalData(hidden_num, feature, is_single=True, is_norm=False):
total_data, train_data, train_label, train_label_single = getData(hidden_num, feature, is_norm)
if is_single:
total_dataset = Nor_Dataset(train_data, train_label_single)
else:
total_dataset = Nor_Dataset(train_data, train_label)
return total_data, total_dataset
# window length (number of LSTM cells), number of channels, how many points to predict, whether to normalize
def getDataset(hidden_num, feature, predict_num, is_single=True, is_norm=False):
    total_data, train_data, train_label, train_label_single = getData(hidden_num, feature, is_norm)
    # Split into training and validation (test) sets according to the number of points to predict
train_data, val_data, train_label, val_label, train_label_single, val_label_single = splitValData(train_data,
train_label,
train_label_single,
predict_num=predict_num)
if is_single:
train_dataset = Nor_Dataset(train_data, train_label_single)
val_dataset = Nor_Dataset(val_data, val_label_single)
else:
train_dataset = Nor_Dataset(train_data, train_label)
val_dataset = Nor_Dataset(val_data, val_label)
return train_dataset, val_dataset

View File

@ -0,0 +1,234 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/10 10:46
@Usage :
@Desc : Basic LSTM implementation (adapted from a 2D ConvLSTM)
'''
import torch.nn as nn
import torch
from torch.autograd import Variable
class LSTMCell(nn.Module):
def __init__(self, input_dim, hidden_dim, bias):
"""
        Initialize LSTM cell.
Parameters
----------
input_dim: int
Number of channels of input tensor.
hidden_dim: int
Number of channels of hidden state.
kernel_size: int
Size of the convolutional kernel.
bias: bool
Whether or not to add the bias.
Input:
A tensor of size B, T, C
B: bacth_size
T: timestamp
C: channel
"""
super(LSTMCell, self).__init__()
self.input_dim = input_dim
self.hidden_dim = hidden_dim
self.bias = bias
self.hidden = nn.Linear(in_features=self.input_dim + self.hidden_dim,
out_features=4 * self.hidden_dim,
bias=self.bias)
def forward(self, input_tensor, cur_state):
# shape :b,c
h_cur, c_cur = cur_state
combined = torch.cat([input_tensor, h_cur], dim=-1) # concatenate along channel axis
combined_linear = self.hidden(combined)
cc_i, cc_f, cc_o, cc_g = torch.split(combined_linear, self.hidden_dim, dim=1)
i = torch.sigmoid(cc_i)
f = torch.sigmoid(cc_f)
o = torch.sigmoid(cc_o)
g = torch.tanh(cc_g)
c_next = f * c_cur + i * g
h_next = o * torch.tanh(c_next)
return h_next, c_next
def init_hidden(self, batch_size):
return (torch.zeros(batch_size, self.hidden_dim, device=self.hidden.weight.device),
torch.zeros(batch_size, self.hidden_dim, device=self.hidden.weight.device))
class LSTM(nn.Module):
"""
Parameters:
input_dim: Number of channels in input
hidden_dim: Number of hidden channels
kernel_size: Size of kernel in convolutions
num_layers: Number of LSTM layers stacked on each other
batch_first: Whether or not dimension 0 is the batch or not
bias: Bias or no bias in Convolution
return_all_layers: Return the list of computations for all layers
Note: Will do same padding.
Input:
A tensor of size B, T, C or T, B, C
Output:
A tuple of two lists of length num_layers (or length 1 if return_all_layers is False).
0 - layer_output_list is the list of lists of length T of each output
1 - last_state_list is the list of last states
each element of the list is a tuple (h, c) for hidden state and memory
Example:
>> x = torch.rand((32, 10, 64))
>> convlstm = ConvLSTM(64, 16, 3, 1, True, True, False)
>> _, last_states = convlstm(x)
>> h = last_states[0][0] # 0 for layer index, 0 for h index
"""
def __init__(self, input_dim, hidden_dim, num_layers,
batch_first=False, bias=True, return_all_layers=False):
super(LSTM, self).__init__()
# Make sure that both `kernel_size` and `hidden_dim` are lists having len == num_layers
hidden_dim = self._extend_for_multilayer(hidden_dim, num_layers)
if not len(hidden_dim) == num_layers:
raise ValueError('Inconsistent list length.')
self.input_dim = input_dim
self.hidden_dim = hidden_dim
self.num_layers = num_layers
self.batch_first = batch_first
self.bias = bias
self.return_all_layers = return_all_layers
cell_list = []
for i in range(0, self.num_layers):
cur_input_dim = self.input_dim if i == 0 else self.hidden_dim[i - 1]
cell_list.append(LSTMCell(input_dim=cur_input_dim,
hidden_dim=self.hidden_dim[i],
bias=self.bias))
self.cell_list = nn.ModuleList(cell_list)
def forward(self, input_tensor, hidden_state=None):
"""
Parameters
----------
input_tensor: todo
            3-D Tensor of shape (t, b, c) or (b, t, c)
hidden_state: todo
None. todo implement stateful
Returns
-------
last_state_list, layer_output
"""
if not self.batch_first:
            # equivalent to a transpose
            # (t, b, c) -> (b, t, c)
input_tensor = input_tensor.permute(1, 0, 2)
b, _, _ = input_tensor.size()
# Implement stateful ConvLSTM
if hidden_state is not None:
raise NotImplementedError()
else:
# Since the init is done in forward. Can send image size here
hidden_state = self._init_hidden(batch_size=b)
layer_output_list = []
last_state_list = []
timestamp = input_tensor.size(1)
cur_layer_input = input_tensor
for layer_idx in range(self.num_layers):
h, c = hidden_state[layer_idx]
output_inner = []
for t in range(timestamp):
h, c = self.cell_list[layer_idx](input_tensor=cur_layer_input[:, t, :],
cur_state=[h, c])
output_inner.append(h)
layer_output = torch.stack(output_inner, dim=1)
cur_layer_input = layer_output
layer_output_list.append(layer_output)
last_state_list.append([h, c])
if not self.return_all_layers:
layer_output_list = layer_output_list[-1:]
last_state_list = last_state_list[-1:]
return layer_output_list, last_state_list
def _init_hidden(self, batch_size):
init_states = []
for i in range(self.num_layers):
init_states.append(self.cell_list[i].init_hidden(batch_size))
return init_states
@staticmethod
def _extend_for_multilayer(param, num_layers):
if not isinstance(param, list):
param = [param] * num_layers
return param
class PredictModel(nn.Module):
def __init__(self, input_dim):
super(PredictModel, self).__init__()
self.lstm = LSTM(input_dim=input_dim, hidden_dim=[512, 256], num_layers=2, batch_first=True, bias=True,
return_all_layers=False)
self.backbone = nn.Sequential(
nn.Linear(in_features=256, out_features=128),
nn.ReLU(),
nn.Linear(in_features=128, out_features=64),
nn.ReLU(),
nn.Dropout(0.2),
nn.BatchNorm1d(64),
nn.Linear(in_features=64, out_features=32),
nn.ReLU(),
nn.Dropout(0.2),
nn.BatchNorm1d(32),
nn.ReLU(),
nn.Linear(in_features=32, out_features=16),
nn.Linear(in_features=16, out_features=1)
)
def forward(self, input_tensor):
input_tensor = input_tensor.to(torch.float32)
layer_output_list, last_states = self.lstm(input_tensor)
last_timestamp = last_states[0][0]
predict = self.backbone(last_timestamp)
return predict
if __name__ == '__main__':
x = torch.rand((32, 10, 64))
lstm = LSTM(input_dim=64, hidden_dim=16, num_layers=1, batch_first=True, bias=True,
return_all_layers=False)
layer_output_list, last_states = lstm(x)
all = layer_output_list[0]
h = last_states[0][0]
print(all.size())
print(h.size())

View File

@ -0,0 +1,227 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/10 10:46
@Usage :
@Desc : Basic LSTM implementation (adapted from a 2D ConvLSTM)
'''
import torch.nn as nn
import torch
from torch.autograd import Variable
class LSTMCell(nn.Module):
def __init__(self, input_dim, hidden_dim, bias):
"""
        Initialize LSTM cell.
Parameters
----------
input_dim: int
Number of channels of input tensor.
hidden_dim: int
Number of channels of hidden state.
kernel_size: int
Size of the convolutional kernel.
bias: bool
Whether or not to add the bias.
Input:
A tensor of size B, T, C
B: bacth_size
T: timestamp
C: channel
"""
super(LSTMCell, self).__init__()
self.input_dim = input_dim
self.hidden_dim = hidden_dim
self.bias = bias
self.hidden = nn.Linear(in_features=self.input_dim + self.hidden_dim,
out_features=4 * self.hidden_dim,
bias=self.bias)
def forward(self, input_tensor, cur_state):
# shape :b,c
h_cur, c_cur = cur_state
combined = torch.cat([input_tensor, h_cur], dim=-1) # concatenate along channel axis
combined_linear = self.hidden(combined)
cc_i, cc_f, cc_o, cc_g = torch.split(combined_linear, self.hidden_dim, dim=1)
i = torch.sigmoid(cc_i)
f = torch.sigmoid(cc_f)
o = torch.sigmoid(cc_o)
g = torch.tanh(cc_g)
c_next = f * c_cur + i * g
h_next = o * torch.tanh(c_next)
return h_next, c_next
def init_hidden(self, batch_size):
return (torch.zeros(batch_size, self.hidden_dim, device=self.hidden.weight.device),
torch.zeros(batch_size, self.hidden_dim, device=self.hidden.weight.device))
class LSTM(nn.Module):
"""
Parameters:
input_dim: Number of channels in input
hidden_dim: Number of hidden channels
kernel_size: Size of kernel in convolutions
num_layers: Number of LSTM layers stacked on each other
batch_first: Whether or not dimension 0 is the batch or not
bias: Bias or no bias in Convolution
return_all_layers: Return the list of computations for all layers
Note: Will do same padding.
Input:
A tensor of size B, T, C or T, B, C
Output:
A tuple of two lists of length num_layers (or length 1 if return_all_layers is False).
0 - layer_output_list is the list of lists of length T of each output
1 - last_state_list is the list of last states
each element of the list is a tuple (h, c) for hidden state and memory
Example:
>> x = torch.rand((32, 10, 64))
>> convlstm = ConvLSTM(64, 16, 3, 1, True, True, False)
>> _, last_states = convlstm(x)
>> h = last_states[0][0] # 0 for layer index, 0 for h index
"""
def __init__(self, input_dim, hidden_dim, num_layers,
batch_first=False, bias=True, return_all_layers=False):
super(LSTM, self).__init__()
# Make sure that both `kernel_size` and `hidden_dim` are lists having len == num_layers
hidden_dim = self._extend_for_multilayer(hidden_dim, num_layers)
if not len(hidden_dim) == num_layers:
raise ValueError('Inconsistent list length.')
self.input_dim = input_dim
self.hidden_dim = hidden_dim
self.num_layers = num_layers
self.batch_first = batch_first
self.bias = bias
self.return_all_layers = return_all_layers
cell_list = []
for i in range(0, self.num_layers):
cur_input_dim = self.input_dim if i == 0 else self.hidden_dim[i - 1]
cell_list.append(LSTMCell(input_dim=cur_input_dim,
hidden_dim=self.hidden_dim[i],
bias=self.bias))
self.cell_list = nn.ModuleList(cell_list)
def forward(self, input_tensor, hidden_state=None):
"""
Parameters
----------
input_tensor: todo
            3-D Tensor of shape (t, b, c) or (b, t, c)
hidden_state: todo
None. todo implement stateful
Returns
-------
last_state_list, layer_output
"""
if not self.batch_first:
            # equivalent to a transpose
            # (t, b, c) -> (b, t, c)
input_tensor = input_tensor.permute(1, 0, 2)
b, _, _ = input_tensor.size()
# Implement stateful ConvLSTM
if hidden_state is not None:
raise NotImplementedError()
else:
# Since the init is done in forward. Can send image size here
hidden_state = self._init_hidden(batch_size=b)
layer_output_list = []
last_state_list = []
timestamp = input_tensor.size(1)
cur_layer_input = input_tensor
for layer_idx in range(self.num_layers):
h, c = hidden_state[layer_idx]
output_inner = []
for t in range(timestamp):
h, c = self.cell_list[layer_idx](input_tensor=cur_layer_input[:, t, :],
cur_state=[h, c])
output_inner.append(h)
layer_output = torch.stack(output_inner, dim=1)
cur_layer_input = layer_output
layer_output_list.append(layer_output)
last_state_list.append([h, c])
if not self.return_all_layers:
layer_output_list = layer_output_list[-1:]
last_state_list = last_state_list[-1:]
return layer_output_list, last_state_list
def _init_hidden(self, batch_size):
init_states = []
for i in range(self.num_layers):
init_states.append(self.cell_list[i].init_hidden(batch_size))
return init_states
@staticmethod
def _extend_for_multilayer(param, num_layers):
if not isinstance(param, list):
param = [param] * num_layers
return param
class PredictModel(nn.Module):
def __init__(self, input_dim):
super(PredictModel, self).__init__()
self.lstm = LSTM(input_dim=input_dim, hidden_dim=[64, 64], num_layers=2, batch_first=True, bias=True,
return_all_layers=False)
self.backbone = nn.Sequential(
nn.Linear(in_features=64, out_features=64),
nn.Linear(in_features=64, out_features=64),
nn.BatchNorm1d(64),
nn.ReLU(),
nn.Dropout(0.5),
nn.Linear(in_features=64, out_features=1)
)
def forward(self, input_tensor):
input_tensor = input_tensor.to(torch.float32)
layer_output_list, last_states = self.lstm(input_tensor)
last_timestamp = last_states[0][0]
predict = self.backbone(last_timestamp)
return predict
if __name__ == '__main__':
x = torch.rand((32, 10, 64))
lstm = LSTM(input_dim=64, hidden_dim=16, num_layers=1, batch_first=True, bias=True,
return_all_layers=False)
layer_output_list, last_states = lstm(x)
all = layer_output_list[0]
h = last_states[0][0]
print(all.size())
print(h.size())
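    # Sanity check (added note, not in the original file): with the shapes above this prints
    # torch.Size([32, 10, 16]) and torch.Size([32, 16]).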

View File

@ -0,0 +1,118 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/10 16:27
@Usage :
@Desc :
'''
import numpy as np
import torch
from RUL.otherIdea.LSTM.loadData import getDataset, getTotalData
from RUL.otherIdea.LSTM.modelForEasy import PredictModel
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
from RUL.baseModel.plot import plot_prediction, plot_forSelf
from RUL.baseModel.loss.Evaluate import getEvaluate
# Recursive prediction: only the newly predicted point is fed back to forecast the following steps
def predictOneByOne(model, train_data, predict_num=50):
    # take the last window of the training data
each_predict_data = train_data
predicted_list = np.empty(shape=(predict_num, 1)) # (5,filter_num,30)
# all_data = total_data # (1201,)
for each_predict in range(predict_num):
# predicted_data.shape : (1,1)
predicted_data = model(each_predict_data).cpu().detach().numpy()[-1] # (batch_size,filer_num,1)
predicted_list[each_predict] = predicted_data
each_predict_data = each_predict_data.numpy()
# (1,1) => (10,1)
        # intermediate concatenation steps: (1) => (10) => (40,10) => (30,40,10)
a = np.concatenate([each_predict_data[-1, -1, 1:], predicted_data], axis=0)
b = np.concatenate([each_predict_data[-1, 1:, :], np.expand_dims(a, axis=0)], axis=0)
c = np.concatenate([each_predict_data[1:, :, :], np.expand_dims(b, axis=0)], axis=0)
each_predict_data = torch.tensor(c)
return np.squeeze(predicted_list)
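# A minimal standalone sketch (added for clarity, not in the original code) of the
# rolling-window update performed inside the loop above; shapes follow the
# (batch, hidden_num, feature) layout and the helper name is hypothetical.
def _roll_window_sketch(window, new_point):
    # window: (B, H, F) ndarray, new_point: (1,) ndarray holding the newest prediction
    last_channel = np.concatenate([window[-1, -1, 1:], new_point], axis=0)  # (F,)
    last_sample = np.concatenate([window[-1, 1:, :], np.expand_dims(last_channel, axis=0)], axis=0)  # (H, F)
    return np.concatenate([window[1:, :, :], np.expand_dims(last_sample, axis=0)], axis=0)  # (B, H, F)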
def test(hidden_num, feature, predict_num, batch_size, save_path, save_fig_name, is_single=True, is_norm=False):
total_data, total_dataset = getTotalData(hidden_num, feature, is_single=is_single, is_norm=is_norm)
train_dataset, val_dataset = getDataset(hidden_num, feature, predict_num=predict_num, is_single=is_single,
is_norm=is_norm)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=False)
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)
    # load the network
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = PredictModel(input_dim=feature).to(device)
model.load_state_dict(
torch.load(save_path, map_location=device)
)
print(model)
params_num = sum(param.numel() for param in model.parameters())
    print('Number of parameters: {}'.format(params_num))
model.eval()
predicted_data_easy = total_data[:hidden_num + feature, ]
predicted_data_hard = total_data[:hidden_num + feature, ]
    # evaluation metrics
train_list = []
easy_list = []
val_label = []
with torch.no_grad():
for batch_idx, (data, label) in enumerate(train_loader):
data, label = data.float().to(device), label.float().to(device)
last_train_data = data
each_predicted_data = torch.squeeze(model(data)).cpu().detach().numpy()
predicted_data_easy = np.concatenate(
[predicted_data_easy, each_predicted_data],
axis=0)
predicted_data_hard = np.concatenate(
[predicted_data_hard, each_predicted_data],
axis=0)
train_list.append(getEvaluate(label.cpu().detach().numpy(), each_predicted_data))
        # easy version: each prediction uses the known ground-truth inputs
for batch_idx, (data, label) in enumerate(val_loader):
data, label = data.to(device), label.to(device)
each_predicted_data = torch.squeeze(model(data)).cpu().detach().numpy()
predicted_data_easy = np.concatenate(
[predicted_data_easy, each_predicted_data],
axis=0)
easy_list.append(getEvaluate(label.cpu().detach().numpy(), each_predicted_data))
val_label = np.concatenate(
[val_label, label.cpu().detach().numpy()],
axis=0)
        # hard version: each prediction is based on the previous prediction
predict_hard = predictOneByOne(model, last_train_data, predict_num=predict_num)
predicted_data_hard = np.concatenate([predicted_data_hard,
predict_hard], axis=0)
    #### evaluation
train_evaluate = np.mean(train_list, axis=0)
easy_evaluate = np.mean(easy_list, axis=0)
hard_evaluate = getEvaluate(val_label, predict_hard)
print('train: RMSE %.6f, MAE %.6f, MAPE %.6f, Score %.6f' %
(train_evaluate[0], train_evaluate[1], train_evaluate[2], train_evaluate[3]))
print('easy: RMSE %.6f, MAE %.6f, MAPE %.6f, Score %.6f' %
(easy_evaluate[0], easy_evaluate[1], easy_evaluate[2], easy_evaluate[3]))
print('hard: RMSE %.6f, MAE %.6f, MAPE %.6f, Score %.6f' %
(hard_evaluate[0], hard_evaluate[1], hard_evaluate[2], hard_evaluate[3]))
plot_prediction(total_data, predicted_data_easy, predicted_data_hard, save_fig_name, predict_num=predict_num)
plot_forSelf(total_data, predicted_data_easy, predicted_data_hard)
if __name__ == '__main__':
test(40, 10, 50, 32,
"E:\self_example\pytorch_example\RUL\otherIdea\LSTM\parameters\LSTM_hidden40_feature10_predict50_epoch66_trainLoss0.05624270847895079_valLoss0.4181802272796631.pkl"
)

View File

@ -0,0 +1,177 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/10 14:56
@Usage :
@Desc : Train the LSTM
'''
import os
import time
import random
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from RUL.otherIdea.LSTM.modelForEasy import PredictModel
from RUL.otherIdea.LSTM.loadData import getDataset
from RUL.otherIdea.LSTM.test import test
from RUL.baseModel.CommonFunction import IsStopTraining
from scipy.spatial.distance import cdist
import math
import RUL.baseModel.utils.utils as utils
'''
Hyperparameter settings:
'''
hidden_num = 10  # number of LSTM cells (window length)
feature = 2  # dimension of a single point
batch_size = 32
EPOCH = 1000
seed = 5
predict_num = 200  # number of points to predict
is_norm = False
is_single = True
model_name = "LSTM"
base_save = r"parameters/{0}_hidden{1}_feature{2}_predict{3}".format(model_name, hidden_num, feature,
predict_num)
save_fig_name = 'fig/seed{0}_hidden{1}_feature{2}_predict{3}'.format(seed, hidden_num, feature, predict_num)
if not os.path.exists("parameters"):
os.makedirs("parameters")
if not os.path.exists("fig"):
os.makedirs("fig")
def get_dataset():
    '''Build the datasets'''
train_dataset, val_dataset = getDataset(
hidden_num=hidden_num, feature=feature, predict_num=predict_num, is_single=is_single, is_norm=is_norm)
'''DataLoader'''
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, drop_last=False)
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False, drop_last=False)
return train_loader, val_loader
def train(device, lr, lr_patience, early_stop_patience, epochs):
    '''Prediction model'''
global best_save_path
model = PredictModel(input_dim=feature)
    '''Build the datasets'''
train_loader, val_loader = get_dataset()
criterion = nn.MSELoss().to(device)
optimizer_model = torch.optim.SGD(model.parameters(), lr=lr)
scheduler_model = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer_model, mode="min", factor=0.5,
patience=lr_patience)
def zero_grad_all():
optimizer_model.zero_grad()
train_loss_list = []
val_loss_list = []
best_save_path = None
for epoch in range(epochs):
epoch_start_time = time.time()
train_loss = 0.0
val_loss = 0.0
model.train()
for (train_batch_idx, (train_data, train_label)) in enumerate(train_loader):
train_data, train_label = train_data.float().to(device), train_label.float().to(device)
zero_grad_all()
predict_data = torch.squeeze(model(train_data))
            # MSE loss
loss = criterion(predict_data, train_label)
loss.backward()
optimizer_model.step()
zero_grad_all()
train_loss += loss.item()
model.eval()
with torch.no_grad():
for val_batch_idx, (val_data, val_label) in enumerate(val_loader):
val_data, val_label = val_data.float().to(device), val_label.float().to(device)
val_predict_data = torch.squeeze(model(val_data))
loss = criterion(val_predict_data, val_label)
val_loss += loss.item()
scheduler_model.step(val_loss)
train_loss = train_loss / len(train_loader)
val_loss = val_loss / len(val_loader)
print(
"[{:03d}/{:03d}] {:2.2f} sec(s) train_loss: {:3.9f} | val_loss: {:3.9f} | Learning rate : {:3.6f}".format(
epoch + 1, epochs, time.time() - epoch_start_time,
train_loss,
val_loss,
optimizer_model.state_dict()['param_groups'][0]['lr']))
        # save the model with the lowest validation loss
        # if val_loss_list.__len__() > 0 and (val_loss / val_dataset.__len__()) < min(val_loss_list):
        # if the current validation loss is the best so far, save the model
if len(val_loss_list) == 0 or val_loss < min(val_loss_list):
print("保存模型最佳模型成功")
# 保存模型参数
# 保存模型参数
if best_save_path != None:
utils.delete_file(best_save_path)
best_save_path = base_save + "_epoch" + str(epoch) + \
"_trainLoss" + str(train_loss) + \
"_valLoss" + str(val_loss) + ".pkl"
torch.save(model.state_dict(),
best_save_path)
train_loss_list.append(train_loss)
val_loss_list.append(val_loss)
if IsStopTraining(history_loss=val_loss_list, patience=early_stop_patience):
break
    '''Return the path of the best saved model parameters'''
return best_save_path
if __name__ == '__main__':
begin = time.time()
if torch.cuda.is_available():
device = torch.device("cuda:0")
else:
device = torch.device("cpu")
torch.manual_seed(seed)
random.seed(seed)
np.random.seed(seed)
    '''Training'''
save_path = train(device, lr=0.01, lr_patience=10, early_stop_patience=20, epochs=1000)
end = time.time()
    '''Testing'''
# test1(5, src_condition, tar_condition, G_params_path, LC_params_path)
test(hidden_num, feature, predict_num=predict_num,
batch_size=batch_size, save_path=save_path,
is_single=is_single, is_norm=is_norm, save_fig_name=save_fig_name)
print("训练耗时:{:3.2f}s".format(end - begin))

View File

@ -0,0 +1,8 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/20 14:11
@Usage :
@Desc :
'''

View File

@ -0,0 +1,8 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/16 19:46
@Usage :
@Desc :
'''

View File

@ -0,0 +1,111 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/15 16:35
@Usage :
@Desc :
'''
# encoding=utf-8
import RUL.otherIdea.adaRNN.dataset_vibrate.data_vibrate as data_vibrate
from RUL.otherIdea.adaRNN.loss_transfer import TransferLoss
from RUL.otherIdea.adaRNN.dataset_vibrate.loadData import getVibrate_data
import torch
import math
def get_split_time(num_domain=2, mode='pre_process', data=None, dis_type='coral'):
spilt_time = {
'2': [(0, 600), (600, 1200)]
}
if mode == 'pre_process':
return spilt_time[str(num_domain)]
if mode == 'tdc':
return TDC(num_domain, data, dis_type=dis_type)
else:
print("error in mode")
def TDC(num_domain, data, dis_type='coral'):
    # number of samples
num_day = len(data[0])
split_N = 10
feat = data[0][0:num_day]
feat = torch.tensor(feat, dtype=torch.float32)
    feat_shape_1 = feat.shape[1]  # length of the time dimension
feat = feat.reshape(-1, feat.shape[2])
feat = feat
selected = [0, 10]
candidate = [1, 2, 3, 4, 5, 6, 7, 8, 9]
start = 0
if num_domain in [2, 3, 5, 7, 10]:
while len(selected) - 2 < num_domain - 1:
distance_list = []
for can in candidate:
selected.append(can)
selected.sort()
dis_temp = 0
for i in range(1, len(selected) - 1):
for j in range(i, len(selected) - 1):
index_part1_start = start + math.floor(selected[i - 1] / split_N * num_day) * feat_shape_1
index_part1_end = start + math.floor(selected[i] / split_N * num_day) * feat_shape_1
feat_part1 = feat[index_part1_start: index_part1_end]
index_part2_start = start + math.floor(selected[j] / split_N * num_day) * feat_shape_1
index_part2_end = start + math.floor(selected[j + 1] / split_N * num_day) * feat_shape_1
feat_part2 = feat[index_part2_start:index_part2_end]
criterion_transder = TransferLoss(loss_type=dis_type, input_dim=feat_part1.shape[1])
dis_temp += criterion_transder.compute(feat_part1, feat_part2)
distance_list.append(dis_temp)
selected.remove(can)
can_index = distance_list.index(max(distance_list))
selected.append(candidate[can_index])
candidate.remove(candidate[can_index])
selected.sort()
res = []
for i in range(1, len(selected)):
if i == 1:
sel_start_index = int(num_day / split_N * selected[i - 1])
else:
sel_start_index = int(num_day / split_N * selected[i - 1]) + 1
sel_end_index = int(num_day / split_N * selected[i])
res.append((sel_start_index, sel_end_index))
return res
else:
print("error in number of domain")
def load_weather_data_multi_domain(hidden_num, feature, predict_num, batch_size=6, number_domain=2, mode='pre_process',
dis_type='coral', is_norm=False):
# mode: 'tdc', 'pre_process'
train_data, val_data = getVibrate_data(hidden_num=hidden_num, feature=feature, predict_num=predict_num,
is_norm=is_norm)
split_time_list = get_split_time(number_domain, mode=mode, data=train_data, dis_type=dis_type)
train_list = []
for i in range(len(split_time_list)):
index_temp = split_time_list[i]
train_loader = data_vibrate.get_vibrate_data(train_data, start_index=index_temp[0],
end_index=index_temp[1], batch_size=batch_size)
train_list.append(train_loader)
valid_loader = data_vibrate.get_vibrate_data(val_data, start_index=0,
end_index=len(val_data), batch_size=batch_size, mean=None,
std=None, shuffle=False)
test_loader = valid_loader
return train_list, valid_loader, test_loader
if __name__ == '__main__':
load_weather_data_multi_domain(hidden_num=10, feature=10, predict_num=50, batch_size=32, number_domain=2,
mode='tdc',
dis_type='coral', is_norm=False)

View File

@ -0,0 +1,78 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/15 16:36
@Usage :
@Desc :
'''
import math
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from pandas.core.frame import DataFrame
from torch.utils.data import Dataset, DataLoader
import torch
import pickle
import datetime
class data_loader(Dataset):
def __init__(self, df_feature, df_label, df_label_reg, t=None):
assert len(df_feature) == len(df_label)
assert len(df_feature) == len(df_label_reg)
# df_feature = df_feature.reshape(df_feature.shape[0], df_feature.shape[1] // 6, df_feature.shape[2] * 6)
self.df_feature = df_feature
self.df_label = df_label
self.df_label_reg = df_label_reg
self.T = t
self.df_feature = torch.tensor(
self.df_feature, dtype=torch.float32)
self.df_label = torch.tensor(
self.df_label, dtype=torch.float32)
self.df_label_reg = torch.tensor(
self.df_label_reg, dtype=torch.float32)
def __getitem__(self, index):
sample, target, label_reg = self.df_feature[index], self.df_label[index], self.df_label_reg[index]
if self.T:
return self.T(sample), target, label_reg
else:
return sample, target, label_reg
def __len__(self):
return len(self.df_feature)
def create_dataset(data, start_index, end_index, mean=None, std=None):
feat, label_continue, label_single = data[0], data[1], data[2]
referece_start_index = 0
referece_end_index = 1250
assert start_index - referece_start_index >= 0
assert end_index - referece_end_index <= 0
assert end_index - start_index >= 0
feat = feat[start_index: end_index + 1]
label = label_continue[start_index: end_index + 1]
label_reg = label_single[start_index: end_index + 1]
# ori_shape_1, ori_shape_2=feat.shape[1], feat.shape[2]
# feat=feat.reshape(-1, feat.shape[2])
# feat=(feat - mean) / std
# feat=feat.reshape(-1, ori_shape_1, ori_shape_2)
return data_loader(feat, label, label_reg)
def get_vibrate_data(data, start_index, end_index, batch_size, shuffle=True, mean=None, std=None):
dataset = create_dataset(data, start_index,
end_index, mean=mean, std=std)
train_loader = DataLoader(
dataset, batch_size=batch_size, shuffle=shuffle)
return train_loader
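# Minimal usage sketch (added for illustration; shapes are assumptions, not from the source):
#   data = [features, labels, labels_reg]            # e.g. (N, T, C), (N, T), (N,)
#   loader = get_vibrate_data(data, start_index=0, end_index=min(N, 1250) - 1, batch_size=16)
#   for feat, label, label_reg in loader:
#       ...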

View File

@ -0,0 +1,150 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/10 15:21
@Usage :
@Desc : Load the dataset
'''
import torch
import numpy as np
from torch.utils.data import Dataset, DataLoader
'''Standard Dataset class'''
class Nor_Dataset(Dataset):
def __init__(self, datas, labels=None):
self.datas = torch.tensor(datas)
if labels is not None:
self.labels = torch.tensor(labels)
else:
self.labels = None
def __getitem__(self, index):
data = self.datas[index]
if self.labels is not None:
label = self.labels[index]
return data, label
return data
def __len__(self):
return len(self.datas)
def standardization(data):
mu = np.mean(data, axis=0)
sigma = np.std(data, axis=0)
return (data - mu) / sigma
def normalization(data):
_range = np.max(data) - np.min(data)
return (data - np.min(data)) / _range
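# Quick illustration (added; values chosen for clarity):
#   normalization(np.array([0., 5., 10.]))   -> array([0. , 0.5, 1. ])
#   standardization() removes the per-column mean and divides by the per-column std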
# number of LSTM cells, point dimension, whether to normalize
def getData(filter_num, dims, if_norm: bool = False):
    # read in the data
# HI_merge_data_origin = np.load("E:\self_example\pytorch_example\RUL\dataset\HI_merge_data1.npy")
#
# # plt.plot(HI_merge_data[0:1250, 1])
    # # drop the leading points where the degradation trend is not yet obvious
# HI_merge_data = HI_merge_data_origin[0:1250, 1]
HI_merge_data = np.loadtxt("E:\self_example\pytorch_example\RUL\dataset\smallVHI.csv", delimiter=",")
    # whether to normalize
if if_norm:
HI_merge_data = normalization(HI_merge_data)
# plt.plot(HI_merge_data)
# plt.show()
(total_dims,) = HI_merge_data.shape
    # # split into overlapping samples with a sliding window
    predict_data = np.empty(shape=[total_dims - filter_num, filter_num])
    # overlapping sampling to obtain the time dimension and the number of samples
for dim in range(total_dims - filter_num):
predict_data[dim] = HI_merge_data[dim:dim + filter_num]
train_label = predict_data[dims:, :]
train_label_single = HI_merge_data[dims + filter_num - 1:-1]
    # overlap-sample again to obtain the per-point feature dimension
    '''train_data.shape:(sample,filter_num) -> (sample,filter_num,dims)'''
    # # split into overlapping samples with a sliding window
train_data = np.empty(shape=[dims, total_dims - filter_num - dims, filter_num])
for dim in range(dims):
train_data[dim] = predict_data[dim:total_dims - filter_num - dims + dim, :]
    # transpose to the desired layout (dims, sample, filter_num) -> (sample, filter_num, dims)
train_data = np.transpose(train_data, [1, 2, 0])
total_data = HI_merge_data
print("total_data.shape:", total_data.shape)
print("train_data.shape:", train_data.shape) # (20, 1200, 30)
print("train_label.shape:", train_label.shape) # (20, 1200)
print("train_label_single.shape:", train_label_single.shape)
    # returns: all raw data; all training data; training labels (sequence); training labels (single point)
return total_data, train_data, train_label, train_label_single
def splitValData(data, label, label_single, predict_num=50):
sample, hidden, feature = data.shape
train_data = data[:sample - predict_num, :, :]
val_data = data[sample - predict_num:, :, :]
train_label = label[:sample - predict_num, :]
val_label = label[sample - predict_num:, :]
train_label_single = label_single[:sample - predict_num, ]
val_label_single = label_single[sample - predict_num:, ]
return train_data, val_data, train_label, val_label, train_label_single, val_label_single
def getTotalData(hidden_num, feature, is_single=True, is_norm=False):
total_data, train_data, train_label, train_label_single = getData(hidden_num, feature, is_norm)
if is_single:
total_dataset = Nor_Dataset(train_data, train_label_single)
else:
total_dataset = Nor_Dataset(train_data, train_label)
return total_data, total_dataset
# number of LSTM cells, number of channels, how many points to predict, whether to normalize
def getDataset(hidden_num, feature, predict_num, is_single=True, is_norm=False):
total_data, train_data, train_label, train_label_single = getData(hidden_num, feature, is_norm)
    # split into training and test (validation) sets according to the number of points to predict
train_data, val_data, train_label, val_label, train_label_single, val_label_single = splitValData(train_data,
train_label,
train_label_single,
predict_num=predict_num)
if is_single:
train_dataset = Nor_Dataset(train_data, train_label_single)
val_dataset = Nor_Dataset(val_data, val_label_single)
else:
train_dataset = Nor_Dataset(train_data, train_label)
val_dataset = Nor_Dataset(val_data, val_label)
return train_dataset, val_dataset
def getVibrate_data(hidden_num, feature, predict_num, is_norm=False):
total_data, train_data, train_label, train_label_single = getData(hidden_num, feature, is_norm)
    # split into training and test (validation) sets according to the number of points to predict
train_data, val_data, train_label, val_label, train_label_single, val_label_single = splitValData(train_data,
train_label,
train_label_single,
predict_num=predict_num)
return [train_data,train_label,train_label_single],[val_data,val_label,val_label_single]

View File

@ -0,0 +1,64 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/15 14:45
@Usage :
@Desc :
'''
from RUL.baseModel.loss import adv_loss, coral, kl_js, mmd, mutual_info, cos, pair_dist
class TransferLoss(object):
def __init__(self, loss_type='cosine', input_dim=512):
"""
Supported loss_type: mmd(mmd_lin), mmd_rbf, coral, cosine, kl, js, mine, adv
"""
self.loss_type = loss_type
self.input_dim = input_dim
def compute(self, X, Y):
"""Compute adaptation loss
Arguments:
X {tensor} -- source matrix
Y {tensor} -- target matrix
Returns:
[tensor] -- transfer loss
"""
if self.loss_type == 'mmd_lin' or self.loss_type == 'mmd':
mmdloss = mmd.MMD_loss(kernel_type='linear')
loss = mmdloss(X, Y)
elif self.loss_type == 'coral':
loss = coral.CORAL(X, Y)
elif self.loss_type == 'cosine' or self.loss_type == 'cos':
loss = 1 - cos.cosine(X, Y)
elif self.loss_type == 'kl':
loss = kl_js.kl_div(X, Y)
elif self.loss_type == 'js':
loss = kl_js.js(X, Y)
elif self.loss_type == 'mine':
mine_model = mutual_info.Mine_estimator(
input_dim=self.input_dim, hidden_dim=60)
loss = mine_model(X, Y)
elif self.loss_type == 'adv':
loss = adv_loss.adv(X, Y, input_dim=self.input_dim, hidden_dim=32)
elif self.loss_type == 'mmd_rbf':
mmdloss = mmd.MMD_loss(kernel_type='rbf')
loss = mmdloss(X, Y)
elif self.loss_type == 'pairwise':
pair_mat = pair_dist.pairwise_dist(X, Y)
import torch
loss = torch.norm(pair_mat)
return loss
if __name__ == "__main__":
import torch
trans_loss = TransferLoss('adv')
a = (torch.randn(5, 512) * 10)
b = (torch.randn(5, 512) * 10)
print(trans_loss.compute(a, b))

View File

@ -0,0 +1,411 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/15 14:44
@Usage :
@Desc :
'''
import torch
import torch.nn as nn
from RUL.otherIdea.adaRNN.loss_transfer import TransferLoss
import torch.nn.functional as F
from RUL.baseModel.dctAttention import dct_channel_block
class dctLSTMCell(nn.Module):
def __init__(self, input_dim, hidden_dim, bias):
"""
        Initialize an LSTM cell with DCT channel attention.
        Parameters
        ----------
        input_dim: int
            Number of channels of input tensor.
        hidden_dim: int
            Number of channels of hidden state.
        bias: bool
            Whether or not to add the bias.
        Input:
            A tensor of size B, T, C
            B: batch_size
            T: timestamp
            C: channel
"""
super(dctLSTMCell, self).__init__()
self.input_dim = input_dim
self.hidden_dim = hidden_dim
self.bias = bias
self.hidden = nn.Linear(in_features=self.input_dim + self.hidden_dim,
out_features=4 * self.hidden_dim,
bias=self.bias)
self.attention = dct_channel_block(channel=self.input_dim + self.hidden_dim)
def forward(self, input_tensor, cur_state):
# shape :b,c
h_cur, c_cur = cur_state
combined = torch.cat([input_tensor, h_cur], dim=-1) # concatenate along channel axis
        # add a channel attention step
combined = self.attention(combined)
combined_linear = self.hidden(combined)
cc_i, cc_f, cc_o, cc_g = torch.split(combined_linear, self.hidden_dim, dim=-1)
i = torch.sigmoid(cc_i)
f = torch.sigmoid(cc_f)
o = torch.sigmoid(cc_o)
g = torch.tanh(cc_g)
c_next = f * c_cur + i * g
h_next = o * torch.tanh(c_next)
return h_next, c_next
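    # Descriptive note (added): the gate computations above follow the standard fully
    # connected LSTM cell,
    #   i = sigmoid(W_i [x, h] + b_i),  f = sigmoid(W_f [x, h] + b_f)
    #   o = sigmoid(W_o [x, h] + b_o),  g = tanh(W_g [x, h] + b_g)
    #   c_next = f * c + i * g,         h_next = o * tanh(c_next)
    # with the DCT channel attention re-weighting [x, h] before the shared linear layer.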
def init_hidden(self, batch_size):
return (torch.zeros(batch_size, self.hidden_dim, device=self.hidden.weight.device),
torch.zeros(batch_size, self.hidden_dim, device=self.hidden.weight.device))
class LSTM(nn.Module):
"""
Parameters:
input_dim: Number of channels in input
        hidden_dim: Number of hidden channels
        num_layers: Number of LSTM layers stacked on each other
        batch_first: Whether or not dimension 0 is the batch or not
        bias: Whether to use a bias in the linear layers
        return_all_layers: Return the list of computations for all layers
    Input:
        A tensor of size B, T, C or T, B, C
    Output:
        A tuple of two lists of length num_layers (or length 1 if return_all_layers is False).
            0 - layer_output_list is the list of per-layer outputs, each of shape (B, T, hidden_dim)
            1 - last_state_list is the list of last states
                    each element of the list is a tuple (h, c) for hidden state and memory
    Example:
        >> x = torch.rand((32, 10, 64))
        >> lstm = LSTM(64, 16, 1, True, True, False)
        >> _, last_states = lstm(x)
>> h = last_states[0][0] # 0 for layer index, 0 for h index
"""
def __init__(self, input_dim, hidden_dim, num_layers,
batch_first=False, bias=True, return_all_layers=False):
super(LSTM, self).__init__()
# Make sure that both `kernel_size` and `hidden_dim` are lists having len == num_layers
hidden_dim = self._extend_for_multilayer(hidden_dim, num_layers)
if not len(hidden_dim) == num_layers:
raise ValueError('Inconsistent list length.')
self.input_dim = input_dim
self.hidden_dim = hidden_dim
self.num_layers = num_layers
self.batch_first = batch_first
self.bias = bias
self.return_all_layers = return_all_layers
cell_list = []
for i in range(0, self.num_layers):
cur_input_dim = self.input_dim if i == 0 else self.hidden_dim[i - 1]
cell_list.append(
dctLSTMCell(input_dim=cur_input_dim,
hidden_dim=self.hidden_dim[i],
bias=self.bias),
)
self.cell_list = nn.ModuleList(cell_list)
def forward(self, input_tensor, hidden_state=None):
"""
Parameters
----------
        input_tensor:
            3-D Tensor either of shape (t, b, c) or (b, t, c)
        hidden_state:
            None. (stateful mode is not implemented yet)
Returns
-------
last_state_list, layer_output
"""
if not self.batch_first:
            # equivalent to a transpose
            # (t, b, c) -> (b, t, c)
input_tensor = input_tensor.permute(1, 0, 2)
b, _, _ = input_tensor.size()
# Implement stateful ConvLSTM
if hidden_state is not None:
raise NotImplementedError()
else:
# Since the init is done in forward. Can send image size here
hidden_state = self._init_hidden(batch_size=b)
layer_output_list = []
last_state_list = []
timestamp = input_tensor.size(1)
cur_layer_input = input_tensor
for layer_idx in range(self.num_layers):
h, c = hidden_state[layer_idx]
output_inner = []
for t in range(timestamp):
h, c = self.cell_list[layer_idx](input_tensor=cur_layer_input[:, t, :],
cur_state=[h, c])
output_inner.append(h)
layer_output = torch.stack(output_inner, dim=1)
            # TODO: add a dct_attention block between layers
# layer_output = self.attention_list[layer_idx](layer_output)
cur_layer_input = layer_output
layer_output_list.append(layer_output)
last_state_list.append([h, c])
if not self.return_all_layers:
layer_output_list = layer_output_list[-1:]
last_state_list = last_state_list[-1:]
return layer_output_list, last_state_list
def _init_hidden(self, batch_size):
init_states = []
for i in range(self.num_layers):
init_states.append(self.cell_list[i].init_hidden(batch_size))
return init_states
@staticmethod
def _extend_for_multilayer(param, num_layers):
if not isinstance(param, list):
param = [param] * num_layers
return param
class AdaRNN(nn.Module):
"""
model_type: 'Boosting', 'AdaRNN'
bottleneck_list: (dim,is_BatchNorm,is_ReLu,drop_out)
"""
def __init__(self, use_bottleneck=False, bottleneck_list=[(64, False, False, 0), (64, True, True, 0.5)],
n_input=128, n_hiddens=[64, 64], n_output=6,
dropout=0.0, len_seq=9, model_type='AdaRNN',
trans_loss='mmd'):
super(AdaRNN, self).__init__()
self.use_bottleneck = use_bottleneck
self.n_input = n_input
self.num_layers = len(n_hiddens)
self.hiddens = n_hiddens
self.n_output = n_output
self.model_type = model_type
self.trans_loss = trans_loss
self.len_seq = len_seq
in_size = self.n_input
features = nn.ModuleList()
# dctAttention = nn.ModuleList()
for hidden in n_hiddens:
# rnn = nn.GRU(
# input_size=in_size,
# num_layers=1,
# hidden_size=hidden,
# batch_first=True,
# dropout=dropout
# )
rnn = LSTM(input_dim=in_size, hidden_dim=[hidden], num_layers=1, batch_first=True, return_all_layers=True)
# attention = dct_channel_block(channel=hidden)
features.append(rnn)
# dctAttention.append(attention)
in_size = hidden
self.features = nn.Sequential(*features)
# self.dctAttention = nn.Sequential(*dctAttention)
if use_bottleneck == True: # finance
bottleneck = []
for i in range(len(bottleneck_list)):
cur_input_dim = self.hiddens[-1] if i == 0 else bottleneck_list[i - 1][0]
bottleneck.append(
nn.Linear(cur_input_dim, bottleneck_list[i][0])
)
                ### without explicit initial weights the hard prediction is less stable and oscillates more
                # the larger the initial weights, the larger the oscillation amplitude
bottleneck[-1].weight.data.normal_(0, 0.03)
bottleneck[-1].bias.data.fill_(0.01)
if bottleneck_list[i][1]:
bottleneck.append(nn.BatchNorm1d(bottleneck_list[i][0]))
if bottleneck_list[i][2]:
bottleneck.append(nn.ReLU())
if bottleneck_list[i][3] != 0:
bottleneck.append(nn.Dropout(bottleneck_list[i][3]))
self.bottleneck = nn.Sequential(*bottleneck)
self.fc = nn.Linear(bottleneck_list[-1][0], n_output)
torch.nn.init.xavier_normal_(self.fc.weight)
else:
self.fc_out = nn.Linear(n_hiddens[-1], self.n_output)
if self.model_type == 'AdaRNN':
gate = nn.ModuleList()
for i in range(len(n_hiddens)):
gate_weight = nn.Linear(
len_seq * self.hiddens[i] * 2, len_seq)
gate.append(gate_weight)
self.gate = gate
bnlst = nn.ModuleList()
for i in range(len(n_hiddens)):
bnlst.append(nn.BatchNorm1d(len_seq))
self.bn_lst = bnlst
self.softmax = torch.nn.Softmax(dim=0)
self.init_layers()
def init_layers(self):
for i in range(len(self.hiddens)):
self.gate[i].weight.data.normal_(0, 0.05)
self.gate[i].bias.data.fill_(0.0)
def forward_pre_train(self, x, len_win=0):
out = self.gru_features(x)
        # output of the final recurrent layer
fea = out[0]
if self.use_bottleneck == True:
fea_bottleneck = self.bottleneck(fea[:, -1, :])
fc_out = self.fc(fea_bottleneck).squeeze()
else:
fc_out = self.fc_out(fea[:, -1, :]).squeeze()
        # per-layer outputs, and the normalized per-layer timestep weights
out_list_all, out_weight_list = out[1], out[2]
        # can be read as the first half and the second half of the batch (source / target)
out_list_s, out_list_t = self.get_features(out_list_all)
loss_transfer = torch.zeros((1,))
for i in range(len(out_list_s)):
criterion_transder = TransferLoss(
loss_type=self.trans_loss, input_dim=out_list_s[i].shape[2])
h_start = 0
for j in range(h_start, self.len_seq, 1):
i_start = max(j - len_win, 0)
i_end = j + len_win if j + len_win < self.len_seq else self.len_seq - 1
for k in range(i_start, i_end + 1):
weight = out_weight_list[i][j] if self.model_type == 'AdaRNN' else 1 / (
self.len_seq - h_start) * (2 * len_win + 1)
loss_transfer = loss_transfer + weight * criterion_transder.compute(
out_list_s[i][:, j, :], out_list_t[i][:, k, :])
return fc_out, loss_transfer, out_weight_list
def gru_features(self, x, predict=False):
x_input = x
out = None
out_lis = []
out_weight_list = [] if (
self.model_type == 'AdaRNN') else None
for i in range(self.num_layers):
            # output of the recurrent layer
out, _ = self.features[i](x_input.float())
out = out[0]
# out = self.dctAttention[i](out.float())
x_input = out
out_lis.append(out)
if self.model_type == 'AdaRNN' and predict == False:
out_gate = self.process_gate_weight(x_input, i)
out_weight_list.append(out_gate)
        # final output, per-layer outputs, and the normalized per-layer timestep weights
return out, out_lis, out_weight_list
def process_gate_weight(self, out, index):
        x_s = out[0: int(out.shape[0] // 2)]  # first half of the batch, domain Di
        x_t = out[out.shape[0] // 2: out.shape[0]]  # second half of the batch, domain Dj
        # the two halves correspond to different domains
x_all = torch.cat((x_s, x_t), 2)
x_all = x_all.view(x_all.shape[0], -1)
weight = torch.sigmoid(self.bn_lst[index](
self.gate[index](x_all.float())))
weight = torch.mean(weight, dim=0)
res = self.softmax(weight).squeeze()
return res
def get_features(self, output_list):
fea_list_src, fea_list_tar = [], []
for fea in output_list:
fea_list_src.append(fea[0: fea.size(0) // 2])
fea_list_tar.append(fea[fea.size(0) // 2:])
return fea_list_src, fea_list_tar
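    # Descriptive note (added): each batch is assumed to stack the source-domain samples
    # in its first half and the target-domain samples in its second half, so slicing at
    # size(0) // 2 recovers the two domains for every layer output.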
# For Boosting-based
def forward_Boosting(self, x, weight_mat=None):
out = self.gru_features(x)
fea = out[0]
if self.use_bottleneck:
fea_bottleneck = self.bottleneck(fea[:, -1, :])
fc_out = self.fc(fea_bottleneck).squeeze()
else:
fc_out = self.fc_out(fea[:, -1, :]).squeeze()
out_list_all = out[1]
        # can be read as the first and second halves of the batch (source / target)
out_list_s, out_list_t = self.get_features(out_list_all)
loss_transfer = torch.zeros((1,))
if weight_mat is None:
weight = (1.0 / self.len_seq *
torch.ones(self.num_layers, self.len_seq))
else:
weight = weight_mat
dist_mat = torch.zeros(self.num_layers, self.len_seq)
for i in range(len(out_list_s)):
criterion_transder = TransferLoss(
loss_type=self.trans_loss, input_dim=out_list_s[i].shape[2])
for j in range(self.len_seq):
loss_trans = criterion_transder.compute(
out_list_s[i][:, j, :], out_list_t[i][:, j, :])
loss_transfer = loss_transfer + weight[i, j] * loss_trans
dist_mat[i, j] = loss_trans
return fc_out, loss_transfer, dist_mat, weight
# For Boosting-based
def update_weight_Boosting(self, weight_mat, dist_old, dist_new):
epsilon = 1e-12
dist_old = dist_old.detach()
dist_new = dist_new.detach()
ind = dist_new > dist_old + epsilon
weight_mat[ind] = weight_mat[ind] * \
(1 + torch.sigmoid(dist_new[ind] - dist_old[ind]))
weight_norm = torch.norm(weight_mat, dim=1, p=1)
weight_mat = weight_mat / weight_norm.t().unsqueeze(1).repeat(1, self.len_seq)
return weight_mat
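    # Descriptive note (added): (layer, timestep) entries whose distribution distance grew
    # since the previous epoch are boosted by (1 + sigmoid(delta)), then each layer's
    # weight row is re-normalized to sum to 1 (L1 norm).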
def predict(self, x):
out = self.gru_features(x, predict=True)
fea = out[0]
if self.use_bottleneck:
fea_bottleneck = self.bottleneck(fea[:, -1, :])
fc_out = self.fc(fea_bottleneck).squeeze()
else:
fc_out = self.fc_out(fea[:, -1, :]).squeeze()
return fc_out

View File

@ -0,0 +1,94 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/10 16:27
@Usage :
@Desc :
'''
import numpy as np
import torch
from RUL.otherIdea.LSTM.loadData import getDataset, getTotalData
from RUL.otherIdea.dctLSTM.model import PredictModel
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
from RUL.baseModel.plot import plot_prediction, plot_forSelf
# Recursive prediction: only the newly predicted point is fed back to forecast the following steps
def predictOneByOne(model, train_data, predict_num=50):
    # take the last sample of the training data
each_predict_data = train_data[-1].unsqueeze(0)
predicted_list = np.empty(shape=(predict_num, 1)) # (5,filter_num,30)
# all_data = total_data # (1201,)
for each_predict in range(predict_num):
# predicted_data.shape : (1,1)
predicted_data = model.predict(each_predict_data).cpu().detach().numpy() # (batch_size,filer_num,1)
predicted_list[each_predict] = predicted_data
each_predict_data = each_predict_data.numpy()
# (1,1) => (10,1)
        # intermediate concatenation steps: (1) => (10) => (40,10) => (30,40,10)
c = each_predict_data[-1, -1, 1:]
a = np.concatenate([each_predict_data[-1, -1, 1:], np.expand_dims(predicted_data, axis=0)], axis=0)
b = np.concatenate([each_predict_data[-1, 1:, :], np.expand_dims(a, axis=0)], axis=0)
c = np.expand_dims(b, axis=0)
each_predict_data = torch.tensor(c)
return np.squeeze(predicted_list)
def test(hidden_num, feature, predict_num, batch_size, model, is_single=True, is_norm=False, save_fig_name=""):
total_data, total_dataset = getTotalData(hidden_num, feature, is_single=is_single, is_norm=is_norm)
train_dataset, val_dataset = getDataset(hidden_num, feature, predict_num=predict_num, is_single=is_single,
is_norm=is_norm)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=False)
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)
    # load the network
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(model)
params_num = sum(param.numel() for param in model.parameters())
    print('Number of parameters: {}'.format(params_num))
model.eval()
predicted_data_easy = total_data[:hidden_num + feature, ]
predicted_data_hard = total_data[:hidden_num + feature, ]
with torch.no_grad():
for batch_idx, (data, label) in enumerate(train_loader):
data, label = data.to(device), label.to(device)
last_train_data = data
each_predicted_data = model.predict(data).cpu().detach().numpy()
predicted_data_easy = np.concatenate(
[predicted_data_easy, each_predicted_data],
axis=0)
predicted_data_hard = np.concatenate(
[predicted_data_hard, each_predicted_data],
axis=0)
        # easy version: each prediction uses the known ground-truth inputs
for batch_idx, (data, label) in enumerate(val_loader):
data, label = data.to(device), label.to(device)
each_predicted_data = model.predict(data).cpu().detach().numpy()
predicted_data_easy = np.concatenate(
[predicted_data_easy, each_predicted_data],
axis=0)
        # hard version: each prediction is based on the previous prediction
predicted_data_hard = np.concatenate([predicted_data_hard,
predictOneByOne(model, last_train_data, predict_num=predict_num)], axis=0)
plot_prediction(total_data, predicted_data_easy, predicted_data_hard, save_fig_name, predict_num=predict_num)
plot_forSelf(total_data, predicted_data_easy, predicted_data_hard)
if __name__ == '__main__':
test(40, 10, 50, 32,
"E:\self_example\pytorch_example\RUL\otherIdea/adaRNN\outputs\AdaRNN_tdcLoss(cos)_transferLoss(cos)_dw0.5_lr0.0005\parameters\AdaRNN_hidden24_feature10_predict50_dimList64-64_epoch62_trainLoss0.5115623474121094_valLoss0.12946119904518127.pkl"
)

View File

@ -0,0 +1,476 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/15 16:32
@Usage :
@Desc :
'''
import torch.nn as nn
import torch
import torch.optim as optim
import os
import argparse
import datetime
import numpy as np
import random
from tqdm import tqdm
from RUL.otherIdea.adaDctEmdLSTM.utils import utils
from RUL.otherIdea.adaDctEmdLSTM.model import AdaRNN
import RUL.otherIdea.adaDctEmdLSTM.dataset_vibrate.data_process as data_process
from RUL.baseModel.loss.ffd import fft_mse, dct_mse
from RUL.otherIdea.adaDctEmdLSTM.test import test
import matplotlib.pyplot as plt
import time
from RUL.baseModel.CommonFunction import IsStopTraining
'''
Hyperparameter settings:
'''
# data preparation
is_norm = False
is_single = True
tdc_loss_type = 'cos'
num_domain = 2  # number of domains to split into (source / target)
# RNN related
hidden_num = 10  # number of LSTM cells (window length)
feature = 2  # dimension of a single point
predict_num = 50  # number of points to predict
batch_size = 32
model_name = "AdaRNN"
hidden_list = [64, 64]  # hidden dimension of each RNN layer
### bottleneck_list: (dim,is_BatchNorm,is_ReLu,drop_out)
bottleneck = [(64, False, False, 0), (64, True, True, 0.5)]
# bottleneck = [(128, False, True, 0),
# (64, True, True, 0.2),
# (32, True, True, 0.2),
# (16, False, False, 0)]
# training related
pre_epoch = 40
epochs = 1000
transfer_loss_type = 'cos'  # 'cos' has worked best in tests so far
dw = 0.5
fft_dw = 0.1
lr = 0.01
len_win = 0  # window size; kept at 0, its effect is still unclear
seed = 125
# initialization
out_dir = './outputs'
output_path = out_dir + '/{0}_tdc({1})_transfer({2})_domain{3}_dw{4}_fdw{5}_lr{6}_Norm{7}'.format(model_name,
tdc_loss_type,
transfer_loss_type,
num_domain,
dw, fft_dw, lr,
is_norm)
save_model_name = 'parameters/seed{0}_hidden{1}_feature{2}_predict{3}_dimList{4}'.format(seed, hidden_num,
feature,
predict_num,
str(hidden_list[
0]) + "-" + str(
hidden_list[1]))
save_fig_name = 'fig/{0}_hidden{1}_feature{2}_predict{3}_dimList{4}.png'.format(model_name, hidden_num,
feature,
predict_num,
str(hidden_list[0]) + "-" + str(
hidden_list[1]))
utils.dir_exist(output_path)
utils.dir_exist(os.path.join(output_path, 'parameters'))
utils.dir_exist(os.path.join(output_path, 'fig'))
log_file = os.path.join(output_path, 'run.log')
def pprint(*text):
# print with UTC+8 time
time = '[' + str(datetime.datetime.utcnow() +
datetime.timedelta(hours=8))[:19] + '] -'
print(time, *text, flush=True)
if log_file is None:
return
with open(log_file, 'a') as f:
print(time, *text, flush=True, file=f)
def get_model(name='AdaRNN'):
    # empirically, adding the bottleneck makes the prediction oscillate more, while without it only the overall trend remains
# bottleneck_list: (dim,is_BatchNorm,is_ReLu,drop_out)
return AdaRNN(use_bottleneck=True, bottleneck_list=bottleneck, n_input=feature, n_hiddens=hidden_list,
n_output=1, dropout=0.0, model_type=name, len_seq=hidden_num,
trans_loss=transfer_loss_type)
def train_AdaRNN(model, optimizer, train_loader_list, epoch, dist_old=None, weight_mat=None):
model.train()
criterion = nn.MSELoss()
criterion_1 = nn.L1Loss()
loss_all = []
loss_1_all = []
dist_mat = torch.zeros(len(hidden_list), hidden_num)
len_loader = np.inf
for loader in train_loader_list:
if len(loader) < len_loader:
len_loader = len(loader)
for data_all in tqdm(zip(*train_loader_list), total=len_loader):
optimizer.zero_grad()
        # if the batch sizes of the different training domains are misaligned, the loss cannot be computed
        # to avoid dropping samples, use the smallest per-domain batch size for this step
min_batch_size = 10000
for data in data_all:
min_batch_size = min(min_batch_size, data[0].shape[0])
list_feat = []
list_label = []
for data in data_all:
feature, label, label_reg = data[0].float(
), data[1].float(), data[2].float()
list_feat.append(feature[:min_batch_size])
list_label.append(label_reg[:min_batch_size])
index = get_index(len(data_all) - 1)
loss_mse = torch.zeros(1)
loss_fft = torch.zeros(1)
loss_transfer = torch.zeros(1)
total_loss_l1 = torch.zeros(1)
for i in range(len(index)):
feature_s = list_feat[index[i][0]]
feature_t = list_feat[index[i][1]]
label_reg_s = list_label[index[i][0]]
label_reg_t = list_label[index[i][1]]
            # concatenate along the batch dimension
feature_all = torch.cat((feature_s, feature_t), 0)
if epoch < pre_epoch:
pred_all, each_loss_transfer, out_weight_list = model.forward_pre_train(
feature_all, len_win=len_win)
else:
pred_all, each_loss_transfer, dist, weight_mat = model.forward_Boosting(
feature_all, weight_mat)
dist_mat = dist_mat + dist
pred_s = pred_all[0:feature_s.size(0)]
pred_t = pred_all[feature_s.size(0):]
loss_s = criterion(pred_s, label_reg_s)
loss_t = criterion(pred_t, label_reg_t)
lossf_s = dct_mse(pred_s, label_reg_s)
lossf_t = dct_mse(pred_t, label_reg_t)
loss_l1 = criterion_1(pred_s, label_reg_s)
loss_mse += loss_s + loss_t
loss_fft += (lossf_s + lossf_t) * fft_dw
loss_transfer += dw * each_loss_transfer
total_loss_l1 += loss_l1
total_loss = loss_mse + loss_transfer
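        # Note (added): loss_fft is accumulated for logging only; it is not included in
        # total_loss, so it does not contribute to the gradients below.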
loss_all.append([total_loss.item(), loss_mse.item(), loss_transfer.item(), loss_fft.item()])
loss_1_all.append(total_loss_l1.item())
        # backpropagation
        total_loss.backward()
        # gradient clipping: clamp each gradient value to [-3, 3]
torch.nn.utils.clip_grad_value_(model.parameters(), 3.)
optimizer.step()
loss = np.array(loss_all).mean(axis=0)
loss_l1 = np.array(loss_1_all).mean()
if epoch >= pre_epoch:
if epoch > pre_epoch:
weight_mat = model.update_weight_Boosting(
weight_mat, dist_old, dist_mat)
return loss, loss_l1, weight_mat, dist_mat
else:
weight_mat = transform_type(out_weight_list)
return loss, loss_l1, weight_mat, None
def get_index(num_domain=2):
index = []
for i in range(num_domain):
for j in range(i + 1, num_domain + 1):
index.append((i, j))
return index
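# Quick check (added; follows from the loop above): get_index(2) -> [(0, 1), (0, 2), (1, 2)]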
def count_parameters(model):
return sum(p.numel() for p in model.parameters() if p.requires_grad)
def val_epoch(model, val_loader, device, scheduler):
model.eval()
val_loss = 0
val_loss_1 = 0
val_loss_r = 0
criterion = nn.MSELoss()
criterion_1 = nn.L1Loss()
with torch.no_grad():
for val_batch_idx, (val_data, val_continue, val_label) in enumerate(val_loader):
val_data, val_label = val_data.to(device), val_label.to(device)
val_predict_data = model.predict(val_data)
loss = criterion(val_predict_data, val_label)
loss_r = torch.sqrt(loss)
loss_1 = criterion_1(val_predict_data, val_label)
val_loss += loss.item()
val_loss_1 += loss_1.item()
val_loss_r += loss_r.item()
scheduler.step(val_loss)
loss = val_loss / len(val_loader)
loss_1 = val_loss_1 / len(val_loader)
loss_r = val_loss_r / len(val_loader)
return loss, loss_1, loss_r
def test_epoch_inference(model, test_loader, prefix='Test'):
model.eval()
total_loss = 0
total_loss_1 = 0
total_loss_r = 0
correct = 0
criterion = nn.MSELoss()
criterion_1 = nn.L1Loss()
i = 0
for feature, label, label_reg in tqdm(test_loader, desc=prefix, total=len(test_loader)):
feature, label_reg = feature.float(), label_reg.float()
with torch.no_grad():
pred = model.predict(feature)
loss = criterion(pred, label_reg)
loss_r = torch.sqrt(loss)
loss_1 = criterion_1(pred, label_reg)
total_loss += loss.item()
total_loss_1 += loss_1.item()
total_loss_r += loss_r.item()
if i == 0:
label_list = label_reg.cpu().numpy()
predict_list = pred.cpu().numpy()
else:
label_list = np.hstack((label_list, label_reg.cpu().numpy()))
predict_list = np.hstack((predict_list, pred.cpu().numpy()))
i = i + 1
loss = total_loss / len(test_loader)
loss_1 = total_loss_1 / len(test_loader)
loss_r = total_loss_r / len(test_loader)
return loss, loss_1, loss_r, label_list, predict_list
def inference(model, data_loader):
loss, loss_1, loss_r, label_list, predict_list = test_epoch_inference(
model, data_loader, prefix='Inference')
return loss, loss_1, loss_r, label_list, predict_list
def inference_all(output_path, model, model_path, loaders):
pprint('inference...')
loss_list = []
loss_l1_list = []
loss_r_list = []
model.load_state_dict(torch.load(model_path))
i = 0
for loader in loaders:
loss, loss_1, loss_r, label_list, predict_list = inference(
model, loader)
loss_list.append(loss)
loss_l1_list.append(loss_1)
loss_r_list.append(loss_r)
i = i + 1
return loss_list, loss_l1_list, loss_r_list
def transform_type(init_weight):
weight = torch.ones(len(hidden_list), hidden_num)
for i in range(weight.shape[0]):
for j in range(weight.shape[1]):
weight[i, j] = init_weight[i][j].item()
return weight
def loadData():
train_loader_list, valid_loader, test_loader = data_process.load_weather_data_multi_domain(
hidden_num=hidden_num, feature=feature, predict_num=predict_num, is_norm=is_norm, batch_size=batch_size,
number_domain=num_domain, mode='tdc', dis_type=tdc_loss_type
)
return train_loader_list, valid_loader, test_loader
pass
def train(model, train_loader_list, valid_loader, lr_patience, early_stop_patience, device):
optimizer = optim.SGD(model.parameters(), lr=lr)
scheduler_model = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", factor=0.5,
patience=lr_patience)
best_score = np.inf
best_epoch, stop_round = 0, 0
weight_mat, dist_mat = None, None
train_loss_list = []
val_loss_list = []
for epoch in range(epochs):
epoch_start_time = time.time()
train_loss, loss1, weight_mat, dist_mat = train_AdaRNN(
model, optimizer, train_loader_list, epoch, dist_mat, weight_mat)
val_loss, val_loss_l1, val_loss_r = val_epoch(
model, valid_loader, device=device, scheduler=scheduler_model)
pprint(
"[{:03d}/{:03d}] {:2.2f} sec(s) train_total_loss: {:3.9f} | train_mse_loss: {:3.9f} | train_transfer_loss: {:3.9f} | train_transfer_loss: {:3.9f} "
" | val_loss: {:3.9f} | Learning rate : {:3.6f}".format(
epoch + 1, epochs, time.time() - epoch_start_time,
train_loss[0], train_loss[1], train_loss[3], train_loss[2],
val_loss,
optimizer.state_dict()['param_groups'][0]['lr']))
if len(val_loss_list) == 0 or val_loss < min(val_loss_list):
pprint("保存模型最佳模型成功")
best_epoch = epoch
best_score = val_loss
# 保存模型参数
best_save_path = save_model_name + "_epoch" + str(epoch) + \
"_trainLoss" + str('%.5f' % train_loss[1]) + \
"_valLoss" + str('%.5f' % val_loss) + ".pkl"
print(os.path.join(output_path, best_save_path))
torch.save(model.state_dict(),
os.path.join(output_path, best_save_path))
train_loss_list.append(train_loss)
val_loss_list.append(val_loss)
if IsStopTraining(history_loss=val_loss_list, patience=early_stop_patience):
pprint("{0}次loss未下降,训练停止".format(early_stop_patience))
break
pprint('best val score:', best_score, '@', best_epoch)
return best_save_path
pass
def main_transfer():
if torch.cuda.is_available():
device = torch.device("cuda:0")
else:
device = torch.device("cpu")
pprint('create DataLoaders...')
train_loader_list, valid_loader, test_loader = loadData()
pprint('create AdaRNN model...')
model = get_model(model_name)
num_model = count_parameters(model)
print(model)
print('#model params:', num_model)
pprint('train model...')
best_save_path = train(model=model, train_loader_list=train_loader_list, valid_loader=valid_loader, lr_patience=20,
early_stop_patience=50, device=device)
end = time.time()
print("训练耗时:{:3.2f}s".format(end - begin))
pprint('验证模型...')
loaders = train_loader_list[0], valid_loader, test_loader
loss_list, loss_l1_list, loss_r_list = inference_all(output_path, model, os.path.join(
output_path, best_save_path), loaders)
pprint('MSE: train %.6f, valid %.6f, test %.6f' %
(loss_list[0], loss_list[1], loss_list[2]))
pprint('L1: train %.6f, valid %.6f, test %.6f' %
(loss_l1_list[0], loss_l1_list[1], loss_l1_list[2]))
pprint('RMSE: train %.6f, valid %.6f, test %.6f' %
(loss_r_list[0], loss_r_list[1], loss_r_list[2]))
pprint('Finished.')
    # load the network
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.load_state_dict(torch.load(os.path.join(output_path, best_save_path), map_location=device))
test(hidden_num=hidden_num, feature=feature, predict_num=predict_num, batch_size=batch_size, model=model,
is_single=is_single, is_norm=is_norm, save_fig_name=os.path.join(output_path, save_fig_name))
def after_test(save_name):
model = get_model(model_name)
    # load the network
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.load_state_dict(torch.load(
save_name
, map_location=device))
test(hidden_num=hidden_num, feature=feature, predict_num=predict_num, batch_size=batch_size, model=model,
is_single=is_single, is_norm=is_norm)
def get_args():
parser = argparse.ArgumentParser()
# model
parser.add_argument('--model_name', default='AdaRNN')
parser.add_argument('--d_feat', type=int, default=feature)
parser.add_argument('--hidden_size', type=int, default=64)
parser.add_argument('--num_layers', type=int, default=2)
parser.add_argument('--dropout', type=float, default=0.0)
parser.add_argument('--class_num', type=int, default=1)
parser.add_argument('--pre_epoch', type=int, default=40) # 20, 30, 50
# training
parser.add_argument('--n_epochs', type=int, default=200)
parser.add_argument('--lr', type=float, default=5e-4)
parser.add_argument('--early_stop', type=int, default=40)
parser.add_argument('--smooth_steps', type=int, default=5)
parser.add_argument('--batch_size', type=int, default=36)
parser.add_argument('--dw', type=float, default=0.5) # 0.01, 0.05, 5.0
parser.add_argument('--loss_type', type=str, default='cos')
parser.add_argument('--data_mode', type=str, default='tdc')
parser.add_argument('--num_domain', type=int, default=2)
parser.add_argument('--len_seq', type=int, default=hidden_num)
# other
parser.add_argument('--seed', type=int, default=10)
parser.add_argument('--data_path', default="E:\self_example\pytorch_example\RUL\otherIdea/adaRNN\dataset/")
parser.add_argument('--outdir', default='./outputs')
parser.add_argument('--overwrite', action='store_true')
parser.add_argument('--log_file', type=str, default='run.log')
parser.add_argument('--gpu_id', type=int, default=0)
parser.add_argument('--len_win', type=int, default=0)
args = parser.parse_args()
return args
if __name__ == '__main__':
begin = time.time()
if torch.cuda.is_available():
device = torch.device("cuda:0")
else:
device = torch.device("cpu")
torch.manual_seed(seed)
random.seed(seed)
np.random.seed(seed)
    # training and testing
main_transfer()
    '''Post-hoc testing'''
# after_test(save_name="E:\self_example\pytorch_example\RUL\otherIdea/adaRNN\outputs\AdaRNN_tdcLoss(cos)_transferLoss(cos)_domain2_dw0.5_lr0.0005\parameters\AdaRNN_hidden24_feature10_predict50_dimList64-64_epoch62_trainLoss0.5115623474121094_valLoss0.12946119904518127.pkl")

View File

@ -0,0 +1,8 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/15 16:33
@Usage :
@Desc :
'''

View File

@ -0,0 +1,225 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/15 16:33
@Usage :
@Desc :
'''
import collections
import torch
import os
import pandas as pd
import torch.nn as nn
from tqdm import tqdm
import numpy as np
EPS = 1e-12
class AverageMeter(object):
def __init__(self):
self.reset()
def reset(self):
self.val = 0
self.avg = 0
self.sum = 0
self.count = 0
self.list = []
def update(self, val, n=1):
self.val = val
self.list.append(val)
self.sum += val * n
self.count += n
self.avg = self.sum / self.count
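# Minimal usage sketch (added; illustrative values):
#   meter = AverageMeter(); meter.update(0.5); meter.update(1.5)
#   meter.avg == 1.0, meter.count == 2, meter.sum == 2.0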
def average_params(params_list):
assert isinstance(params_list, (tuple, list, collections.deque))
n = len(params_list)
if n == 1:
return params_list[0]
new_params = collections.OrderedDict()
keys = None
for i, params in enumerate(params_list):
if keys is None:
keys = params.keys()
for k, v in params.items():
if k not in keys:
raise ValueError('the %d-th model has different params' % i)
if k not in new_params:
new_params[k] = v / n
else:
new_params[k] += v / n
return new_params
def zscore(x):
return (x - x.mean(dim=0, keepdim=True)) / x.std(dim=0, keepdim=True, unbiased=False)
def calc_loss(pred, label):
return torch.mean((zscore(pred) - label) ** 2)
def calc_corr(pred, label):
return (zscore(pred) * zscore(label)).mean()
def test_ic(model_list, data_list, device, verbose=True, ic_type='spearman'):
'''
model_list: [model1, model2, ...]
datalist: [loader1, loader2, ...]
return: unified ic, specific ic (all values), loss
'''
spec_ic = []
loss_test = AverageMeter()
loss_fn = torch.nn.MSELoss()
label_true, label_pred = torch.empty(0).to(device), torch.empty(0).to(device)
for i in range(len(model_list)):
label_spec_true, label_spec_pred = torch.empty(0).to(device), torch.empty(0).to(device)
model_list[i].eval()
with torch.no_grad():
for _, (feature, label_actual, _, _) in enumerate(data_list[i]):
# feature = torch.tensor(feature, dtype=torch.float32, device=device)
label_actual = label_actual.clone().detach().view(-1, 1)
label_actual, mask = handle_nan(label_actual)
label_predict = model_list[i].predict(feature).view(-1, 1)
label_predict = label_predict[mask]
loss = loss_fn(label_actual, label_predict)
loss_test.update(loss.item())
# Concat them for computing IC later
label_true = torch.cat([label_true, label_actual])
label_pred = torch.cat([label_pred, label_predict])
label_spec_true = torch.cat([label_spec_true, label_actual])
label_spec_pred = torch.cat([label_spec_pred, label_predict])
ic = calc_ic(label_spec_true, label_spec_pred, ic_type)
spec_ic.append(ic.item())
unify_ic = calc_ic(label_true, label_pred, ic_type).item()
# spec_ic.append(sum(spec_ic) / len(spec_ic))
loss = loss_test.avg
if verbose:
print('[IC] Unified IC: {:.6f}, specific IC: {}, loss: {:.6f}'.format(unify_ic, spec_ic, loss))
return unify_ic, spec_ic, loss
def test_ic_daily(model_list, data_list, device, verbose=True, ic_type='spearman'):
'''
model_list: [model1, model2, ...]
datalist: [loader1, loader2, ...]
return: unified ic, specific ic (all values + avg), loss
'''
spec_ic = []
loss_test = AverageMeter()
loss_fn = torch.nn.MSELoss()
label_true, label_pred = torch.empty(0).to(device), torch.empty(0).to(device)
for i in range(len(model_list)):
label_spec_true, label_spec_pred = torch.empty(0).to(device), torch.empty(0).to(device)
model_list[i].eval()
with torch.no_grad():
for slc in tqdm(data_list[i].iter_daily(), total=data_list[i].daily_length):
feature, label_actual, _, _ = data_list[i].get(slc)
# for _, (feature, label_actual, _, _) in enumerate(data_list[i]):
# feature = torch.tensor(feature, dtype=torch.float32, device=device)
label_actual = torch.tensor(label_actual, dtype=torch.float32, device=device).view(-1, 1)
label_actual, mask = handle_nan(label_actual)
label_predict = model_list[i].predict(feature).view(-1, 1)
label_predict = label_predict[mask]
loss = loss_fn(label_actual, label_predict)
loss_test.update(loss.item())
# Concat them for computing IC later
label_true = torch.cat([label_true, label_actual])
label_pred = torch.cat([label_pred, label_predict])
label_spec_true = torch.cat([label_spec_true, label_actual])
label_spec_pred = torch.cat([label_spec_pred, label_predict])
ic = calc_ic(label_spec_true, label_spec_pred, ic_type)
spec_ic.append(ic.item())
unify_ic = calc_ic(label_true, label_pred, ic_type).item()
# spec_ic.append(sum(spec_ic) / len(spec_ic))
loss = loss_test.avg
if verbose:
print('[IC] Unified IC: {:.6f}, specific IC: {}, loss: {:.6f}'.format(unify_ic, spec_ic, loss))
return unify_ic, spec_ic, loss
def test_ic_uni(model, data_loader, model_path=None, ic_type='spearman', verbose=False):
if model_path:
model.load_state_dict(torch.load(model_path))
model.eval()
loss_all = []
ic_all = []
for slc in tqdm(data_loader.iter_daily(), total=data_loader.daily_length):
data, label, _, _ = data_loader.get(slc)
with torch.no_grad():
pred = model.predict(data)
mask = ~torch.isnan(label)
pred = pred[mask]
label = label[mask]
loss = torch.mean(torch.log(torch.cosh(pred - label)))
if ic_type == 'spearman':
ic = spearman_corr(pred, label)
elif ic_type == 'pearson':
ic = pearson_corr(pred, label)
loss_all.append(loss.item())
ic_all.append(ic)
loss, ic = np.mean(loss_all), np.mean(ic_all)
if verbose:
print('IC: ', ic)
return loss, ic
def calc_ic(x, y, ic_type='pearson'):
ic = -100
if ic_type == 'pearson':
ic = pearson_corr(x, y)
elif ic_type == 'spearman':
ic = spearman_corr(x, y)
return ic
def create_dir(path):
if not os.path.exists(path):
os.makedirs(path)
def handle_nan(x):
mask = ~torch.isnan(x)
return x[mask], mask
class Log_Loss(nn.Module):
def __init__(self):
super(Log_Loss, self).__init__()
def forward(self, ytrue, ypred):
delta = ypred - ytrue
return torch.mean(torch.log(torch.cosh(delta)))
def spearman_corr(x, y):
X = pd.Series(x.cpu())
Y = pd.Series(y.cpu())
spearman = X.corr(Y, method='spearman')
return spearman
def spearman_corr2(x, y):
X = pd.Series(x)
Y = pd.Series(y)
spearman = X.corr(Y, method='spearman')
return spearman
def pearson_corr(x, y):
X = pd.Series(x.cpu())
Y = pd.Series(y.cpu())
spearman = X.corr(Y, method='pearson')
return spearman
def dir_exist(dirs):
if not os.path.exists(dirs):
os.makedirs(dirs)

View File

@ -0,0 +1,8 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/20 14:11
@Usage :
@Desc :
'''

View File

@ -0,0 +1,8 @@
#-*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/16 19:46
@Usage :
@Desc :
'''

View File

@ -0,0 +1,111 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/15 16:35
@Usage :
@Desc :
'''
# encoding=utf-8
import RUL.otherIdea.adaRNN.dataset_vibrate.data_vibrate as data_vibrate
from RUL.otherIdea.adaRNN.loss_transfer import TransferLoss
from RUL.otherIdea.adaRNN.dataset_vibrate.loadData import getVibrate_data
import torch
import math
def get_split_time(num_domain=2, mode='pre_process', data=None, dis_type='coral'):
spilt_time = {
'2': [(0, 600), (600, 1200)]
}
if mode == 'pre_process':
return spilt_time[str(num_domain)]
if mode == 'tdc':
return TDC(num_domain, data, dis_type=dis_type)
else:
print("error in mode")
def TDC(num_domain, data, dis_type='coral'):
    # number of samples
    num_day = len(data[0])
    split_N = 10
    feat = data[0][0:num_day]
    feat = torch.tensor(feat, dtype=torch.float32)
    feat_shape_1 = feat.shape[1]  # number of time steps per sample
    feat = feat.reshape(-1, feat.shape[2])
selected = [0, 10]
candidate = [1, 2, 3, 4, 5, 6, 7, 8, 9]
start = 0
if num_domain in [2, 3, 5, 7, 10]:
while len(selected) - 2 < num_domain - 1:
distance_list = []
for can in candidate:
selected.append(can)
selected.sort()
dis_temp = 0
for i in range(1, len(selected) - 1):
for j in range(i, len(selected) - 1):
index_part1_start = start + math.floor(selected[i - 1] / split_N * num_day) * feat_shape_1
index_part1_end = start + math.floor(selected[i] / split_N * num_day) * feat_shape_1
feat_part1 = feat[index_part1_start: index_part1_end]
index_part2_start = start + math.floor(selected[j] / split_N * num_day) * feat_shape_1
index_part2_end = start + math.floor(selected[j + 1] / split_N * num_day) * feat_shape_1
feat_part2 = feat[index_part2_start:index_part2_end]
                        criterion_transfer = TransferLoss(loss_type=dis_type, input_dim=feat_part1.shape[1])
                        dis_temp += criterion_transfer.compute(feat_part1, feat_part2)
distance_list.append(dis_temp)
selected.remove(can)
can_index = distance_list.index(max(distance_list))
selected.append(candidate[can_index])
candidate.remove(candidate[can_index])
selected.sort()
res = []
for i in range(1, len(selected)):
if i == 1:
sel_start_index = int(num_day / split_N * selected[i - 1])
else:
sel_start_index = int(num_day / split_N * selected[i - 1]) + 1
sel_end_index = int(num_day / split_N * selected[i])
res.append((sel_start_index, sel_end_index))
return res
else:
print("error in number of domain")
def load_weather_data_multi_domain(hidden_num, feature, predict_num, batch_size=6, number_domain=2, mode='pre_process',
dis_type='coral', is_norm=False):
# mode: 'tdc', 'pre_process'
train_data, val_data = getVibrate_data(hidden_num=hidden_num, feature=feature, predict_num=predict_num,
is_norm=is_norm)
split_time_list = get_split_time(number_domain, mode=mode, data=train_data, dis_type=dis_type)
train_list = []
for i in range(len(split_time_list)):
index_temp = split_time_list[i]
train_loader = data_vibrate.get_vibrate_data(train_data, start_index=index_temp[0],
end_index=index_temp[1], batch_size=batch_size)
train_list.append(train_loader)
valid_loader = data_vibrate.get_vibrate_data(val_data, start_index=0,
end_index=len(val_data), batch_size=batch_size, mean=None,
std=None, shuffle=False)
test_loader = valid_loader
return train_list, valid_loader, test_loader
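# Note: train_list holds one DataLoader per segment returned by get_split_time (one per source
# domain), while the whole validation split is wrapped in a single loader that is also reused
# as the test loader (test_loader = valid_loader above).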
if __name__ == '__main__':
load_weather_data_multi_domain(hidden_num=10, feature=10, predict_num=50, batch_size=32, number_domain=2,
mode='tdc',
dis_type='coral', is_norm=False)

View File

@ -0,0 +1,78 @@
# -*- encoding:utf-8 -*-
'''
@Author : dingjiawen
@Date : 2023/11/15 16:36
@Usage :
@Desc :
'''
import math
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
from pandas.core.frame import DataFrame
from torch.utils.data import Dataset, DataLoader
import torch
import pickle
import datetime
class data_loader(Dataset):
def __init__(self, df_feature, df_label, df_label_reg, t=None):
assert len(df_feature) == len(df_label)
assert len(df_feature) == len(df_label_reg)
# df_feature = df_feature.reshape(df_feature.shape[0], df_feature.shape[1] // 6, df_feature.shape[2] * 6)
self.df_feature = df_feature
self.df_label = df_label
self.df_label_reg = df_label_reg
self.T = t
self.df_feature = torch.tensor(
self.df_feature, dtype=torch.float32)
self.df_label = torch.tensor(
self.df_label, dtype=torch.float32)
self.df_label_reg = torch.tensor(
self.df_label_reg, dtype=torch.float32)
def __getitem__(self, index):
sample, target, label_reg = self.df_feature[index], self.df_label[index], self.df_label_reg[index]
if self.T:
return self.T(sample), target, label_reg
else:
return sample, target, label_reg
def __len__(self):
return len(self.df_feature)
def create_dataset(data, start_index, end_index, mean=None, std=None):
feat, label_continue, label_single = data[0], data[1], data[2]
    reference_start_index = 0
    reference_end_index = 1250
    assert start_index - reference_start_index >= 0
    assert end_index - reference_end_index <= 0
    assert end_index - start_index >= 0
feat = feat[start_index: end_index + 1]
label = label_continue[start_index: end_index + 1]
label_reg = label_single[start_index: end_index + 1]
# ori_shape_1, ori_shape_2=feat.shape[1], feat.shape[2]
# feat=feat.reshape(-1, feat.shape[2])
# feat=(feat - mean) / std
# feat=feat.reshape(-1, ori_shape_1, ori_shape_2)
return data_loader(feat, label, label_reg)
def get_vibrate_data(data, start_index, end_index, batch_size, shuffle=True, mean=None, std=None):
dataset = create_dataset(data, start_index,
end_index, mean=mean, std=std)
train_loader = DataLoader(
dataset, batch_size=batch_size, shuffle=shuffle)
return train_loader
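# Minimal usage sketch (not part of the original pipeline). The array shapes below are
# invented for illustration only; the real (feature, continuous label, single label) arrays
# come from the upstream loading code.
if __name__ == '__main__':
    _feat = np.random.randn(100, 20, 6).astype('float32')    # (samples, time steps, channels)
    _label = np.random.randn(100, 20).astype('float32')      # continuous label per time step
    _label_reg = np.random.randn(100).astype('float32')      # single regression target per sample
    _loader = get_vibrate_data((_feat, _label, _label_reg), start_index=0, end_index=50,
                               batch_size=8)
    for _x, _y, _y_reg in _loader:
        print(_x.shape, _y.shape, _y_reg.shape)
        break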

Some files were not shown because too many files have changed in this diff