爬虫学习、一些笔试题更新
This commit is contained in:
parent
e8b2aa5f18
commit
c1c870c74e
|
|
@ -0,0 +1,5 @@
|
|||
这个文件夹存放了
|
||||
1)爬虫学习的一些案例,实际案例操作 https://github.com/Python3WebSpider
|
||||
2)实际爬取的一些网站等
|
||||
等
|
||||
爬虫学习中心:https://setup.scrape.center/
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
#-*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/10/17 21:39
|
||||
@Usage :
|
||||
@Desc :
|
||||
'''
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
#-*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/10/17 21:39
|
||||
@Usage :
|
||||
@Desc :
|
||||
'''
|
||||
|
|
@ -0,0 +1,18 @@
|
|||
#-*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/10/17 21:40
|
||||
@Usage : urllib库学习
|
||||
@Desc :
|
||||
'''
|
||||
|
||||
import urllib.request
|
||||
|
||||
# Basic usage: urlopen() returns a response object.
# Without `data` the request is a GET; with `data` (bytes) it is a POST.
# The response is used as a context manager so the underlying connection is
# always closed, even if one of the prints below raises.
with urllib.request.urlopen("https://www.python.org") as response:
    print(response.status)
    print(response.getheaders())
    print(response.getheader('Server'))
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
#-*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/10/17 21:38
|
||||
@Usage :
|
||||
@Desc :
|
||||
'''
|
||||
|
|
@ -0,0 +1,81 @@
|
|||
# -*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/10/17 20:44
|
||||
@Usage : 学习多线程库thread的使用
|
||||
@Desc :
|
||||
'''
|
||||
|
||||
'''
|
||||
参考:
|
||||
[1] https://cuiqingcai.com/3325.html
|
||||
'''
|
||||
|
||||
import queue as Queue
|
||||
import threading
|
||||
import time
|
||||
|
||||
exitFlag = 0
|
||||
|
||||
|
||||
class myThread(threading.Thread):
    """Worker thread that hands its queue to ``process_data``.

    Args:
        threadID: Caller-chosen identifier, stored as ``self.threadID``.
        name: Thread name, also used as the prefix of the printed messages.
        q: Queue object passed through to ``process_data``.
    """

    def __init__(self, threadID, name, q):
        # Let the base class set the thread name instead of overwriting the
        # attribute after construction.
        super().__init__(name=name)
        self.threadID = threadID
        self.q = q

    def run(self):
        """Announce start, run ``process_data`` until it returns, announce exit."""
        print("Starting " + self.name)
        process_data(self.name, self.q)
        print("Exiting " + self.name)
|
||||
|
||||
|
||||
def process_data(threadName, q):
    """Poll ``q`` once per second and print each item until ``exitFlag`` is set.

    Args:
        threadName: Label used in the printed "<name> processing <item>" line.
        q: Queue to drain. The original checked the module-level ``workQueue``
            for emptiness but read from ``q``; both now use ``q``.

    Relies on the module-level ``exitFlag`` and ``queueLock``.
    """
    while not exitFlag:
        # `with` releases the lock even if q.get() raises. Without a timeout
        # the acquire blocks until the lock becomes available.
        with queueLock:
            has_item = not q.empty()
            if has_item:
                data = q.get()
        # Print outside the critical section, as the original did.
        if has_item:
            print("{0} processing {1}".format(threadName, data))
        time.sleep(1)
|
||||
|
||||
|
||||
threadList = ["Thread-1", "Thread-2", "Thread-3"]
nameList = ["One", "Two", "Three", "Four", "Five"]
queueLock = threading.Lock()
workQueue = Queue.Queue(10)
threads = []
threadID = 1

# Create and start the worker threads.
for tName in threadList:
    thread = myThread(threadID, tName, workQueue)
    thread.start()
    threads.append(thread)
    threadID += 1

# Fill the queue while holding the lock so the workers only start consuming
# once every item is queued; `with` guarantees the release.
with queueLock:
    for word in nameList:
        workQueue.put(word)

# Wait for the queue to drain. Sleep between checks instead of spinning,
# which would burn a CPU core and compete with the workers for the GIL.
while not workQueue.empty():
    time.sleep(0.1)

# Tell the workers it is time to exit.
exitFlag = 1

# Wait for all workers to finish.
for t in threads:
    t.join()
print("Exiting Main Thread")
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
#-*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/10/17 20:43
|
||||
@Usage :
|
||||
@Desc :
|
||||
'''
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
#-*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/10/17 21:08
|
||||
@Usage : Lock相关的类:访问临界资源(共享资源)时互斥
|
||||
@Desc :
|
||||
'''
|
||||
|
||||
'''
|
||||
参考:
|
||||
[1] https://cuiqingcai.com/3335.html
|
||||
'''
|
||||
|
||||
from multiprocessing import Process, Lock
|
||||
import time
|
||||
|
||||
|
||||
class MyProcess(Process):
    """Process that prints ``loop`` lines, serialising output with a shared lock.

    Args:
        loop: Number of lines to print.
        lock: Lock guarding the print, so output from several processes does
            not interleave.
    """

    def __init__(self, loop, lock):
        super().__init__()
        self.loop = loop
        self.lock = lock

    def run(self):
        """Print one 'Pid/LoopCount' line every 0.1 s while holding the lock."""
        for count in range(self.loop):
            time.sleep(0.1)
            # `with` releases the lock even if print raises.
            with self.lock:
                print('Pid: ' + str(self.pid) + ' LoopCount: ' + str(count))
|
||||
|
||||
if __name__ == '__main__':
    lock = Lock()
    processes = []
    for i in range(10, 15):
        p = MyProcess(i, lock)
        p.start()
        processes.append(p)
    # Join explicitly so the parent waits for every child by design.
    for p in processes:
        p.join()
|
||||
|
|
@ -0,0 +1,41 @@
|
|||
#-*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/10/17 20:59
|
||||
@Usage : 学习多进程库mutiProcessing的使用
|
||||
@Desc :
|
||||
'''
|
||||
|
||||
'''
|
||||
参考:
|
||||
[1] https://cuiqingcai.com/3335.html
|
||||
[2] https://blog.csdn.net/qq_38120851/article/details/122504028
|
||||
'''
|
||||
|
||||
from multiprocessing import Process
|
||||
import time
|
||||
|
||||
|
||||
class MyProcess(Process):
    """Process that prints its pid and a counter once per second.

    Args:
        loop: Number of iterations (and therefore lines) to produce.
    """

    def __init__(self, loop):
        super().__init__()
        self.loop = loop

    def run(self):
        """Sleep one second, then print 'Pid/LoopCount', ``loop`` times."""
        for count in range(self.loop):
            time.sleep(1)
            print('Pid: ' + str(self.pid) + ' LoopCount: ' + str(count))
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # `daemon` can be set per process: when True, the child is terminated
    # automatically once the parent process ends. Without the joins below
    # only "Main process Ended!" would be printed.
    processes = []
    for i in range(2, 10):
        p = MyProcess(i)
        p.daemon = True
        p.start()
        processes.append(p)

    # Join only after every process has been started; joining right after
    # start() would run the children one at a time.
    for p in processes:
        p.join()

    print("Main process Ended!")
|
||||
|
|
@ -0,0 +1,64 @@
|
|||
# -*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/10/17 21:12
|
||||
@Usage : Semaphore相关类:可以控制临界资源的数量,保证各个进程之间的互斥和同步。
|
||||
@Desc : 尚且有问题,没法消费,程序卡住
|
||||
'''
|
||||
|
||||
from multiprocessing import Process, Semaphore, Lock, Queue
|
||||
import time
|
||||
import random
|
||||
|
||||
buffer = Queue(10)
empty = Semaphore(2)  # free slots the producer may still fill
# Items available to the consumer. This must start at 0: with an initial
# value of 1 the consumer can take the lock and block on an empty queue,
# which stalls the producer on the same lock.
full = Semaphore(0)
lock = Lock()


class Consumer(Process):
    """Consumer half of a semaphore-guarded producer/consumer pair.

    Args:
        buf: Queue to read from (defaults to the module-level ``buffer``).
        empty_slots: Semaphore counting free slots (defaults to ``empty``).
        filled_slots: Semaphore counting queued items (defaults to ``full``).
        mutex: Lock guarding queue access (defaults to ``lock``).
        rounds: Number of items to consume; ``None`` means loop forever.
        delay: Seconds slept before and after each ``get``.

    Pass the primitives explicitly when starting child processes: they are
    then sent to the child together with the Process object, whereas
    module-level objects are re-created in each child under the "spawn"
    start method and are therefore not shared.
    """

    def __init__(self, buf=None, empty_slots=None, filled_slots=None,
                 mutex=None, rounds=None, delay=1):
        super().__init__()
        self.buf = buffer if buf is None else buf
        self.empty_slots = empty if empty_slots is None else empty_slots
        self.filled_slots = full if filled_slots is None else filled_slots
        self.mutex = lock if mutex is None else mutex
        self.rounds = rounds
        self.delay = delay

    def run(self):
        """Take one item per iteration and free a slot for the producer."""
        done = 0
        while self.rounds is None or done < self.rounds:
            print('Consumer线程开始运行')
            self.filled_slots.acquire()
            with self.mutex:
                time.sleep(self.delay)
                print('Consumer pop an {0}'.format(self.buf.get()))
                time.sleep(self.delay)
            self.empty_slots.release()
            print('Consumer 释放了锁')
            done += 1


class Producer(Process):
    """Producer half of the pair; the arguments mirror ``Consumer``.

    ``rounds`` is the number of items to produce (``None`` means forever) and
    ``delay`` is the number of seconds slept after each ``put``.
    """

    def __init__(self, buf=None, empty_slots=None, filled_slots=None,
                 mutex=None, rounds=None, delay=1):
        super().__init__()
        self.buf = buffer if buf is None else buf
        self.empty_slots = empty if empty_slots is None else empty_slots
        self.filled_slots = full if filled_slots is None else filled_slots
        self.mutex = lock if mutex is None else mutex
        self.rounds = rounds
        self.delay = delay

    def run(self):
        """Put one random 0/1 per iteration and signal the consumer."""
        done = 0
        while self.rounds is None or done < self.rounds:
            self.empty_slots.acquire()
            with self.mutex:
                print('Producer 添加了锁')
                num = random.randint(0, 1)
                print('Producer append {0}'.format(num))
                self.buf.put(num)
                time.sleep(self.delay)
            self.filled_slots.release()
            print('Producer 释放了锁')
            done += 1


if __name__ == '__main__':
    # Hand the shared primitives to both processes explicitly.
    p = Producer(buffer, empty, full, lock)
    c = Consumer(buffer, empty, full, lock)
    p.daemon = c.daemon = True

    p.start()
    c.start()

    p.join()
    c.join()
    print('Ended!')
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
#-*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/10/17 20:58
|
||||
@Usage :
|
||||
@Desc :
|
||||
'''
|
||||
|
|
@ -0,0 +1,51 @@
|
|||
# -*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/10/17 21:28
|
||||
@Usage : pool池相关使用
|
||||
@Desc :
|
||||
'''
|
||||
|
||||
from multiprocessing import Lock, Pool
|
||||
import time
|
||||
import requests
|
||||
from requests.exceptions import ConnectionError
|
||||
|
||||
|
||||
def scrape(url):
    """Fetch ``url`` and print the response object, reporting failures.

    Args:
        url: Address to request with ``requests.get``.

    A timeout is passed because ``requests`` waits indefinitely by default,
    which would block a pool worker forever on an unresponsive host.
    """
    try:
        print(requests.get(url, timeout=10))
    except (ConnectionError, requests.exceptions.Timeout):
        # ReadTimeout is not a ConnectionError subclass, so Timeout is
        # caught explicitly now that a timeout is set.
        print('Error Occured ', url)
    finally:
        # Printed for successful and failed requests alike.
        print('URL ', url, ' Scraped')
|
||||
|
||||
|
||||
def function(index):
    """Simulate a three-second task, printing a line at start and at end."""
    print('Start process: ', index)
    time.sleep(3)
    print('End process', index)
|
||||
|
||||
|
||||
if __name__ == '__main__':

    # Asynchronous use of a process pool.
    pool = Pool(processes=3)
    for i in range(4):
        pool.apply_async(function, (i,))

    print("Started processes")
    pool.close()
    pool.join()
    print("Subprocess done.")

    # The convenient map() form. map() blocks until every result is in, so
    # leaving the `with` block (which terminates the pool) loses no work.
    urls = [
        'https://www.baidu.com',
        'http://www.meituan.com/',
        'http://blog.csdn.net/',
        'http://xxxyxxx.net',
    ]
    with Pool(processes=3) as pool:
        pool.map(scrape, urls)
|
||||
|
|
@ -346,7 +346,7 @@ def plot_bar(y_data):
|
|||
|
||||
# plt.tick_params(bottom=False, top=False, left=True, right=False, direction='in', pad=1)
|
||||
plt.xticks([])
|
||||
plt.ylabel('False Positive Rate(%)', fontsize=22)
|
||||
plt.ylabel('False Negative Rate(%)', fontsize=22)
|
||||
plt.xlabel('Methods', fontsize=22)
|
||||
# plt.tight_layout()
|
||||
|
||||
|
|
@ -434,7 +434,7 @@ def plot_FNR1(y_data):
|
|||
|
||||
# plt.tick_params(bottom=False, top=False, left=True, right=False, direction='in', pad=1)
|
||||
plt.xticks([])
|
||||
plt.ylabel('False Negative Rate(%)', fontsize=22)
|
||||
plt.ylabel('False Positive Rate(%)', fontsize=22)
|
||||
# plt.tick_params(bottom=False, top=False, left=True, right=False, direction='in', pad=1)
|
||||
|
||||
plt.xlabel('Methods', fontsize=22)
|
||||
|
|
@ -491,7 +491,7 @@ def plot_FNR2(y_data):
|
|||
|
||||
# plt.tick_params(bottom=False, top=False, left=True, right=False, direction='in', pad=1)
|
||||
plt.xticks([])
|
||||
plt.ylabel('False Negative Rate(%)', fontsize=22)
|
||||
plt.ylabel('False Positive Rate(%)', fontsize=22)
|
||||
# plt.xlabel('Time', fontsize=5)
|
||||
# plt.tight_layout()
|
||||
|
||||
|
|
@ -796,7 +796,7 @@ if __name__ == '__main__':
|
|||
# list=[98.56,98.95,99.95,96.1,95,99.65,76.25,72.64,75.87,68.74]
|
||||
# plot_FNR1(list)
|
||||
# #
|
||||
list=[3.43,1.99,1.92,2.17,1.63,1.81,1.78,1.8,0.6]
|
||||
# list=[3.43,1.99,1.92,2.17,1.63,1.81,1.78,1.8,0.6]
|
||||
list=[3.43,1.99,1.92,2.17,1.8,1.81,1.78,1.8,0.6]
|
||||
plot_FNR2(list)
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,86 @@
|
|||
# -*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/10/12 16:14
|
||||
@Usage :
|
||||
@Desc :
|
||||
'''
|
||||
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def getThreshold(data, cigma_num=1.5):
    """Return upper and lower thresholds at ``mean ± cigma_num * std``.

    Args:
        data: 1-D NumPy array of samples.
        cigma_num: Number of standard deviations between mean and threshold.

    Returns:
        ``(upper, lower)``: two read-only arrays of the same length as
        ``data``, each filled with the respective constant threshold.

    Raises:
        ValueError: If ``data`` is not one-dimensional.
    """
    (dims,) = data.shape  # fails for anything but a 1-D array

    center = np.mean(data)
    spread = np.std(data)
    upper = np.broadcast_to(center + cigma_num * spread, (dims,))
    lower = np.broadcast_to(center - cigma_num * spread, (dims,))
    return upper, lower
|
||||
|
||||
|
||||
def EWMA(data, span=5):
    """Smooth ``data`` with an exponentially weighted moving average.

    Args:
        data: Anything accepted by ``pandas.DataFrame``.
        span: Span of the exponential window; the default matches the
            previously hard-coded value.

    Returns:
        A ``pandas.DataFrame`` holding the smoothed values.
    """
    return pd.DataFrame(data).ewm(span=span).mean()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Raw string: the Windows path contains backslashes that must never be
    # read as escape sequences.
    data = np.load(r"E:\self_example\TensorFlow_eaxmple\Model_train_test/2012轴承数据集预测挑战\data\HI_DATA\Bearing1_1.npy")
    rows, cols = data.shape

    # One threshold pair per row, each broadcast to the row length, then
    # joined so the thresholds line up with the flattened signal.
    upper_parts, lower_parts = [], []
    for row in data:
        upper, lower = getThreshold(row)
        upper_parts.append(upper)
        lower_parts.append(lower)
    maxlist = np.concatenate(upper_parts)
    minlist = np.concatenate(lower_parts)

    origin_data = data.reshape([rows * cols, 1])
    data = np.squeeze(EWMA(origin_data).values)

    # Count out-of-threshold samples before and after smoothing.
    raw = origin_data[:, 0]
    count = int(np.count_nonzero((minlist > data) | (data > maxlist)))
    origin_count = int(np.count_nonzero((minlist > raw) | (raw > maxlist)))

    print("原始劣质率:", origin_count / len(data) * 100, "%")
    print("修复后劣质率:", count / len(data) * 100, "%")
    plt.plot(origin_data, color='blue', label='Original data')
    plt.plot(data, color='green', label='After data repair')
    plt.plot(maxlist, color='red', label='upper Threshold')
    plt.plot(minlist, color='red', label='lower Threshold')
    # The labels above are only shown once a legend is requested.
    plt.legend()

    plt.show()
|
||||
|
|
@ -0,0 +1,8 @@
|
|||
#-*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/10/12 16:14
|
||||
@Usage :
|
||||
@Desc :
|
||||
'''
|
||||
|
|
@ -0,0 +1,155 @@
|
|||
#-*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/10/12 17:51
|
||||
@Usage :
|
||||
@Desc :
|
||||
'''
|
||||
import numpy as np
|
||||
import os
|
||||
import matplotlib.pyplot as plt
|
||||
import pandas as pd
|
||||
import random
|
||||
|
||||
path = "./data"
|
||||
|
||||
def data_read(path):
    """Stack column 4 of every comma-separated file in ``path`` row by row.

    Args:
        path: Directory whose entries are all read with ``np.loadtxt``.

    Returns:
        2-D array with one row per file, in sorted file-name order. An empty
        directory yields an empty ``(0, 0)`` array.

    The original seeded the result with ``np.zeros(5)`` and tested
    ``(data == 0).all()`` to detect "nothing read yet", which silently
    discarded everything read so far whenever it was all zeros. It also
    re-concatenated on every file.
    """
    print("read data...")
    rows = []
    # os.listdir returns entries in arbitrary order; sort for a stable result.
    for file_name in sorted(os.listdir(path)):
        column = np.loadtxt(os.path.join(path, file_name), delimiter=",", usecols=4)
        rows.append(np.expand_dims(column, 0))
    if not rows:
        return np.empty((0, 0))
    return np.concatenate(rows)
|
||||
|
||||
|
||||
def getThreshold(data, cigma_num=3):
    """Return upper and lower thresholds at ``mean ± cigma_num * std``.

    Args:
        data: 1-D NumPy array of samples.
        cigma_num: Number of standard deviations between mean and threshold.

    Returns:
        ``(upper, lower)``: two read-only arrays of the same length as
        ``data``, each filled with the respective constant threshold.

    Raises:
        ValueError: If ``data`` is not one-dimensional.
    """
    (dims,) = data.shape  # fails for anything but a 1-D array

    center = np.mean(data)
    spread = np.std(data)
    upper = np.broadcast_to(center + cigma_num * spread, (dims,))
    lower = np.broadcast_to(center - cigma_num * spread, (dims,))
    return upper, lower
|
||||
|
||||
|
||||
def EWMA(data, span=1000):
    """Smooth ``data`` with an exponentially weighted moving average.

    Args:
        data: Anything accepted by ``pandas.DataFrame``.
        span: Span of the exponential window; the default matches the
            previously hard-coded value.

    Returns:
        A ``pandas.DataFrame`` holding the smoothed values.
    """
    print("data repair...")
    return pd.DataFrame(data).ewm(span=span).mean()
|
||||
|
||||
|
||||
def data_repair(origin_data, maxlist, minlist):
    """Clamp a random 80% of the out-of-threshold samples to their threshold.

    Args:
        origin_data: 1-D sequence of samples; it is copied, not modified.
        maxlist: Per-sample upper thresholds.
        minlist: Per-sample lower thresholds.

    Returns:
        A new array in which ``int(0.8 * k)`` of the ``k`` samples above
        ``maxlist`` are set to the upper threshold, and likewise for the
        samples below ``minlist``. Which samples are chosen is random.
    """
    data = np.array(origin_data)

    # Only positions present in all three inputs are inspected, as with the
    # zip() in the original loop.
    n = min(len(data), len(maxlist), len(minlist))
    head = data[:n]
    too_high = np.flatnonzero(head > np.asarray(maxlist)[:n]).tolist()
    too_low = np.flatnonzero(np.asarray(minlist)[:n] > head).tolist()

    # Sample the upper violations first, then the lower ones, so the random
    # stream is consumed in the same order as before.
    picked_high = random.sample(too_high, int(len(too_high) * 0.8))
    picked_low = random.sample(too_low, int(len(too_low) * 0.8))

    for index in picked_high:
        data[index] = maxlist[index]

    for index in picked_low:
        data[index] = minlist[index]

    return data
|
||||
|
||||
|
||||
def calculate(path):
    """Repair and plot every data folder found under ``path``.

    For each sub-folder: read the signals, compute per-row thresholds, clamp
    out-of-threshold samples, print the share of bad samples before and after
    the repair, and show a scatter plot.

    Args:
        path: Directory containing one sub-folder per recording. The original
            ignored this argument and always used "./data".
    """
    for folder in sorted(os.listdir(path)):
        filePath = os.path.join(path, folder)
        print("文件夹: ", filePath)

        data = data_read(filePath)
        rows, cols = data.shape

        # One threshold pair per row, joined to match the flattened signal.
        upper_parts, lower_parts = [], []
        for row in data:
            upper, lower = getThreshold(row)
            upper_parts.append(upper)
            lower_parts.append(lower)
        maxlist = np.concatenate(upper_parts)
        minlist = np.concatenate(lower_parts)

        origin_data = data.reshape([rows * cols, 1])
        raw = origin_data[:, 0]
        data = data_repair(raw, maxlist, minlist)

        # Count out-of-threshold samples after and before the repair.
        count = int(np.count_nonzero((minlist > data) | (data > maxlist)))
        origin_count = int(np.count_nonzero((minlist > raw) | (raw > maxlist)))

        print("原始劣质率:", origin_count / len(data) * 100, "%")
        print("修复后劣质率:", count / len(data) * 100, "%")

        x = np.arange(len(raw))

        plt.scatter(x, raw, color='blue', s=0.1, label='Original data')
        plt.scatter(x, data, color='green', s=0.1, label='After data repair')
        plt.scatter(x, maxlist, color='red', s=0.05, label='Upper Threshold')
        plt.scatter(x, minlist, color='red', s=0.05, label='Lower Threshold')

        plt.legend(loc='upper left', frameon=True)

        plt.show()


if __name__ == '__main__':
    calculate(path)
|
||||
|
|
@ -0,0 +1,152 @@
|
|||
# -*- encoding:utf-8 -*-
|
||||
|
||||
'''
|
||||
@Author : dingjiawen
|
||||
@Date : 2023/10/12 17:51
|
||||
@Usage :
|
||||
@Desc :
|
||||
'''
|
||||
import os
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
import random
|
||||
|
||||
# Raw string so the backslashes of the Windows path are never read as escapes.
path = r"E:\data\PHM 2012轴承挑战数据集\phm-ieee-2012-data-challenge-dataset-master\Learning_set\Bearing1_1"
|
||||
|
||||
|
||||
def data_read(path):
    """Stack column 4 of every comma-separated file in ``path`` row by row.

    Args:
        path: Directory whose entries are all read with ``np.loadtxt``.

    Returns:
        2-D array with one row per file, in sorted file-name order. An empty
        directory yields an empty ``(0, 0)`` array.

    The original seeded the result with ``np.zeros(5)`` and tested
    ``(data == 0).all()`` to detect "nothing read yet", which silently
    discarded everything read so far whenever it was all zeros. It also
    re-concatenated on every file.
    """
    print("read data...")
    rows = []
    # os.listdir returns entries in arbitrary order; sort for a stable result.
    for file_name in sorted(os.listdir(path)):
        column = np.loadtxt(os.path.join(path, file_name), delimiter=",", usecols=4)
        rows.append(np.expand_dims(column, 0))
    if not rows:
        return np.empty((0, 0))
    return np.concatenate(rows)
|
||||
|
||||
|
||||
def getThreshold(data, cigma_num=1.5):
    """Return upper and lower thresholds at ``mean ± cigma_num * std``.

    Args:
        data: 1-D NumPy array of samples.
        cigma_num: Number of standard deviations between mean and threshold.

    Returns:
        ``(upper, lower)``: two read-only arrays of the same length as
        ``data``, each filled with the respective constant threshold.

    Raises:
        ValueError: If ``data`` is not one-dimensional.
    """
    (dims,) = data.shape  # fails for anything but a 1-D array

    center = np.mean(data)
    spread = np.std(data)
    upper = np.broadcast_to(center + cigma_num * spread, (dims,))
    lower = np.broadcast_to(center - cigma_num * spread, (dims,))
    return upper, lower
|
||||
|
||||
|
||||
def EWMA(data, span=35):
    """Smooth ``data`` with an exponentially weighted moving average.

    Args:
        data: Anything accepted by ``pandas.DataFrame``.
        span: Span of the exponential window; the default matches the
            previously hard-coded value.

    Returns:
        A ``pandas.DataFrame`` holding the smoothed values.
    """
    print("data repair...")
    return pd.DataFrame(data).ewm(span=span).mean()
|
||||
|
||||
|
||||
def data_repair(origin_data, maxlist, minlist):
    """Clamp a random 80% of the out-of-threshold samples to their threshold.

    Args:
        origin_data: 1-D sequence of samples; it is copied, not modified.
        maxlist: Per-sample upper thresholds.
        minlist: Per-sample lower thresholds.

    Returns:
        A new array in which ``int(0.8 * k)`` of the ``k`` samples above
        ``maxlist`` are set to the upper threshold, and likewise for the
        samples below ``minlist``. Which samples are chosen is random.
    """
    data = np.array(origin_data)

    # Only positions present in all three inputs are inspected, as with the
    # zip() in the original loop.
    n = min(len(data), len(maxlist), len(minlist))
    head = data[:n]
    too_high = np.flatnonzero(head > np.asarray(maxlist)[:n]).tolist()
    too_low = np.flatnonzero(np.asarray(minlist)[:n] > head).tolist()

    # Sample the upper violations first, then the lower ones, so the random
    # stream is consumed in the same order as before.
    picked_high = random.sample(too_high, int(len(too_high) * 0.8))
    picked_low = random.sample(too_low, int(len(too_low) * 0.8))

    for index in picked_high:
        data[index] = maxlist[index]

    for index in picked_low:
        data[index] = minlist[index]

    return data
|
||||
|
||||
|
||||
def calculate(path):
    """Repair and plot the recording stored in directory ``path``.

    Reads the signals, computes per-row thresholds, clamps out-of-threshold
    samples, prints the share of bad samples before and after the repair, and
    shows a scatter plot.

    Args:
        path: Directory passed to ``data_read``.
    """
    data = data_read(path)
    rows, cols = data.shape

    # One threshold pair per row, joined to match the flattened signal.
    upper_parts, lower_parts = [], []
    for row in data:
        upper, lower = getThreshold(row)
        upper_parts.append(upper)
        lower_parts.append(lower)
    maxlist = np.concatenate(upper_parts)
    minlist = np.concatenate(lower_parts)

    origin_data = data.reshape([rows * cols, 1])
    raw = origin_data[:, 0]
    data = data_repair(raw, maxlist, minlist)

    # Count out-of-threshold samples after and before the repair, without a
    # Python-level loop over every sample.
    count = int(np.count_nonzero((minlist > data) | (data > maxlist)))
    origin_count = int(np.count_nonzero((minlist > raw) | (raw > maxlist)))

    print("原始劣质率:", origin_count / len(data) * 100, "%")
    print("修复后劣质率:", count / len(data) * 100, "%")

    x = np.arange(len(raw))

    plt.scatter(x, raw, color='blue', s=0.1, label='Original data')
    plt.scatter(x, data, color='green', s=0.1, label='After data repair')
    plt.scatter(x, maxlist, color='red', s=0.05, label='Upper Threshold')
    plt.scatter(x, minlist, color='red', s=0.05, label='Lower Threshold')

    plt.legend(loc='upper left', frameon=True)

    plt.show()


if __name__ == '__main__':
    calculate(path)
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
|
||||
import numpy as np
|
||||
import os
|
||||
|
||||
# Raw string so the backslashes of the Windows path are never read as escapes.
dirPath = r"G:\深度学习/2022-2023\小论文相关\code\PHM 2012轴承挑战数据集\phm-ieee-2012-data-challenge-dataset-master\Learning_set\Bearing1_1"
# os.listdir returns entries in arbitrary order; sort for a stable row order.
files = sorted(os.listdir(dirPath))

# Collect one (1, n) row per file and join once at the end. The original used
# an all-zeros array as a "nothing read yet" marker, which discarded the
# accumulated rows whenever they were all zeros.
rows = []
for file in files:
    path = os.path.join(dirPath, file)
    x = np.loadtxt(path, delimiter=",", usecols=4)
    rows.append(np.expand_dims(x, 0))
data = np.concatenate(rows) if rows else np.zeros(5)

# Per the original note, this dataset gives (2803, 2560).
print(data.shape)

# np.save("./data/Bearing1_1.npy", data, allow_pickle=True)
|
||||
|
|
@ -0,0 +1,70 @@
|
|||
package com.markilue.interview;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.Scanner;
|
||||
|
||||
/**
|
||||
*@BelongsProject: Cainiao
|
||||
*@BelongsPackage: com.markilue.interview
|
||||
*@Author: markilue
|
||||
*@CreateTime: 2023-10-10 19:39
|
||||
*@Description: TODO
|
||||
*@Version: 1.0
|
||||
*/
|
||||
public class Question2 {
|
||||
|
||||
public static void main(String[] args) {
|
||||
Scanner sc = new Scanner(System.in);
|
||||
int n = sc.nextInt();
|
||||
int[] nums = new int[n];
|
||||
for (int i = 0; i < n; i++) {
|
||||
nums[i] = sc.nextInt();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test() {
|
||||
int[] nums = {2, 1, 3};
|
||||
sovle(nums);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test1() {
|
||||
int[] nums = {2, 1, 3,5,4};
|
||||
sovle(nums);
|
||||
}
|
||||
|
||||
public static void sovle(int[] nums) {
|
||||
|
||||
int result = 0;
|
||||
int count = 0;//计算当前连续一样的情况
|
||||
boolean flag = false;
|
||||
for (int i = 0; i < nums.length; i++) {
|
||||
if (nums[i] == i + 1) {
|
||||
flag = true;
|
||||
count++;
|
||||
} else {
|
||||
if (flag) {
|
||||
if (count % 2 == 0) {
|
||||
result += count / 2;
|
||||
} else {
|
||||
result += count / 2 + 1;
|
||||
}
|
||||
}
|
||||
count = 0;
|
||||
flag = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (count % 2 == 0) {
|
||||
result += count / 2;
|
||||
} else {
|
||||
result += count / 2 + 1;
|
||||
}
|
||||
|
||||
|
||||
System.out.println(result);
|
||||
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,97 @@
|
|||
package com.markilue.interview;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Scanner;
|
||||
|
||||
/**
|
||||
*@BelongsProject: Cainiao
|
||||
*@BelongsPackage: com.markilue.interview
|
||||
*@Author: markilue
|
||||
*@CreateTime: 2023-10-10 20:03
|
||||
*@Description: TODO
|
||||
*@Version: 1.0
|
||||
*/
|
||||
public class Question3 {
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
Scanner sc = new Scanner(System.in);
|
||||
|
||||
int n = sc.nextInt();
|
||||
Node[] nodes = new Node[n];
|
||||
String next = sc.next();
|
||||
for (int i = 0; i < next.length(); i++) {
|
||||
Node node = new Node();
|
||||
node.index = i + 1;
|
||||
if ('R'==next.charAt(i)) {
|
||||
node.color = true;
|
||||
} else {
|
||||
node.color = false;
|
||||
}
|
||||
nodes[i] = node;
|
||||
}
|
||||
for (int i = 0; i < n - 1; i++) {
|
||||
int index1 = sc.nextInt();
|
||||
int index2 = sc.nextInt();
|
||||
nodes[index1 - 1].children.add(nodes[index2 - 1]);
|
||||
nodes[index2 - 1].children.add(nodes[index1 - 1]);
|
||||
}
|
||||
|
||||
int result = 0;
|
||||
|
||||
for (Node node : nodes) {
|
||||
cur = 0;
|
||||
blue = 0;
|
||||
red = 0;
|
||||
sovle(node);
|
||||
result += cur;
|
||||
}
|
||||
|
||||
|
||||
System.out.println(result);
|
||||
|
||||
}
|
||||
|
||||
static int cur = 0;
|
||||
static int red = 0;
|
||||
static int blue = 0;
|
||||
|
||||
|
||||
public static void sovle(Node node) {
|
||||
if (node == null) {
|
||||
return;
|
||||
}
|
||||
node.busy = true;
|
||||
if (node.color) {
|
||||
red++;
|
||||
} else {
|
||||
blue++;
|
||||
}
|
||||
if (red == blue) {
|
||||
cur++;
|
||||
}
|
||||
for (Node child : node.children) {
|
||||
if (!child.busy) sovle(child);
|
||||
}
|
||||
node.busy = false;
|
||||
}
|
||||
}
|
||||
|
||||
class Node {
|
||||
|
||||
int index;
|
||||
boolean color;//true为R
|
||||
boolean busy = false;
|
||||
List<Node> children = new ArrayList<>();
|
||||
|
||||
public Node() {
|
||||
}
|
||||
|
||||
public Node(boolean color, int index) {
|
||||
this.color = color;
|
||||
this.index = index;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,92 @@
|
|||
package com.markilue.interview;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.Scanner;
|
||||
|
||||
/**
|
||||
*@BelongsProject: HuaWei
|
||||
*@BelongsPackage: com.markilue.interview
|
||||
*@Author: markilue
|
||||
*@CreateTime: 2023-10-11 20:12
|
||||
*@Description: TODO
|
||||
*@Version: 1.0
|
||||
*/
|
||||
public class Question1 {
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
Scanner sc = new Scanner(System.in);
|
||||
int M = sc.nextInt();
|
||||
int N = sc.nextInt();
|
||||
int n = sc.nextInt();
|
||||
int[] nums = new int[n];
|
||||
|
||||
for (int i = 0; i < n; i++) {
|
||||
nums[i] = sc.nextInt();
|
||||
}
|
||||
|
||||
sovle(nums, M, N);
|
||||
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test() {
|
||||
int M = 4;
|
||||
int N = 6;
|
||||
int[] nums ={2,1,2,2,3,2};
|
||||
sovle(nums,M,N);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test1() {
|
||||
int M = 2;
|
||||
int N = 10;
|
||||
int[] nums ={1,9,1,9,8,2};
|
||||
sovle(nums,M,N);
|
||||
}
|
||||
|
||||
|
||||
public static void sovle(int[] nums, int M, int N) {
|
||||
int result = 0;
|
||||
int sum = 0;
|
||||
int left = 0;
|
||||
int temp = 0;
|
||||
|
||||
for (int i = 0; i < nums.length; i++) {
|
||||
sum += nums[i];
|
||||
|
||||
if (i - left >= M - 1) {
|
||||
if (sum > N) {
|
||||
temp = sum - N;
|
||||
result += temp;
|
||||
}
|
||||
|
||||
int index = i;
|
||||
int flag = temp;
|
||||
while (temp > 0) {
|
||||
if (temp > nums[index]) {
|
||||
temp -= nums[index];
|
||||
nums[index] = 0;
|
||||
index--;
|
||||
} else {
|
||||
nums[index] -= temp;
|
||||
temp = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (flag > 0) {
|
||||
sum = N - nums[left];
|
||||
} else {
|
||||
sum -= nums[left];
|
||||
}
|
||||
left++;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
System.out.println(result);
|
||||
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,67 @@
|
|||
package com.markilue.interview;
|
||||
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Scanner;
|
||||
|
||||
/**
|
||||
*@BelongsProject: HuaWei
|
||||
*@BelongsPackage: com.markilue.interview
|
||||
*@Author: markilue
|
||||
*@CreateTime: 2023-10-11 19:31
|
||||
*@Description: TODO
|
||||
*@Version: 1.0
|
||||
*/
|
||||
public class Question2 {
|
||||
|
||||
public static void main(String[] args) {
|
||||
Scanner sc = new Scanner(System.in);
|
||||
String[] nums1 = sc.next().split(",");
|
||||
int[] nums = new int[nums1.length];
|
||||
for (int i = 0; i < nums1.length; i++) {
|
||||
nums[i] = Integer.parseInt(nums1[i]);
|
||||
}
|
||||
|
||||
solve(nums);
|
||||
}
|
||||
|
||||
public static void solve(int[] nums) {
|
||||
Arrays.sort(nums);
|
||||
int n = nums.length;
|
||||
|
||||
int sum = 0;
|
||||
int index = nums.length - 1;
|
||||
for (; index >= 0 && nums[index] + sum > 0; index--) {
|
||||
sum += nums[index];
|
||||
}
|
||||
// if (index > 0) {
|
||||
// //继续判断负数的要不要
|
||||
// for (; nums[index] + sum > 0; index--) {
|
||||
// sum=nums[index] + sum;
|
||||
// }
|
||||
// }
|
||||
int count = 1;
|
||||
int result = 0;
|
||||
for (int i = index + 1; i < nums.length; i++, count++) {
|
||||
result += count * nums[i];
|
||||
}
|
||||
|
||||
System.out.println(result);
|
||||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void test() {
|
||||
// String[] nums1 = "-1,-8,0,5,-9".split(",");
|
||||
// String[] nums1 = "4,3,2".split(",");
|
||||
String[] nums1 = "-1,-4,-5".split(",");
|
||||
int[] nums = new int[nums1.length];
|
||||
for (int i = 0; i < nums1.length; i++) {
|
||||
nums[i] = Integer.parseInt(nums1[i]);
|
||||
}
|
||||
solve(nums);
|
||||
// System.out.println(Arrays.toString(nums));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,16 @@
|
|||
package com.markilue.interview;
|
||||
|
||||
/**
|
||||
*@BelongsProject: HuaWei
|
||||
*@BelongsPackage: com.markilue.interview
|
||||
*@Author: markilue
|
||||
*@CreateTime: 2023-10-11 20:35
|
||||
*@Description: TODO
|
||||
*@Version: 1.0
|
||||
*/
|
||||
public class Question3 {

    /** Placeholder entry point; no solution has been written yet. */
    public static void main(String[] args) {
    }
}
|
||||
|
|
@ -4,6 +4,7 @@ import org.junit.Test;
|
|||
|
||||
import java.util.Arrays;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.Scanner;
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -14,7 +14,6 @@ import java.util.Scanner;
|
|||
*/
|
||||
public class Question5 {
|
||||
|
||||
|
||||
static int[] father;
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue