基于自编码器的心电信号异常检测(Pytorch)

news/2024/7/4 22:20:11/文章来源:https://blog.csdn.net/weixin_39402231/article/details/139755059

代码较为简单,很容易读懂。

# Importing necessary libraries for TensorFlow, pandas, numpy, and matplotlib
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import copy# Importing the PyTorch library
import torch# Importing additional libraries for data manipulation, visualization, and machine learning
import copy
import seaborn as sns
from pylab import rcParams
from matplotlib import rc
from sklearn.model_selection import train_test_split# Importing PyTorch modules for neural network implementation
from torch import nn, optim
import torch.nn.functional as F
import torch.nn as nn# Ignoring warnings to enhance code cleanliness
import warnings
warnings.filterwarnings('ignore')
# Load the ECG CSV; header=None means the file has no header row, so the
# columns get integer labels.
df = pd.read_csv('http://storage.googleapis.com/download.tensorflow.org/data/ecg.csv',header=None)
# Preview the first rows, transposed.
df.head().T

# Summary statistics per column.
df.describe()

# Number of missing values per column.
df.isna().sum()
0      0
1      0
2      0
3      0
4      0..
136    0
137    0
138    0
139    0
140    0
Length: 141, dtype: int64
df.dtypes
0      float64
1      float64
2      float64
3      float64
4      float64...   
136    float64
137    float64
138    float64
139    float64
140    float64
Length: 141, dtype: object
# Give the last column the name 'target' so it can be accessed as df.target.
new_columns = list(df.columns)
new_columns[-1] = 'target'
df.columns = new_columns
# Number of rows for each distinct target value.
df.target.value_counts()
1.0    2919
0.0    2079
Name: target, dtype: int64
# Number of rows for each distinct value of the target column.
value_counts = df['target'].value_counts()

# Plotting
plt.figure(figsize=(8, 6))
value_counts.plot(kind='bar', color='skyblue')
plt.title('Value Counts of Target Column')
plt.xlabel('Target Values')
plt.ylabel('Count')

# Display the count values on top of the bars
for i, count in enumerate(value_counts):
    plt.text(i, count + 0.1, str(count), ha='center', va='bottom')

plt.show()

# Distinct values of the target column, in order of first appearance.
classes = df.target.unique()


def plot_ecg(data, class_name, ax, n_steps=10):
    """Draw a smoothed series with a rolling-deviation band on ``ax``.

    Args:
        data: Values of one series; wrapped in a single-column DataFrame.
        class_name: Used as the title of the axes.
        ax: Axes object to draw on (``plot``, ``fill_between`` and
            ``set_title`` are called on it).
        n_steps: Window size of the rolling mean and rolling std.
    """
    # Convert data to a DataFrame
    time_series_df = pd.DataFrame(data)

    # Moving average for smoothing, and moving standard deviation for the band.
    # min_periods=1 lets the window produce values from the first sample on.
    rolling = time_series_df.rolling(window=n_steps, min_periods=1)
    smooth_data = rolling.mean()
    deviation = rolling.std()

    # Band of +/- one rolling standard deviation around the smoothed curve.
    upper_bound = smooth_data + deviation
    lower_bound = smooth_data - deviation

    # Plot the smoothed data
    ax.plot(smooth_data, color='black', linewidth=2)

    # Plot the band; column 0 is the only column of the wrapped series.
    ax.fill_between(time_series_df.index, lower_bound[0], upper_bound[0], color='black', alpha=0.2)

    # Set the title
    ax.set_title(class_name)
# Plotting setup
fig, axs = plt.subplots(
    nrows=len(classes) // 3 + 1,
    ncols=3,
    sharey=True,
    figsize=(14, 8),
)

# Plot for each class: the mean of all rows of that class, label column dropped.
for i, cls in enumerate(classes):
    ax = axs.flat[i]
    data = df[df.target == cls].drop(labels='target', axis=1).mean(axis=0).to_numpy()
    plot_ecg(data, cls, ax)  # Using 'cls' directly as class name

# Adjust layout and remove every axes that did not receive a class
# (not just the last one, so the grid stays clean for any class count).
for unused_ax in axs.flat[len(classes):]:
    fig.delaxes(unused_ax)
fig.tight_layout()
plt.show()

# Rows whose target equals 1 form the "normal" frame; the label column is dropped.
normal_df = df[df.target == 1].drop(labels='target', axis=1)
normal_df.shape
# (recorded notebook output)
(2919, 140)
# All remaining rows form the anomaly frame; the label column is dropped.
anomaly_df = df[df.target != 1].drop(labels='target', axis=1)
anomaly_df.shape
# (recorded notebook output)
(2079, 140)
# Splitting the Dataset

# Initial Train-Validation Split:
# The dataset 'normal_df' is divided into training and validation sets.
# 15% of the data is allocated to the validation set.
# The use of 'random_state=42' ensures reproducibility.
train_df, val_df = train_test_split(
    normal_df,
    test_size=0.15,
    random_state=42,
)

# Further Splitting for Validation and Test:
# The validation set obtained in the previous step is further split into
# validation and test sets. 33% of it is allocated to the test set; this is
# the fraction that yields the 145 normal test sequences reported in the
# evaluation output of this article (0.30 would give 132).
# The same 'random_state=42' is used for consistency in randomization.
val_df, test_df = train_test_split(
    val_df,
    test_size=0.33,
    random_state=42,
)
# Function to Create a Dataset
def create_dataset(df):
    """Turn each row of ``df`` into a float32 tensor of shape (seq_len, 1).

    Args:
        df: DataFrame whose rows are sequences and whose columns are time
            steps; all values must be convertible to float32.

    Returns:
        A tuple ``(dataset, seq_len, n_features)`` where ``dataset`` is a list
        with one tensor per row, ``seq_len`` is the number of columns of
        ``df`` and ``n_features`` is 1 (``unsqueeze(1)`` adds exactly one
        feature axis).
    """
    values = df.astype(np.float32).to_numpy()

    # One (seq_len, 1) tensor per row; torch.tensor copies, so the tensors do
    # not share memory with the DataFrame.
    dataset = [torch.tensor(row).unsqueeze(1) for row in values]

    # Read the dimensions from the array instead of stacking every tensor
    # into one big copy; this also works for a DataFrame without rows.
    seq_len = values.shape[1]
    n_features = 1

    return dataset, seq_len, n_features
# Create the training dataset from train_df
train_dataset, seq_len, n_features = create_dataset(train_df)
# Create the validation dataset from val_df
val_dataset, _, _ = create_dataset(val_df)
# Create the test dataset for normal cases from test_df
test_normal_dataset, _, _ = create_dataset(test_df)
# Create the test dataset for anomalous cases from anomaly_df
test_anomaly_dataset, _, _ = create_dataset(anomaly_df)

Implementation of LSTM-Based Autoencoder for ECG Anomaly Detection

class Encoder(nn.Module):
    """Two stacked LSTMs that compress one sequence into a single embedding.

    Args:
        seq_len: Number of time steps of every input sequence.
        n_features: Number of values per time step.
        embedding_dim: Size of the returned embedding; the first LSTM uses
            twice this size as its hidden dimension.
    """

    def __init__(self, seq_len, n_features, embedding_dim=64):
        super().__init__()
        self.seq_len, self.n_features = seq_len, n_features
        self.embedding_dim, self.hidden_dim = embedding_dim, 2 * embedding_dim
        self.rnn1 = nn.LSTM(
            input_size=n_features,
            hidden_size=self.hidden_dim,
            num_layers=1,
            batch_first=True,
        )
        self.rnn2 = nn.LSTM(
            input_size=self.hidden_dim,
            hidden_size=embedding_dim,
            num_layers=1,
            batch_first=True,
        )

    def forward(self, x):
        """Encode one sequence.

        Args:
            x: Tensor with ``seq_len * n_features`` elements; it is reshaped
                to a batch of one sequence.

        Returns:
            Tensor of shape ``(1, embedding_dim)``.
        """
        x = x.reshape((1, self.seq_len, self.n_features))
        x, _ = self.rnn1(x)
        _, (hidden_n, _) = self.rnn2(x)
        # hidden_n holds embedding_dim values for the single layer and single
        # batch element, so the row count is 1 regardless of n_features.
        # Reshaping to (n_features, embedding_dim) only worked for n_features == 1.
        return hidden_n.reshape((1, self.embedding_dim))
class Decoder(nn.Module):
    """Two stacked LSTMs plus a linear layer that expand an embedding to a sequence.

    Args:
        seq_len: Number of time steps to generate.
        input_dim: Size of the embedding; the second LSTM uses twice this
            size as its hidden dimension.
        n_features: Number of values produced per time step.
    """

    def __init__(self, seq_len, input_dim=64, n_features=1):
        super().__init__()
        self.seq_len, self.input_dim = seq_len, input_dim
        self.hidden_dim, self.n_features = 2 * input_dim, n_features
        self.rnn1 = nn.LSTM(
            input_size=input_dim,
            hidden_size=input_dim,
            num_layers=1,
            batch_first=True,
        )
        self.rnn2 = nn.LSTM(
            input_size=input_dim,
            hidden_size=self.hidden_dim,
            num_layers=1,
            batch_first=True,
        )
        self.output_layer = nn.Linear(self.hidden_dim, n_features)

    def forward(self, x):
        """Decode one embedding.

        Args:
            x: Tensor with ``input_dim`` elements (one embedding).

        Returns:
            Tensor of shape ``(seq_len, n_features)``.
        """
        # Feed the same embedding at every time step of a batch of one.
        # The batch size is 1, not n_features: the feature count only belongs
        # on the output layer. For n_features == 1 this is identical to the
        # previous repeat/reshape.
        x = x.reshape((1, self.input_dim)).repeat(self.seq_len, 1)
        x = x.reshape((1, self.seq_len, self.input_dim))
        x, _ = self.rnn1(x)
        x, _ = self.rnn2(x)
        x = x.reshape((self.seq_len, self.hidden_dim))
        return self.output_layer(x)
class Autoencoder(nn.Module):
    """Encoder followed by Decoder; reconstructs its input sequence.

    Args:
        seq_len: Number of time steps per sequence.
        n_features: Number of values per time step.
        embedding_dim: Size of the bottleneck embedding.
        device: Device for both sub-modules. When omitted, CUDA is used if
            available and the CPU otherwise, so the class no longer depends
            on a module-level ``device`` variable existing.
    """

    def __init__(self, seq_len, n_features, embedding_dim=64, device=None):
        super().__init__()
        if device is None:
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.encoder = Encoder(seq_len, n_features, embedding_dim).to(device)
        self.decoder = Decoder(seq_len, embedding_dim, n_features).to(device)

    def forward(self, x):
        """Return the reconstruction of ``x`` (encode, then decode)."""
        embedding = self.encoder(x)
        return self.decoder(embedding)
# Use CUDA when it is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Build the autoencoder with an embedding size of 128 and move it to `device`.
model = Autoencoder(seq_len, n_features, 128)
model = model.to(device)

Training and Visualization of ECG Autoencoder Model

def plot_input_reconstruction(model, dataset, epoch):
    """Plot the first sequence of ``dataset`` against its reconstruction.

    The model is switched to evaluation mode and stays in it.

    Args:
        model: Module mapping a sequence tensor to its reconstruction.
        dataset: Indexable collection of sequence tensors; only element 0 is used.
        epoch: Shown in the plot title.

    Raises:
        ValueError: If the input or the reconstruction is not 1-dimensional
            after squeezing.
    """
    model = model.eval()
    plt.figure(figsize=(10, 5))

    # Run on the device the model lives on (CPU for a parameter-free model).
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    # Take the first sequence from the dataset
    seq_true = dataset[0].to(device)

    with torch.no_grad():
        # The forward pass must be inside no_grad: a tensor that tracks
        # gradients cannot be converted with .numpy().
        seq_pred = model(seq_true)
        # Squeeze the sequences to ensure they are 1-dimensional
        input_sequence = seq_true.squeeze().cpu().numpy()
        reconstruction_sequence = seq_pred.squeeze().cpu().numpy()

    # Check the shape after squeezing
    if input_sequence.ndim != 1 or reconstruction_sequence.ndim != 1:
        raise ValueError("Input and reconstruction sequences must be 1-dimensional after squeezing.")

    # Plotting the sequences
    plt.plot(input_sequence, label='Input Sequence', color='black')
    plt.plot(reconstruction_sequence, label='Reconstruction Sequence', color='red')
    plt.fill_between(range(len(input_sequence)), input_sequence, reconstruction_sequence, color='gray', alpha=0.5)
    plt.title(f'Input vs Reconstruction - Epoch {epoch}')
    plt.legend()
    plt.show()


import torch
import numpy as np
import copy


def train_model(model, train_dataset, val_dataset, n_epochs, save_path):
    """Train ``model`` one sequence at a time and keep the best weights.

    Uses Adam (lr=1e-3, weight_decay=1e-4) and a summed L1 reconstruction
    loss. After every epoch the mean validation loss is compared with the
    best one so far; on improvement the weights are copied in memory and
    written to ``save_path``. A reconstruction plot is drawn on epoch 1 and
    on every fifth epoch.

    Args:
        model: Module mapping a sequence tensor to its reconstruction.
        train_dataset: Iterable of sequence tensors used for optimisation.
        val_dataset: Iterable of sequence tensors used for model selection.
        n_epochs: Number of passes over ``train_dataset``.
        save_path: File the best state dict is written to.

    Returns:
        ``(model, history)``: the model in evaluation mode with the best
        weights loaded, and a dict with the per-epoch mean losses under the
        keys ``'train'`` and ``'val'``.
    """
    # Work on the device the model lives on instead of a module-level global
    # (CPU for a parameter-free model).
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)
    criterion = torch.nn.L1Loss(reduction='sum').to(device)
    history = {'train': [], 'val': []}
    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = float('inf')

    for epoch in range(1, n_epochs + 1):
        model.train()
        train_losses = []
        for seq_true in train_dataset:
            optimizer.zero_grad()
            seq_true = seq_true.to(device)
            seq_pred = model(seq_true)
            loss = criterion(seq_pred, seq_true)
            loss.backward()
            optimizer.step()
            train_losses.append(loss.item())

        val_losses = []
        model.eval()
        with torch.no_grad():
            for seq_true in val_dataset:
                seq_true = seq_true.to(device)
                seq_pred = model(seq_true)
                loss = criterion(seq_pred, seq_true)
                val_losses.append(loss.item())

        train_loss = np.mean(train_losses)
        val_loss = np.mean(val_losses)
        history['train'].append(train_loss)
        history['val'].append(val_loss)

        if val_loss < best_loss:
            best_loss = val_loss
            # Deep copy: state_dict() returns references that later
            # optimizer steps would overwrite.
            best_model_wts = copy.deepcopy(model.state_dict())
            # Save the best model weights
            print("Saving best model")
            torch.save(best_model_wts, save_path)

        print(f'Epoch {epoch}: train loss {train_loss} val loss {val_loss}')

        if epoch == 1 or epoch % 5 == 0:
            plot_input_reconstruction(model, val_dataset, epoch)

    # Load the best model weights before returning
    model.load_state_dict(best_model_wts)
    return model.eval(), history
save_path = 'best_model.pth'  # Replace with your actual path
# Train for 100 epochs; train_model returns the model together with the
# per-epoch loss history.
model, history = train_model(model, train_dataset, val_dataset, 100, save_path)

# Training and validation loss per epoch.
ax = plt.figure().gca()
ax.plot(history['train'], label='Train Loss', color='black')
ax.plot(history['val'], label='Val Loss', color='red')
plt.ylabel('Loss')
plt.xlabel('Epoch')
# The second curve is history['val'], i.e. the validation loss, not a test loss.
plt.legend(['train', 'val'])
plt.title('Loss over training epochs')
plt.show();

ECG Anomaly Detection Model Evaluation and Visualization

# Rebuild the architecture and restore the best weights written during training.
model = Autoencoder(seq_len, n_features, 128)
# map_location remaps the stored tensors to the current device, so a
# checkpoint written on a GPU also loads on a CPU-only machine.
model.load_state_dict(torch.load('best_model.pth', map_location=device))
model = model.to(device)
model.eval()
Autoencoder((encoder): Encoder((rnn1): LSTM(1, 256, batch_first=True)(rnn2): LSTM(256, 128, batch_first=True))(decoder): Decoder((rnn1): LSTM(128, 128, batch_first=True)(rnn2): LSTM(128, 256, batch_first=True)(output_layer): Linear(in_features=256, out_features=1, bias=True))
)
def predict(model, dataset):
    """Reconstruct every sequence in ``dataset`` and measure the L1 error.

    The model is switched to evaluation mode and no gradients are tracked.

    Args:
        model: Module mapping a sequence tensor to its reconstruction.
        dataset: Iterable of sequence tensors.

    Returns:
        ``(predictions, losses)``: a list of flattened NumPy arrays with the
        reconstructions, and a list of floats with the summed absolute error
        of each sequence. Both lists are empty for an empty dataset.
    """
    # Work on the device the model lives on instead of a module-level global
    # (CPU for a parameter-free model).
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    predictions, losses = [], []
    criterion = nn.L1Loss(reduction='sum').to(device)
    with torch.no_grad():
        model = model.eval()
        for seq_true in dataset:
            seq_true = seq_true.to(device)
            seq_pred = model(seq_true)
            loss = criterion(seq_pred, seq_true)
            predictions.append(seq_pred.cpu().numpy().flatten())
            losses.append(loss.item())
    return predictions, losses
# Distribution of the reconstruction loss on the training set; it is the
# basis for picking the anomaly threshold below.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases -
# consider migrating to histplot once the minimum seaborn version is known.
_, losses = predict(model, train_dataset)
sns.distplot(losses, bins=50, kde=True, label='Train', color='black');  # Visualising train loss

# Sequences whose reconstruction loss exceeds this value count as anomalies.
Threshold = 25
predictions, pred_losses = predict(model, test_normal_dataset)
sns.distplot(pred_losses, bins=50, kde=True, color='black')

# A normal sequence is classified correctly when its loss stays at or below
# the threshold (use the named constant rather than repeating the literal).
correct = sum(l <= Threshold for l in pred_losses)
print(f'Correct normal predictions: {correct}/{len(test_normal_dataset)}')
Correct normal predictions: 141/145
# Evaluate on as many anomalous sequences as there are normal test sequences.
anomaly_dataset = test_anomaly_dataset[:len(test_normal_dataset)]
predictions, pred_losses = predict(model, anomaly_dataset)
sns.distplot(pred_losses, bins=50, kde=True, color='red');

# An anomaly is classified correctly when its loss exceeds the threshold
# (use the named constant rather than repeating the literal 25).
correct = sum(l > Threshold for l in pred_losses)
print(f'Correct anomaly predictions: {correct}/{len(anomaly_dataset)}')

Correct anomaly predictions: 145/145

def plot_prediction(data, model, title, ax):
    """Plot one sequence and its reconstruction on ``ax``.

    Args:
        data: Sequence tensor to reconstruct.
        model: Module passed on to ``predict``.
        title: Prefix of the axes title; the loss rounded to two decimals
            is appended.
        ax: Axes object to draw on.
    """
    # predict expects a dataset, so wrap the single sequence in a list.
    predictions, pred_losses = predict(model, [data])
    ax.plot(data, label='true', color='black')
    ax.plot(predictions[0], label='reconstructed', color='red')
    ax.set_title(f'{title} (loss: {np.around(pred_losses[0], 2)})')
    ax.legend()
# 2 x 4 grid: four normal sequences in the top row, four anomalies below.
fig, axs = plt.subplots(
    nrows=2,
    ncols=4,
    sharey=True,
    sharex=True,
    figsize=(22, 8),
)

for i, data in enumerate(test_normal_dataset[:4]):
    plot_prediction(data, model, title='Normal', ax=axs[0, i])

for i, data in enumerate(test_anomaly_dataset[:4]):
    plot_prediction(data, model, title='Anomaly', ax=axs[1, i])

fig.tight_layout();

工学博士,担任《Mechanical System and Signal Processing》《中国电机工程学报》《控制与决策》等期刊审稿专家,擅长领域:现代信号处理,机器学习,深度学习,数字孪生,时间序列分析,设备缺陷检测、设备异常检测、设备智能故障诊断与健康管理PHM等。

本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处:http://www.luyixian.cn/news_show_1092834.aspx

如若内容造成侵权/违法违规/事实不符,请联系dt猫网进行投诉反馈email:809451989@qq.com,一经查实,立即删除!

相关文章

基于STM32的智能仓库管理系统

目录 引言环境准备智能仓库管理系统基础代码实现：实现智能仓库管理系统 4.1 数据采集模块4.2 数据处理与分析4.3 通信模块实现4.4 用户界面与数据可视化应用场景：仓库管理与优化问题解决方案与优化收尾与总结 1. 引言 智能仓库管理系统通过使用STM32嵌…

智能AI在线人工智能对话源码系统 完整的代安装码+搭建部署教程

系统概述 智能 AI 在线人工智能对话源码系统是一款前沿的技术解决方案，它融合了人工智能的强大能力，为用户提供了一个高效、智能的对话平台。该系统基于先进的算法和模型，能够理解用户的输入，并以高度准确和自然的方式进行回应。…

2024全新升级!MindManager思维导图软件,让思维无限延伸

Hey朋友们✨！今天要给大家安利一款我超级喜欢的办公神器——MindManager2024思维导图最新版本！如果你跟我一样，经常需要整理思路、规划工作或学习计划，那么你一定不能错过它！🎉 MindManager思维导图工具绿…

【linux学习十七】文件服务管理

一、FTP FTP server:FTP(File Transfer Protocol,文件传输协议 )是 TCP/IP 协议组中的协议之一 软件包：vsftpd/安装 yum -y install vsftpd//准备文件 touch /var/ftp/abc.txt //注释:FTP服务器的主目录:“/var/ftp/”，是FTP程序分享内容的本机目录…

Mac环境 aab包转apks,并安装apks

一、下载bundletool工具 Releases google/bundletool GitHub 二、将下载bundletool.jar包、aab、keystore文件全部放到同一个目录下 例如我全部放到download目录下 转换命令行： java -jar bundletool-all-1.16.0.jar build-apks --mode=universal --bundle…

Shell脚本 循环语句、函数、数组

目录 Shell循环语句 概念 for循环 语法格式 批量创建用户并设置初始密码示例 批量删除用户示例操作步骤 巡检测试主机连通性示例 while循环 语法格式 批量创建、删除用户示例 随机数 控制随机数范围 0 ~ 999 0 ~ 99 0 ~ 9 使用while和随机数实现猜价格示例 un…

docker将容器打包提交为镜像,再打包成tar包

将容器打包成镜像可以通过以下步骤来实现。这里以 Docker 为例，假设你已经安装了 Docker 并且有一个正在运行的容器。 1. 找到正在运行的容器 首先，你需要找到你想要打包成镜像的容器的 ID 或者名字。可以使用以下命令查看所有正在运行的容器…

八、yolov8模型预测和模型导出(目标检测)

模型查看 模型预测 模型导出 模型训练完成后，找到训练文件生成文件夹，里面包含weights、过程图、曲线图。 模型预测 1、在以下文件夹中放入需要预测的图； 2、找到detect文件下的predict.py文件，修改以下内容。 3、右键点击…

BL104应用在智慧零售多协议采集监控远程实时查看

在智慧零售领域，如今的市场竞争日益激烈，传统的零售模式已经难以满足消费者对服务和体验的高需求。智能化技术的引入，尤其是基于物联网的解决方案，成为提升零售业务效率和服务质量的关键。钡铼BL104 Modbus转MQTT网关作为一种先进…

JSAPI微信支付提示缺少total_fee

微信小程序云开发中使用微信支付。莫名其妙的报错： 这个报错严重图文不符，驴唇不对马嘴，难排查，很恶心。 原因可能是： 1、在微信支付中关联appid； 2、在小程序云开发控制台中授权：

keepalived高可用,LVS+keepalived的实现

概述： keepalived是集群高可用的一个技术，它是一个软件，与网络技术中VRRP协议的实现相类似，都是在若干个服务集群后虚拟出的一个对外提供服务的VIP(Virtual IP)，即虚拟IP，当某一台服务器发生故障时…

高效电商数据分析:电商爬虫API与大数据技术的融合应用

一、引言 随着电子商务的迅猛发展和数据量的爆炸式增长，电商数据分析已成为企业决策的关键依据。在竞争激烈的电商市场中，如何高效、准确地获取并分析数据，以洞察市场趋势、优化运营策略、提升用户体验，成为电商企业面临的重要挑…

AI Workflow的敏捷开发:持续创新与优化的艺术

在人工智能的浪潮中，AI Workflow作为大模型落地的关键实践，正逐渐成为技术领域的新宠。然而，随着技术的发展，我们面临着一系列挑战，如何有效地应对这些挑战，实现AI Workflow的敏捷开发，成为了一…

手把手AI实战(四)让图片开口说话

一、成果展示 1.1 原始图片 1.2 合成的视频 二、步骤拆解 提供一张带完整人脸的图片，系统能够根据音频（或者根据输入文本生成音频）合成视频，也就是AI数字人。 本篇实战使用的AI平台是hedra，网址是：https://…

监督学习(二)线性分类

每个样本都有标签的机器学习称为监督学习。根据标签数值类型的不同，监督学习又可以分为回归问题和分类问题。分类和回归是监督学习的核心问题。 回归(regression)问题中的标签是连续值。分类(classification)问题中的标签是离散值。分类问题根据其类别数量又可分为…

代码随想录算法训练营第四十一天| 416. 分割等和子集

416. 分割等和子集 - 力扣（LeetCode） class Solution {public boolean canPartition(int[] nums) {int sum = 0;for (int i = 0; i < nums.length; i++){sum += nums[i];}if(sum % 2 != 0){return false;}int weight = sum / 2;// int[][] dp = new int[nums.length][weig…

越复杂的CoT越有效吗?Complexity-Based Prompting for Multi-step Reasoning

Complexity-Based Prompting for Multi-step Reasoning 论文：https://openreview.net/pdf?id=yf1icZHC-l9 Github：https://github.com/FranxYao/chain-of-thought-hub 发表位置：ICLR 2023 Complexity-Based Prompting for Multi-step Reason…

【Matlab】CNN-LSTM分类 卷积神经网络-长短期记忆神经网络组合模型(附代码)

资源下载： https://download.csdn.net/download/vvoennvv/89466499 分类算法资源合集：https://download.csdn.net/download/vvoennvv/89466519 目录 Matlab SVM支持向量机分类算法 Matlab RF随机森林分类算法 Matlab RBF径向基神经网络分类算法 Ma…

韩顺平0基础学java——第28天

p569-591 坦克大战！（绘图监听事件线程文件处理） 绘图 绘图原理 Component类提供了两个和绘图相关最重要的方法: 1. paint(Graphics g)绘制组件的外观 2. repaint()刷新组件的外观。 当组件第一次在屏幕显示的时候,程序会自动的调用paint()…

通过 Setapp 使用 240 多款 Mac 生产力工具以及 GPT-4o

Setapp 是一项革命性的订阅服务，可以使用 240 多款 Mac 应用程序的综合套件，并配有强大的人工智能助手。 通过 Setapp 为你的工作效率和生产力增添魔力。 Setapp 官网：访问（提供 7 天试用） Setapp 的主要功能 AI 助手…