Gutiérrez-Portela, Fernando; Almenares-Mendoza, Florina; Calderón-Benavides, Liliana: Evaluation of the performance of unsupervised learning algorithms for intrusion detection in unbalanced data environments. Proceedings Article. In: IEEE, 2024, ISSN: 2169-3536. Abstract | Links | BibTeX | Tags: anomaly detection, compromise, intrusion detection system, machine learning, metrics, Qursa, unsupervised models. 2024
@comment{Journal article in IEEE Access (DOI prefix 10.1109/ACCESS, ISSN 2169-3536); previously typed as inproceedings without the booktitle that type requires. Volume and pages are not present in the source record and still need to be added.}
@article{almenarez019,
  author     = {Gutiérrez-Portela, Fernando and Almenares-Mendoza, Florina and Calderón-Benavides, Liliana},
  title      = {Evaluation of the performance of unsupervised learning algorithms for intrusion detection in unbalanced data environments},
  journal    = {IEEE Access},
  publisher  = {IEEE},
  year       = {2024},
  date       = {2024-12-12},
  issn       = {2169-3536},
  doi        = {10.1109/ACCESS.2024.3516615},
  url        = {https://ieeexplore.ieee.org/document/10794744},
  urldate    = {2024-12-12},
  abstract   = {In this study, the performance of different unsupervised machine learning algorithms used for intrusion detection within unbalanced data environments were analyzed; these algorithms included the K-means++ algorithm, density-based spatial clustering of applications with noise (DBSCAN), local outlier factor (LOF), and isolation forest (I-forest) using the BoT–IoT dataset. Performance metrics such as purity, homogeneity_score, completeness_score, v_measure_score, and adjusted_mutual_info_score were used to evaluate the effectiveness of algorithms in detecting various types of attacks such as distributed denial of service (DDoS), denial of service (DoS), and reconnaissance. Similarly, different methods were used for the automatic selection of the optimal number of clusters such as the elbow method, silhouette coefficient, Calinski–Harabasz index, and Davies–Bouldin index. Moreover, principal component analysis (PCA) was used to explain data variance and the influence of variables on intrusion detection. Results revealed that the K-means algorithm achieved 95% purity as well as 95% and 99% prediction accuracies for normal and abnormal data, respectively. The I-forest algorithm achieved 95% purity as well as 99% and 90% prediction accuracies for normal and abnormal data in a balanced dataset, respectively. These findings indicated that I-forest exhibited a low central processing unit (CPU) consumption rate of 10% on balanced data, outperforming DBSCAN, K-Means++, and LOF, with 16% consumption rates.},
  keywords   = {anomaly detection, compromise, intrusion detection system, machine learning, metrics, Qursa, unsupervised models},
  pubstate   = {published},
  tppubtype  = {article}
}
Publications
Evaluation of the performance of unsupervised learning algorithms for intrusion detection in unbalanced data environments. Proceedings Article. In: IEEE, 2024, ISSN: 2169-3536. 2024