% Conference paper in the LOD 2023 proceedings (Springer LNCS).
% The abstract, keywords and note fields are carried over verbatim from the export.
% NOTE(review): `address` holds a country rather than the publisher's city;
% confirm the place of publication against the volume's title page.
% NOTE(review): the LNCS volume number is not recorded here; add `volume` once confirmed.
@inproceedings{07cb934c17594e62ad39c985cb7d2e63,
  author    = {{Al Hosni}, Omaimah and Starkey, Andrew},
  title     = {Can Complexity Measures and Instance Hardness Measures Reflect the Actual Complexity of Microarray Data?},
  booktitle = {Machine Learning, Optimization, and Data Science - 9th International Conference, {LOD} 2023},
  editor    = {Nicosia, Giuseppe and Ojha, Varun and {La Malfa}, Emanuele and {La Malfa}, Gabriele and Pardalos, Panos M. and Umeton, Renato},
  series    = {Lecture Notes in Computer Science},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  address   = {Germany},
  year      = {2024},
  month     = feb,
  day       = {16},
  pages     = {445--462},
  isbn      = {9783031539688},
  doi       = {10.1007/978-3-031-53969-5_33},
  language  = {English},
  keywords  = {Complexity Measures, High Dimensionality, Imbalanced Classes, Instance Hardness Measures, Small Sample size},
  abstract  = {Despite the significant contribution of the research community in the context of the Microarray data analysis, little attention has been made in understanding the Microarray dataset characteristics using Complexity Measures and Instance Hardness Measures; thus, this study aims to examine the performance of both datasets with Microarray properties. The study assumes that since these measures are data dependent, they might also be negatively affected by complex data characteristics -like the classification algorithm- and provide values that do not reflect the actual data complexity. To investigate this, we have adopted a different experiment strategy than other works undertaken in this context by using a controlled environment with synthetic data that match Microarray properties to assess the effect of each data challenge individually without relying on the classification algorithm performance. The study argues that the experiment strategy adopted by others in correlating the classification algorithm performance to the performance of the measures is not a good independent indicator for validating the measures performance in estimating the actual data difficulty nor for showing the causes of the poor prediction of the learning algorithm{\textquoteright}s performance as both are data dependant. The experiment outcomes indicate that among 35 measures covered in this study the measures responded differently to each data challenge due to the different assumptions they adopted and their sensitivity to the different data challenges. Thus, the study has confirmed that complex data characteristics result in the measures not reflecting the actual data complexity.},
  note      = {9th International Conference on Machine Learning, Optimization, and Data Science, LOD 2023 ; Conference date: 22-09-2023 Through 26-09-2023},
}