
@comment{Journal article record. The citation key matches the DOI suffix.
  The abstract is stored as LaTeX-safe text (no HTML tags or entities).
  NOTE(review): author names are kept in "First Last" order; if the second
  author's surname is the compound "Joe Prathap", switch that name to the
  "Last, First" form -- confirm against the publisher's citation.}
@article{iasc.2023.029533,
  author   = {Mini Prince and P. M. Joe Prathap},
  title    = {A Novel Approach to Design Distribution Preserving Framework for Big Data},
  journal  = {Intelligent Automation \& Soft Computing},
  volume   = {35},
  number   = {3},
  pages    = {2789--2803},
  year     = {2023},
  issn     = {2326-005X},
  doi      = {10.32604/iasc.2023.029533},
  url      = {http://www.techscience.com/iasc/v35n3/49380},
  abstract = {In several fields like financial dealing, industry, business, medicine, \emph{et cetera}, Big Data (BD) has been utilized extensively, which is nothing but a collection of a huge amount of data. However, it is highly complicated along with time-consuming to process a massive amount of data. Thus, to design the Distribution Preserving Framework for BD, a novel methodology has been proposed utilizing Manhattan Distance (MD)-centered Partition Around Medoid (MD--PAM) along with Conjugate Gradient Artificial Neural Network (CG-ANN), which undergoes various steps to reduce the complications of BD. Firstly, the data are processed in the pre-processing phase by mitigating the data repetition utilizing the map-reduce function; subsequently, the missing data are handled by substituting or by ignoring the missed values. After that, the data are transmuted into a normalized form. Next, to enhance the classification performance, the data's dimensionalities are minimized by employing Gaussian Kernel (GK)-Fisher Discriminant Analysis (GK-FDA). Afterwards, the processed data is submitted to the partitioning phase after transmuting it into a structured format. In the partition phase, by utilizing the MD-PAM, the data are partitioned along with grouped into a cluster. Lastly, by employing CG-ANN, the data are classified in the classification phase so that the needed data can be effortlessly retrieved by the user. To analogize the outcomes of the CG-ANN with the prevailing methodologies, the NSL-KDD openly accessible datasets are utilized. The experiential outcomes displayed that an efficient result along with a reduced computation cost was shown by the proposed CG-ANN. The proposed work outperforms well in terms of accuracy, sensitivity and specificity than the existing systems.},
}



