
% Journal article (Computers, Materials & Continua, vol. 64, no. 1, 2020):
% Huffman-based text compression where each letter's code depends on the
% prefix preceding it in the word. The title uses an ASCII apostrophe so the
% entry stays safe for classic 8-bit BibTeX; LaTeX typesets it as a curly quote.
@article{cmc.2020.09282,
  author   = {AbuSafiya, Majed},
  title    = {Text Compression Based on Letter's Prefix in the Word},
  journal  = {Computers, Materials \& Continua},
  volume   = {64},
  number   = {1},
  pages    = {17--30},
  year     = {2020},
  issn     = {1546-2226},
  doi      = {10.32604/cmc.2020.09282},
  url      = {http://www.techscience.com/cmc/v64n1/39129},
  abstract = {Huffman [Huffman (1952)] encoding is one of the most known compression
    algorithms. In its basic use, only one encoding is given for the same letter in text to
    compress. In this paper, a text compression algorithm that is based on Huffman encoding
    is proposed. Huffman encoding is used to give different encodings for the same letter
    depending on the prefix preceding it in the word. A deterministic finite automaton (DFA)
    that recognizes the words of the text is constructed. This DFA records the frequencies for
    letters that label the transitions. Every state will correspond to one of the prefixes of the
    words of the text. For every state, a different Huffman encoding is defined for the letters
    that label the transitions leaving that state. These Huffman encodings are then used to
    encode the letters of the words in the text. This algorithm was implemented and
    experimental study showed significant reduction in compression ratio over the basic
    Huffman encoding. However, more time is needed to construct these codes.},
}



